Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Python/cython/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Build automation for the Cython-wrapped HPC kernels.
#
# The interpreter can be overridden on the command line, e.g.
#   make PYTHON=python3 test
PYTHON ?= python

.PHONY: build clean test

# Compile compute.pyx + hpc_kernels.c into an in-place extension module.
build:
	$(PYTHON) setup.py build_ext --inplace

# Remove the generated C source, object files, extension modules and
# setuptools build metadata.
clean:
	rm -f compute.c hpc_kernels.o compute*.so compute*.pyd
	rm -rf build *.egg-info

# Rebuild if needed, then run the benchmark / validation driver.
test: build
	$(PYTHON) driver.py
61 changes: 61 additions & 0 deletions Python/cython/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Cython Shared Library - HPCTrainingExamples Kernels

This example wraps **actual C computation kernels from the HPCTrainingExamples repo** into a Python-callable shared library (`.so` / `.pyd`) using Cython.

## Wrapped Kernels

| Function | Original Source | Description |
|---|---|---|
| `py_cpu_func(inp)` | `ManagedMemory/CPU_Code/cpu_code.c` | Doubles every element: `out[i] = in[i] * 2.0` |
| `py_saxpy(a, x, y)` | `Pragma_Examples/OpenMP/C/1_saxpy` | SAXPY: `y = a*x + y` |
| `py_vecadd(a, b)` | `Pragma_Examples/OpenMP/C/3_vecadd` | Vector addition: `c = a + b` |
| `py_reduction(x)` | `Pragma_Examples/OpenMP/C/2_reduction` | Sum-reduction of an array |

The core loops in [hpc_kernels.c](hpc_kernels.c) are extracted directly from the original repo sources (with `main()` and OpenMP timing scaffolding removed). The Cython wrapper in [compute.pyx](compute.pyx) calls into these C functions and handles NumPy array ↔ C pointer conversion.

## Prerequisites

```bash
pip install cython numpy setuptools
```

## Build

```bash
# Option 1 - Makefile
make build

# Option 2 - setup.py directly
python setup.py build_ext --inplace
```

This compiles `hpc_kernels.c` + `compute.pyx` into a single extension module, `compute.<platform>.so` (`compute.<platform>.pyd` on Windows), placed in the current directory.

## Run

```bash
make test
# or
python driver.py
```

The driver benchmarks each Cython-wrapped kernel against its NumPy equivalent and validates correctness using the same expected values as the original C programs.

## Clean

```bash
make clean
```

## File Layout

```
Python/cython/
├── hpc_kernels.h C declarations for the kernels
├── hpc_kernels.c C kernel implementations (from repo)
├── compute.pyx Cython wrapper module
├── setup.py Build script (setuptools + Cython)
├── driver.py Benchmark / validation driver
├── Makefile Build automation
└── README.md
```
78 changes: 78 additions & 0 deletions Python/cython/compute.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# cython: boundscheck=False, wraparound=False, cdivision=True
"""
Cython wrappers around C kernels from HPCTrainingExamples.

Wraps the following repo examples as a shared library:
- cpu_func (ManagedMemory/CPU_Code/cpu_code.c)
- saxpy (Pragma_Examples/OpenMP/C/1_saxpy)
- vecadd (Pragma_Examples/OpenMP/C/3_vecadd)
- reduction (Pragma_Examples/OpenMP/C/2_reduction)
"""

import numpy as np
cimport numpy as np

# Initialise the NumPy C-API for this extension module. The Cython
# documentation recommends calling this whenever NumPy is cimport-ed.
np.import_array()

# Element types used by the buffer-typed arguments of the wrappers below.
ctypedef np.float64_t DOUBLE_t
ctypedef np.float32_t FLOAT_t


# C prototypes of the kernels declared in hpc_kernels.h.
# (The first parameter of cpu_func is spelled `inp` here because `in` is a
# reserved word in Cython/Python; parameter names do not affect linkage.)
cdef extern from "hpc_kernels.h":
    void cpu_func(double *inp, double *out, int M)
    void saxpy(float a, float *x, float *y, int N)
    void vecadd(double *a, double *b, double *c, int N)
    double reduction(double *x, int n)


# Python wrappers around the c kernels.

def py_cpu_func(np.ndarray[DOUBLE_t, ndim=1] inp):
    """Double every element (ManagedMemory/CPU_Code/cpu_code.c cpu_func).

    Parameters
    ----------
    inp : 1-D float64 ndarray
        Input values; the array does not have to be contiguous.

    Returns
    -------
    numpy.ndarray
        A new 1-D float64 array with ``out[i] = inp[i] * 2.0``.

    Raises
    ------
    OverflowError
        If the array length does not fit in the C ``int`` the kernel takes.
    """
    cdef np.ndarray[DOUBLE_t, ndim=1, mode="c"] inp_c
    cdef np.ndarray[DOUBLE_t, ndim=1, mode="c"] out
    cdef Py_ssize_t length
    cdef int M

    # The contiguous copy/view must live in a buffer-typed variable:
    # `&arr[0]` only yields a `double *` for a typed array, not for a
    # generic Python object.
    inp_c = np.ascontiguousarray(inp, dtype=np.float64)

    # The kernel takes an `int` length; refuse silent truncation.
    length = inp_c.shape[0]
    M = <int>length
    if M != length:
        raise OverflowError("array length does not fit in a C int")

    # With boundscheck disabled, element 0 of an empty array must not be
    # touched, so return before taking any addresses.
    if M == 0:
        return np.empty(0, dtype=np.float64)

    out = np.empty(M, dtype=np.float64)
    cpu_func(&inp_c[0], &out[0], M)
    return out


def py_saxpy(float a,
             np.ndarray[FLOAT_t, ndim=1] x,
             np.ndarray[FLOAT_t, ndim=1] y):
    """y = a*x + y (Pragma_Examples/OpenMP/C/1_saxpy saxpy).

    Parameters
    ----------
    a : float
        Scalar multiplier (converted to a C ``float``).
    x, y : 1-D float32 ndarrays of equal length
        Neither input is modified; the kernel runs on a copy of ``y``.

    Returns
    -------
    numpy.ndarray
        A new 1-D float32 array holding ``a * x + y``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in length.
    OverflowError
        If the length does not fit in the C ``int`` the kernel takes.
    """
    cdef np.ndarray[FLOAT_t, ndim=1, mode="c"] x_c
    cdef np.ndarray[FLOAT_t, ndim=1, mode="c"] y_out
    cdef Py_ssize_t length
    cdef int N

    # Buffer-typed, C-contiguous locals are required so that `&arr[0]`
    # is a real `float *` rather than the address of a Python object.
    x_c = np.ascontiguousarray(x, dtype=np.float32)
    length = x_c.shape[0]
    if y.shape[0] != length:
        raise ValueError("x and y must have the same length")

    # The kernel takes an `int` length; refuse silent truncation.
    N = <int>length
    if N != length:
        raise OverflowError("array length does not fit in a C int")

    # The C kernel updates y in place, so hand it a private contiguous copy.
    y_out = np.ascontiguousarray(y, dtype=np.float32).copy()
    if N == 0:
        return y_out

    saxpy(a, &x_c[0], &y_out[0], N)
    return y_out


def py_vecadd(np.ndarray[DOUBLE_t, ndim=1] a,
              np.ndarray[DOUBLE_t, ndim=1] b):
    """c = a + b (Pragma_Examples/OpenMP/C/3_vecadd vecadd).

    Parameters
    ----------
    a, b : 1-D float64 ndarrays of equal length

    Returns
    -------
    numpy.ndarray
        A new 1-D float64 array with ``c[i] = a[i] + b[i]``.

    Raises
    ------
    ValueError
        If ``a`` and ``b`` differ in length.
    OverflowError
        If the length does not fit in the C ``int`` the kernel takes.
    """
    cdef np.ndarray[DOUBLE_t, ndim=1, mode="c"] a_c
    cdef np.ndarray[DOUBLE_t, ndim=1, mode="c"] b_c
    cdef np.ndarray[DOUBLE_t, ndim=1, mode="c"] c
    cdef Py_ssize_t length
    cdef int N

    # Buffer-typed, C-contiguous locals are required so that `&arr[0]`
    # is a real `double *` rather than the address of a Python object.
    a_c = np.ascontiguousarray(a, dtype=np.float64)
    b_c = np.ascontiguousarray(b, dtype=np.float64)

    length = a_c.shape[0]
    if b_c.shape[0] != length:
        raise ValueError("a and b must have the same length")

    # The kernel takes an `int` length; refuse silent truncation.
    N = <int>length
    if N != length:
        raise OverflowError("array length does not fit in a C int")

    # With boundscheck disabled, element 0 of an empty array must not be
    # touched, so return before taking any addresses.
    if N == 0:
        return np.empty(0, dtype=np.float64)

    c = np.empty(N, dtype=np.float64)
    vecadd(&a_c[0], &b_c[0], &c[0], N)
    return c


def py_reduction(np.ndarray[DOUBLE_t, ndim=1] x):
    """Sum all elements (Pragma_Examples/OpenMP/C/2_reduction reduction).

    Parameters
    ----------
    x : 1-D float64 ndarray
        Values to add up; the array does not have to be contiguous.

    Returns
    -------
    float
        The sum of the elements, or ``0.0`` for an empty array.

    Raises
    ------
    OverflowError
        If the array length does not fit in the C ``int`` the kernel takes.
    """
    cdef np.ndarray[DOUBLE_t, ndim=1, mode="c"] x_c
    cdef Py_ssize_t length
    cdef int n

    # A buffer-typed, C-contiguous local is required so that `&x_c[0]`
    # is a real `double *` rather than the address of a Python object.
    x_c = np.ascontiguousarray(x, dtype=np.float64)

    # The kernel takes an `int` length; refuse silent truncation.
    length = x_c.shape[0]
    n = <int>length
    if n != length:
        raise OverflowError("array length does not fit in a C int")

    # With boundscheck disabled, element 0 of an empty array must not be
    # touched, so return before taking any addresses.
    if n == 0:
        return 0.0
    return reduction(&x_c[0], n)
111 changes: 111 additions & 0 deletions Python/cython/driver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/usr/bin/env python3
"""
Driver for the Cython-wrapped HPC Training Examples kernels.

Exercises every kernel from the shared library and validates
results against NumPy equivalents, matching the behaviour of
the original C programs in the repo.
"""

import time
import numpy as np

try:
    import compute
except ImportError as exc:
    # Chain the original error explicitly so the underlying reason for the
    # failed import stays attached as the cause of the friendlier message.
    raise ImportError(
        "Could not import the 'compute' shared library.\n"
        "Build it first: python setup.py build_ext --inplace"
    ) from exc


def bench(label, func, *args, repeats=50):
    """Time ``func(*args)`` and return its result.

    The function is called once as a warm-up (that call's return value is
    what gets returned), then ``repeats`` more times under the timer. The
    mean time per call is printed in microseconds next to ``label``.

    Args:
        label: Text printed in front of the timing.
        func: Callable to benchmark.
        *args: Positional arguments forwarded to ``func`` on every call.
        repeats: Number of timed calls; must be at least 1.

    Returns:
        The value returned by the warm-up call.

    Raises:
        ValueError: If ``repeats`` is smaller than 1.
    """
    # Reject this up front: with zero timed runs the mean is undefined and
    # would otherwise surface as a ZeroDivisionError after the warm-up ran.
    if repeats < 1:
        raise ValueError("repeats must be at least 1")

    result = func(*args)  # warm-up
    t0 = time.perf_counter()
    for _ in range(repeats):
        func(*args)
    elapsed = (time.perf_counter() - t0) / repeats
    print(f" {label:40s} {elapsed*1e6:10.1f} us")
    return result


def test_cpu_func():
    """ManagedMemory/CPU_Code/cpu_code.c — doubles every element.

    Benchmarks the wrapped kernel against ``np.multiply`` and checks both
    the element-wise result and the total the original C program expects.
    """
    M = 100_000
    inp = np.ones(M, dtype=np.float64)

    out = bench("Cython cpu_func", compute.py_cpu_func, inp)
    out_ref = bench("NumPy inp * 2.0", np.multiply, inp, 2.0)

    assert np.allclose(out, out_ref), "cpu_func mismatch!"

    # The original C program expects sum(out) == 200000
    total = out.sum()
    assert np.isclose(total, M * 2.0), "cpu_func sum mismatch!"
    print(f" Result is {total:.6f} (expected {M * 2.0:.6f})")


def test_saxpy():
    """Pragma_Examples/OpenMP/C/1_saxpy — y = a*x + y.

    Benchmarks the wrapped kernel against the NumPy expression and checks
    the result against both that reference and the value the original
    program expects.
    """
    N = 1_000_000
    a = np.float32(2.0)
    x = np.ones(N, dtype=np.float32)
    y = np.full(N, 2.0, dtype=np.float32)

    y_out = bench("Cython saxpy", compute.py_saxpy, a, x, y)
    y_ref = bench("NumPy a*x + y", lambda: a * x + y)

    # Compare against the reference that was just benchmarked instead of
    # recomputing the same expression a second time.
    assert np.allclose(y_out, y_ref), "saxpy mismatch!"

    # Original program expects y[0] == 4.0, y[N-1] == 4.0
    assert np.isclose(y_out[0], 4.0), "saxpy y[0] mismatch!"
    assert np.isclose(y_out[-1], 4.0), "saxpy y[N-1] mismatch!"
    print(f" y[0] {y_out[0]:.6f} y[N-1] {y_out[-1]:.6f} (expected 4.0)")


def test_vecadd():
    """Pragma_Examples/OpenMP/C/3_vecadd — c = a + b.

    Uses a[i] = sin²(i+1) and b[i] = cos²(i+1), so every element of the
    sum (and therefore its mean) should be approximately 1.0.
    """
    N = 100_000
    # Build the inputs with vectorised NumPy calls rather than a
    # Python-level loop over 100k scalar sin/cos evaluations.
    idx = np.arange(1, N + 1, dtype=np.float64)
    a = np.sin(idx) ** 2
    b = np.cos(idx) ** 2

    c_cy = bench("Cython vecadd", compute.py_vecadd, a, b)
    c_np = bench("NumPy a + b", np.add, a, b)

    assert np.allclose(c_cy, c_np), "vecadd mismatch!"

    # Original expects mean(c) ≈ 1.0 (sin²+cos²=1)
    avg = c_cy.mean()
    assert np.isclose(avg, 1.0), "vecadd mean mismatch!"
    print(f" Final result: {avg:.6f} (expected ≈1.0)")


def test_reduction():
    """Pragma_Examples/OpenMP/C/2_reduction — sum of array.

    Benchmarks the wrapped kernel against ``np.sum`` and checks the result
    against both NumPy and the total the original program expects.
    """
    n = 100_000
    x = np.full(n, 2.0, dtype=np.float64)

    s_cy = bench("Cython reduction", compute.py_reduction, x)
    s_np = bench("NumPy np.sum", np.sum, x)

    assert abs(s_cy - s_np) < 1e-6, "reduction mismatch!"

    # Original expects sum == 200000
    assert abs(s_cy - n * 2.0) < 1e-6, "reduction sum mismatch!"
    print(f" Sum={s_cy:.6f} (expected {n * 2.0:.6f})")


def main():
    """Run all four kernel checks in order, with a banner and section headers."""
    rule = "=" * 62
    # (section title, test function) in execution order.
    sections = (
        ("cpu_func (ManagedMemory/CPU_Code)", test_cpu_func),
        ("saxpy (Pragma_Examples/OpenMP/C/1_saxpy)", test_saxpy),
        ("vecadd (Pragma_Examples/OpenMP/C/3_vecadd)", test_vecadd),
        ("reduction (Pragma_Examples/OpenMP/C/2_reduction)", test_reduction),
    )

    print(rule)
    print("HPCTrainingExamples — Cython Shared Library Tests")
    print(rule)

    for title, run_test in sections:
        print("\n--- " + title + " ---")
        run_test()

    print("\n" + rule)
    print("All tests passed.")


# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
43 changes: 43 additions & 0 deletions Python/cython/hpc_kernels.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* hpc_kernels.c
*
* CPU computation kernels extracted from HPCTrainingExamples.
* Each function mirrors the core loop from the original source
* (with the main() and OpenMP/timing scaffolding stripped out so
* the pure kernel can be called from Python via Cython).
*
* Original sources:
* cpu_func - ManagedMemory/CPU_Code/cpu_code.c
* saxpy - Pragma_Examples/OpenMP/C/1_saxpy/0_saxpy_portyourself/saxpy.c
* vecadd - Pragma_Examples/OpenMP/C/3_vecadd/0_vecadd_portyourself/vecadd.c
* reduction - Pragma_Examples/OpenMP/C/2_reduction/0_reduction_portyourself/reduction.c
*/

#include "hpc_kernels.h"

/*
 * cpu_func (ManagedMemory/CPU_Code/cpu_code.c)
 *
 * Stores twice the value of each of the first M entries of `in` into the
 * matching slot of `out`, walking the arrays front to back. Nothing is
 * written when M <= 0.
 */
void cpu_func(double *in, double *out, int M) {
    int idx = 0;
    while (idx < M) {
        out[idx] = in[idx] * 2.0;
        ++idx;
    }
}

/*
 * saxpy: for each of the first N entries, replaces y[k] with
 * a * x[k] + y[k]. `x` is only read; `y` is updated in place.
 * Nothing is written when N <= 0.
 */
void saxpy(float a, float *x, float *y, int N) {
    int k;
    for (k = 0; N > k; ++k) {
        y[k] = a * x[k] + y[k];
    }
}

/*
 * vecadd: element-wise sum of the first N entries, c[pos] = a[pos] + b[pos],
 * processed front to back. Nothing is written when N <= 0.
 */
void vecadd(double *a, double *b, double *c, int N) {
    int pos = 0;
    while (pos < N) {
        c[pos] = a[pos] + b[pos];
        pos++;
    }
}

/*
 * reduction: adds up the first n entries of `x` in index order, starting
 * from 0.0, and returns the total. Returns 0.0 when n <= 0.
 */
double reduction(double *x, int n) {
    double total = 0.0;
    int k;
    for (k = 0; k < n; ++k) {
        total += x[k];
    }
    return total;
}
28 changes: 28 additions & 0 deletions Python/cython/hpc_kernels.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* hpc_kernels.h
*
* Declarations for CPU computation kernels extracted from
* HPCTrainingExamples so they can be compiled into a shared
* library via Cython.
*
* Sources:
* cpu_func – ManagedMemory/CPU_Code/cpu_code.c
* saxpy – Pragma_Examples/OpenMP/C/1_saxpy/0_saxpy_portyourself/saxpy.c
* vecadd – Pragma_Examples/OpenMP/C/3_vecadd/0_vecadd_portyourself/vecadd.c
* reduction – Pragma_Examples/OpenMP/C/2_reduction/0_reduction_portyourself/reduction.c
*/

#ifndef HPC_KERNELS_H
#define HPC_KERNELS_H

/* Double every element: out[i] = in[i] * 2.0 */
void cpu_func(double *in, double *out, int M);

/* SAXPY: y[i] = a * x[i] + y[i] */
void saxpy(float a, float *x, float *y, int N);

/* Vector addition: c[i] = a[i] + b[i] */
void vecadd(double *a, double *b, double *c, int N);

/* Sum-reduction: returns x[0] + x[1] + ... + x[n-1] */
double reduction(double *x, int n);

#endif /* HPC_KERNELS_H */
16 changes: 16 additions & 0 deletions Python/cython/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from setuptools import setup, Extension
from Cython.Build import cythonize
import numpy as np

# One extension module named "compute": the Cython wrapper plus the C
# kernels, compiled with the NumPy headers and this directory (for
# hpc_kernels.h) on the include path.
compute_ext = Extension(
    "compute",
    sources=["compute.pyx", "hpc_kernels.c"],
    include_dirs=[np.get_include(), "."],
)
extensions = [compute_ext]

cythonized_modules = cythonize(
    extensions,
    compiler_directives={"language_level": "3"},
)

setup(
    name="hpc-cython-kernels",
    ext_modules=cythonized_modules,
)