Userspace program:
#!/usr/bin/python
import pyopencl as cl
import numpy as np
# Create an OpenCL context and a command queue bound to it.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
# Input data: 20 unsigned 64-bit zeros (the kernel's elements are `ulong`).
a = np.zeros(20, dtype=np.uint64)
# The kernel reads AND writes this buffer, so it has to be READ_WRITE:
# writing to a READ_ONLY buffer from inside a kernel is undefined behaviour.
buf = cl.Buffer(ctx, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=a)
# Load the kernel source; `with` closes the file even if reading raises.
with open("kernel.c", "r") as f:
    SRC = f.read()
# Compile it.
prg = cl.Program(ctx, SRC).build()
print("Input:")
print(a)
# Launch one work-item per array element; a local size of None leaves the
# work-group size up to the OpenCL implementation.
event = prg.brm(queue, a.shape, None, buf)
event.wait()
# Copy the result from the device buffer back into the host array.
cl.enqueue_copy(queue, a, buf)
# Print it.
print("CL returned:")
print(a)
kernel.c:
// Kernel: every work-item adds the square of its global index to its own
// element of buf, in place.
// buf: global array of ulong; it is indexed by get_global_id(0), so it must
//      hold at least as many elements as the launch's global size.
kernel void brm(global ulong *buf) {
    // Global index of this work-item in dimension 0.
    private size_t me = get_global_id(0);
    // Widen before multiplying: size_t is only 32 bits wide on devices with
    // 32-bit addressing, so me*me evaluated in size_t would wrap for large
    // indices before being added to the 64-bit value.
    private ulong idx = (ulong)me;
    // Read the input element.
    private ulong a = buf[me];
    // Exercise some magic: add the squared index.
    a = a + idx * idx;
    // Write the result back to memory.
    buf[me] = a;
}