#!/usr/bin/python
# Userspace program:
#
# Minimal PyOpenCL host script. It uploads a 20-element uint64 array to an
# OpenCL device, runs the `brm` kernel from kernel.c over it (one work-item
# per element), copies the result back and prints it.
#
# The kernel source is read from "kernel.c" in the current working directory;
# a reference listing of that file is kept at the bottom of this script.
import pyopencl as cl
import numpy as np

# create context
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# create some input data, in this case array filled with zeros
a = np.zeros(20, dtype=np.uint64)

# create opencl buffer
# The kernel writes its result back into this buffer, so it must be
# READ_WRITE: writing from a kernel to a READ_ONLY buffer is undefined
# behaviour in OpenCL.
buf = cl.Buffer(
    ctx,
    cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
    hostbuf=a,
)

# load kernel source (the context manager closes the file even on error)
with open("kernel.c", "r") as f:
    SRC = f.read()

# compile it
prg = cl.Program(ctx, SRC).build()

print("Input:")
print(a)

# launch the kernel: global size = a.shape, local size chosen by the runtime
event = prg.brm(queue, a.shape, None, buf)
event.wait()

# copy data back from opencl
cl.enqueue_copy(queue, a, buf)

# print it
print("CL returned:")
print(a)

# ---------------------------------------------------------------------------
# kernel.c:
#
#   // kernel definition
#   kernel void brm(global ulong *buf)
#   {
#       // kernel index
#       private size_t me = get_global_id(0);
#
#       // local vars
#       private ulong a;
#
#       // read input data
#       a = buf[me];
#
#       // exercise some magic
#       a = a + me*me;
#
#       // write result back to memory
#       buf[me] = a;
#   }
# ---------------------------------------------------------------------------