/*
 * OpenCL kernels. The placeholders {N}, {M} and {MAX_PROGRAM_SIZE} are not
 * OpenCL syntax; they must be substituted with integer literals before the
 * source is handed to the OpenCL compiler.
 */

/*
 * Fills one cell of a pairwise weight matrix.
 *
 * Launch: 2-D; work-item (i, j) writes distances[i * get_global_size(1) + j].
 *
 * x          rows of {N} bytes; row i starts at x[i * {N}].
 * distances  output, one float per (i, j) pair.
 *
 * The diagonal is forced to 0. Every other cell holds 2^(-d), where d is the
 * sum over k of (x[i][k] XOR x[j][k]). That sum equals the Hamming distance
 * only when every byte is 0 or 1 -- NOTE(review): confirm the input encoding.
 */
__kernel void compute_distances(__global const uchar* x,
                                __global float* distances)
{
    const int i = get_global_id(0);
    const int j = get_global_id(1);
    const int index = i * get_global_size(1) + j;

    if (i == j) {
        distances[index] = 0.0f;
        return;
    }

    float distance = 0.0f;
    for (int k = 0; k < {N}; k++) {
        distance += x[i * {N} + k] ^ x[j * {N} + k];
    }

    // exp2 is the float built-in for 2^x; it avoids calling pow with a mixed
    // (int, float) argument pair.
    distances[index] = exp2(-distance);
}

/*
 * Runs one program on one input sample.
 *
 * Launch: 2-D; dimension 0 selects the program, dimension 1 the sample.
 *
 * program  {MAX_PROGRAM_SIZE} instructions per program, each (1 + {N} + 2)
 *          bytes: a constant term, {N} coefficients, then two operand slot
 *          numbers. A value of 255 in the constant-term position of the
 *          following instruction marks the current instruction as the last.
 * x        {N} input bytes per sample.
 * scratch  no longer accessed; the parameter is retained so that the kernel
 *          signature (and the host's argument list) stays unchanged.
 * y        output; y[program * get_global_size(1) + sample] receives the
 *          result of the final instruction.
 *
 * Each instruction computes
 *     (constant + sum_j coeff[j] * x[j] + slot[left] * slot[right]) mod {M}
 * and stores it in slot i + 1. Slot 0 is always 0.
 *
 * Fixes relative to the previous revision:
 *  - Intermediate results live in private memory. They used to be stored in
 *    the global scratch buffer indexed by program only, so all samples of a
 *    program raced on the same bytes, and the write for the last instruction
 *    landed in the next program's slot 0.
 *  - y is indexed by (program, sample) instead of by sample only, matching
 *    the y[index * sample_size + i] layout read by compute_coherences.
 *    Previously every program wrote the same y[sample] cell.
 *  - The terminator look-ahead is skipped on the last instruction, where it
 *    would read beyond this program's bytes; a full-length program now emits
 *    the result of its last instruction.
 *  - The sum is accumulated in 32 bits so that it is reduced modulo {M}
 *    without first wrapping modulo 256.
 *  - Operand slot numbers >= {MAX_PROGRAM_SIZE} read as 0 rather than reading
 *    memory that does not belong to this program.
 *
 * NOTE(review): the new y layout assumes the host allocates
 * get_global_size(0) * get_global_size(1) bytes for y -- confirm.
 */
__kernel void evaluate(__global const uchar* program,
                       __global const uchar* x,
                       __global uchar* scratch,
                       __global uchar* y)
{
    const int program_id = get_global_id(0);
    const int sample_id = get_global_id(1);
    const int sample_count = get_global_size(1);

    int pc = program_id * {MAX_PROGRAM_SIZE} * (1 + {N} + 2);
    const int input_index = sample_id * {N};
    const int output_index = program_id * sample_count + sample_id;

    // Per-work-item result slots; slot 0 stays 0, slot i + 1 holds the result
    // of instruction i.
    uchar slots[{MAX_PROGRAM_SIZE}];
    for (int s = 0; s < {MAX_PROGRAM_SIZE}; s++) {
        slots[s] = 0;
    }

    for (int i = 0; i < {MAX_PROGRAM_SIZE}; i++) {
        uint acc = program[pc++];
        for (int j = 0; j < {N}; j++) {
            acc += (uint)program[pc++] * (uint)x[input_index + j];
        }

        const int left_index = program[pc++];
        const int right_index = program[pc++];
        const uint left = (left_index < {MAX_PROGRAM_SIZE}) ? slots[left_index] : 0;
        const uint right = (right_index < {MAX_PROGRAM_SIZE}) ? slots[right_index] : 0;
        acc += left * right;

        const uchar output = (uchar)(acc % {M});

        // Short-circuit keeps the look-ahead inside this program's bytes.
        if (i + 1 == {MAX_PROGRAM_SIZE} || program[pc] == 255) {
            y[output_index] = output;
            return;
        }
        slots[i + 1] = output;
    }
}

/*
 * Scores one row of y against the reference vector z.
 *
 * Launch: 1-D; work-item `index` writes coherences[index].
 *
 * y           row `index` occupies y[index * sample_size .. + sample_size - 1].
 * z           reference bytes, one per sample.
 * distances   sample_size x sample_size weights, row-major.
 * coherences  output, one float per work-item.
 *
 * With e[i] = z[i] XOR y[row][i], the score is the weight-sum over all pairs
 * (i, j) with e[i] == e[j], divided by the weight-sum over all pairs.
 *
 * NOTE(review): sample_size is taken from get_global_size(0), so the number
 * of work-items must equal the number of samples -- confirm the host launches
 * it that way. If every weight is 0 (e.g. a single sample, whose only weight
 * is the zero diagonal) the result is 0/0.
 */
__kernel void compute_coherences(__global const uchar* y,
                                 __global const uchar* z,
                                 __global const float* distances,
                                 __global float* coherences)
{
    const int index = get_global_id(0);
    const int sample_size = get_global_size(0);
    const int row = index * sample_size;

    float numerator = 0.0f;
    float denominator = 0.0f;

    for (int i = 0; i < sample_size; i++) {
        const int p = z[i] ^ y[row + i];
        for (int j = 0; j < sample_size; j++) {
            const int q = z[j] ^ y[row + j];
            const float distance = distances[i * sample_size + j];
            denominator += distance;
            if (p == q) {
                numerator += distance;
            }
        }
    }

    coherences[index] = numerator / denominator;
}

/*
 * Prepares the sort state: indices becomes the identity permutation and the
 * pass offset is reset to 0 (by work-item 0 only).
 *
 * Launch: 1-D, one work-item per element of indices.
 */
__kernel void initialize_sort(__global uint* indices, __global uint* offset)
{
    const uint index = get_global_id(0);

    indices[index] = index;
    if (index == 0) {
        *offset = 0;
    }
}

/*
 * Toggles the pass offset: 0 becomes 1, anything else becomes 0.
 *
 * Every work-item performs the same unsynchronised read-modify-write.
 * NOTE(review): presumably meant to be launched with a single work-item.
 */
__kernel void increment_offset(__global uint* offset)
{
    const uint current = *offset;

    *offset = (current == 0) ? 1 : 0;
}

/*
 * One compare-and-swap pass over adjacent pairs of indices.
 *
 * Work-item g handles positions (2g + *offset, 2g + *offset + 1) and swaps
 * them when the first refers to a smaller coherence than the second, so
 * larger coherences move towards the front. Alternating *offset between 0
 * and 1 across passes (see increment_offset) gives an odd-even transposition
 * sort.
 *
 * NOTE(review): no length is passed in, so indices[index + 1] is not bounds
 * checked here; the host must size the launch so the last pair stays inside
 * the buffer, in particular when *offset == 1.
 */
__kernel void sort(__global const float* coherences,
                   __global uint* indices,
                   __global uint* offset)
{
    const uint index = get_global_id(0) * 2 + *offset;

    const uint a = indices[index];
    const uint b = indices[index + 1];

    if (coherences[a] < coherences[b]) {
        indices[index] = b;
        indices[index + 1] = a;
    }
}

/*
 * Placeholder: reads its work-item id and does nothing else yet.
 */
__kernel void evolve(__global const uchar* program, __global float* coherences)
{
    int index_a = get_global_id(0);
}