"""Evolutionary search, scored on an OpenCL device, for models of a hash bit.

Random ``N``-bit samples are labelled with the first SHA-256 digest byte
reduced modulo ``M``.  Candidate models are nested tuples
``(constant, scalars, child)`` that are compiled into OpenCL kernels,
evaluated on every sample and ranked by a "coherence" score computed on the
device.  The best candidates survive and are mutated into the next epoch.
"""

import hashlib
import math
import random

import numpy as np

try:
    import pyopencl as cl
except ImportError:
    # Only main() touches OpenCL; keeping the module importable lets the
    # pure-Python helpers be used on machines without an OpenCL runtime.
    cl = None

N = 8  # number of bits per sample
M = 2  # modulus applied to labels and to every model output
sample_size = 64  # number of samples evaluated per candidate


def encode(v, offset):
    """Pack the ``N`` bits of ``v`` starting at ``offset`` into bytes.

    Bits are packed most-significant first, eight per byte.  When ``N`` is
    not a multiple of eight the last byte is padded with zero bits instead
    of reading past the sample.

    Args:
        v: indexable sequence of 0/1 values.
        offset: index in ``v`` of the first bit of the sample.

    Returns:
        A ``bytearray`` of ``ceil(N / 8)`` bytes.
    """
    packed = []
    for byte_index in range(math.ceil(N / 8)):
        value = 0
        for bit_index in range(8):
            index = byte_index * 8 + bit_index
            value <<= 1
            if index < N:
                value |= int(v[offset + index])
        packed.append(value)
    # bytearray(<int>) would allocate that many zero bytes; the packed
    # values themselves must be passed.
    return bytearray(packed)


def sha(v, offset):
    """Return the first SHA-256 digest byte of the encoded sample, modulo ``M``.

    Args:
        v: indexable sequence of 0/1 values.
        offset: index in ``v`` of the first bit of the sample.
    """
    digest = hashlib.sha256(encode(v, offset)).digest()
    return digest[0] % M


def create_program_r(model, output_var):
    """Emit OpenCL statements that evaluate ``model`` into ``output_var``.

    The generated code declares ``int <output_var>``, initialises it with the
    model constant, adds ``scalar * x[gid * N + i]`` for every strictly
    positive scalar, recursively evaluates both children (if any) into
    ``<output_var>0`` and ``<output_var>1`` and adds their product, and
    finally reduces the result modulo ``M``.

    Args:
        model: tuple ``(constant, scalars, child)`` where ``child`` is either
            ``None`` or a ``(left, right)`` pair of models.
        output_var: name of the OpenCL variable to declare and fill.

    Returns:
        The generated OpenCL source fragment as a string.
    """
    constant, scalars, child = model
    parts = ['int ' + output_var + ' = ' + str(constant) + ';\n']

    terms = [
        str(scalars[i]) + ' * x[gid * ' + str(N) + ' + ' + str(i) + ']'
        for i in range(len(scalars))
        if scalars[i] > 0
    ]
    if terms:
        parts.append(output_var + ' += ' + ' + '.join(terms) + ';\n')

    if child is not None:
        left, right = child
        left_output = output_var + '0'
        right_output = output_var + '1'
        parts.append(create_program_r(left, left_output))
        parts.append(create_program_r(right, right_output))
        parts.append(
            output_var + ' += ' + left_output + ' * ' + right_output + ';\n'
        )

    parts.append(output_var + ' %= ' + str(M) + ';\n')
    return ''.join(parts)


def create_program(model, name, offset):
    """Return the source of an OpenCL kernel that evaluates ``model``.

    Each work item evaluates the model on the sample selected by its global
    id and stores the result at ``out[offset + gid]``.

    Args:
        model: model tuple as accepted by ``create_program_r``.
        name: name of the generated kernel.
        offset: index of the first output slot written by this kernel.
    """
    output_var = 'output'
    return ''.join([
        '__kernel void ' + name
        + '(__global const int *x, __global int *out) {\n',
        'int gid = get_global_id(0);\n',
        create_program_r(model, output_var),
        'out[' + str(offset) + ' + gid] = ' + output_var + ';\n',
        '}\n',
    ])


def distances_program():
    """Return the source of the kernel ``p`` that fills the distance table.

    Work item ``gid`` handles the sample pair ``(gid / sample_size,
    gid % sample_size)``.  It writes ``0`` when both indices are equal and
    otherwise ``pow(2, -d)``, where ``d`` is the sum of the element-wise XOR
    of the two samples' ``N`` values.
    """
    size = str(sample_size)
    bits = str(N)
    return ''.join([
        "__kernel void p(__global const int *x, __global float *distances) {\n",
        " int gid = get_global_id(0);\n",
        " int i = gid / " + size + ";\n",
        " int j = gid % " + size + ";\n",
        " float distance = 0;\n",
        " if (i == j) {\n",
        " distances[gid] = distance;\n",
        " return;\n",
        " }\n",
        " for (int k = 0; k < " + bits + "; k++) {\n",
        " distance += x[i * " + bits + " + k] ^ x[j * " + bits + " + k];\n",
        " }\n",
        " distances[gid] = pow(2, -distance);\n",
        "}\n",
    ])


def coherence_program():
    """Return the source of the kernel ``p`` that scores candidates.

    Work item ``gid`` reads the ``sample_size`` outputs of candidate ``gid``
    from ``y``, XORs each with the matching label in ``z`` and, over all
    ordered sample pairs, divides the summed distance weights of pairs whose
    XOR values agree by the summed distance weights of all pairs.
    """
    size = str(sample_size)
    return ''.join([
        "__kernel void p(__global const int *y, __global const int *z, "
        "__global const float *distances, __global float *coherences) {\n",
        " int gid = get_global_id(0);\n",
        " float numerator = 0;\n",
        " float denominator = 0;\n",
        " for (int i = 0; i < " + size + "; i++) {\n",
        " int p = z[i] ^ y[gid * " + size + " + i];\n",
        " for (int j = 0; j < " + size + "; j++) {\n",
        " int q = z[j] ^ y[gid * " + size + " + j];\n",
        " float distance = distances[i * " + size + " + j];\n",
        " denominator += distance;\n",
        " if (p == q) {\n",
        " numerator += distance;\n",
        " }\n",
        " }\n",
        " }\n",
        " coherences[gid] = numerator / denominator;\n",
        "}\n",
    ])


def random_sample():
    """Return ``N * sample_size`` random 0/1 values as an ``int32`` array."""
    # The ``random`` module is used (rather than numpy's generator) so that
    # random.seed() controls the whole script.
    return np.array(
        [random.randint(0, 1) for _ in range(N * sample_size)],
        dtype=np.int32,
    )


def clone_model(model, p_mutation):
    """Return a randomly mutated copy of ``model``.

    Four independent mutation probabilities are drawn uniformly from
    ``[0, p_mutation)``: changing the constant, changing each scalar, adding
    children to a leaf and dropping the children of an inner node.  Children
    that are kept are cloned recursively with the same ``p_mutation``.

    Args:
        model: tuple ``(constant, scalars, child)``.
        p_mutation: upper bound of the per-kind mutation probabilities.

    Returns:
        A new model tuple; the scalar array is always a fresh copy.
    """
    # The order of the random draws below is kept fixed so that seeded runs
    # stay reproducible.
    p_constant = p_mutation * random.random()
    p_flip = p_mutation * random.random()
    p_add_child = p_mutation * random.random()
    p_drop_child = p_mutation * random.random()

    constant, xors, child = model
    if random.random() < p_constant:
        constant = (constant + random.randint(0, M - 1)) % M

    clone_xors = np.zeros((N,))
    np.copyto(clone_xors, xors)
    for i in range(N):
        if random.random() < p_flip:
            step = 1 if M == 2 else random.randint(1, M - 1)
            clone_xors[i] = (clone_xors[i] + step) % M

    if child is None:
        if random.random() < p_add_child:
            left = random_child(p_mutation)
            right = random_child(p_mutation)
            return (constant, clone_xors, (left, right))
        return (constant, clone_xors, None)

    if random.random() < p_drop_child:
        return (constant, clone_xors, None)

    left, right = child
    clone_left = clone_model(left, p_mutation)
    clone_right = clone_model(right, p_mutation)
    return (constant, clone_xors, (clone_left, clone_right))


def random_child(p_mutation):
    """Return a random model with at least one non-zero scalar.

    One randomly chosen scalar is always set; every other scalar is set with
    a probability drawn from ``[0, p_mutation)``.  With another probability
    drawn from the same range the model gets two random children, each
    generated with a randomly reduced ``p_mutation``.

    Args:
        p_mutation: upper bound of the probabilities used while generating.
    """
    constant = random.randint(0, M - 1)
    xors = np.zeros((N,))
    p_flip = p_mutation * random.random()
    p_child = p_mutation * random.random()

    index = random.randint(0, N - 1)
    xors[index] = 1 if M == 2 else random.randint(1, M - 1)
    for i in range(N):
        if i != index and random.random() < p_flip:
            xors[i] = 1 if M == 2 else random.randint(1, M - 1)

    if random.random() < p_child:
        left = random_child(p_mutation * random.random())
        right = random_child(p_mutation * random.random())
        return (constant, xors, (left, right))
    return (constant, xors, None)


def null_candidate():
    """Return the model with constant 0, all-zero scalars and no children."""
    return (0, np.zeros((N,)), None)


def main():
    """Run the evolutionary search and print the best coherence per epoch.

    Raises:
        ImportError: if ``pyopencl`` is not installed.
    """
    if cl is None:
        raise ImportError("pyopencl is required to run the search")

    epochs = 1000
    num_survivors = 100
    num_offspring = 10
    num_candidates = num_survivors + num_survivors * num_offspring
    # Let the OpenCL runtime choose the work-group size: a fixed size of 512
    # does not divide the 64-item and 1100-item launches below, which OpenCL
    # rejects unless non-uniform work-groups are available.
    local_work_size = None

    x = random_sample()
    z = np.zeros((sample_size,)).astype(np.int32)
    coherences = np.zeros((num_candidates,)).astype(np.float32)

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    mf = cl.mem_flags

    candidates = [null_candidate() for _ in range(num_candidates)]
    for i in range(sample_size):
        z[i] = sha(x, N * i)

    x_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=x)
    z_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=z)
    # distances and outputs are written by one kernel and read by the
    # coherence kernel, so they must be readable on the device as well.
    distances_gpu = cl.Buffer(ctx, mf.READ_WRITE, 4 * sample_size * sample_size)
    coherences_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, 4 * num_candidates)
    outputs_gpu = cl.Buffer(ctx, mf.READ_WRITE, 4 * sample_size * num_candidates)

    distances_kernel = cl.Program(ctx, distances_program()).build().p
    coherence_kernel = cl.Program(ctx, coherence_program()).build().p

    # The distance table depends only on the samples, so it is computed once.
    distances_kernel(
        queue, (sample_size * sample_size,), local_work_size,
        x_gpu, distances_gpu,
    )

    for epoch in range(epochs):
        # One kernel per candidate; each kernel has its output offset baked
        # in, so the launch order of the kernels does not matter.
        source = '\n'.join([
            create_program(
                candidates[i], 'k' + '{:0>9}'.format(i), i * sample_size
            )
            for i in range(num_candidates)
        ])
        program = cl.Program(ctx, source).build()
        for knl in program.all_kernels():
            knl(queue, (sample_size,), local_work_size, x_gpu, outputs_gpu)

        coherence_kernel(
            queue, (num_candidates,), local_work_size,
            outputs_gpu, z_gpu, distances_gpu, coherences_gpu,
        )
        cl.enqueue_copy(queue, coherences, coherences_gpu)

        # Indices of the best candidates, in ascending order of coherence.
        ranked = sorted(range(len(coherences)), key=lambda i: coherences[i])
        top_n = ranked[-num_survivors:]
        survivors = [candidates[index] for index in top_n]
        print(epoch, coherences[top_n[-1]])

        for i in range(num_survivors):
            candidate = survivors[i]
            candidates[i] = candidate
            for j in range(num_offspring):
                index = num_survivors + j * num_survivors + i
                candidates[index] = clone_model(candidate, random.random())


if __name__ == "__main__":
    main()