probabilities/mutations_cuda.py

270 lines
9.2 KiB
Python
Raw Normal View History

2023-01-01 23:45:51 +00:00
# Sample source code from the Tutorial Introduction in the documentation.
import hashlib
import numpy as np
import math
import pycuda.driver as cuda
from pycuda.driver import Stream
import pycuda.autoinit
from pycuda.compiler import SourceModule
import pycuda.gpuarray as gpuarray
import random
'''
a = numpy.random.randn(4,4)
a = a.astype(numpy.float32)
a_gpu = cuda.mem_alloc(a.size * a.dtype.itemsize)
cuda.memcpy_htod(a_gpu, a)
mod = SourceModule("""
__global__ void doublify(float *a)
{
int idx = threadIdx.x + threadIdx.y*4;
a[idx] *= 2;
}
""")
func = mod.get_function("doublify")
func(a_gpu, block=(4,4,1))
a_doubled = numpy.empty_like(a)
cuda.memcpy_dtoh(a_doubled, a_gpu)
print("original array:")
print(a)
print("doubled with kernel:")
print(a_doubled)
# alternate kernel invocation -------------------------------------------------
func(cuda.InOut(a), block=(4, 4, 1))
print("doubled with InOut:")
print(a)
# part 2 ----------------------------------------------------------------------
a_gpu = gpuarray.to_gpu(numpy.random.randn(4,4).astype(numpy.float32))
a_doubled = (2*a_gpu).get()
print("original array:")
print(a_gpu)
print("doubled with gpuarray:")
print(a_doubled)
'''
N = 8
M = 2
sample_size = 64
def encode(v, offset):
byte_values = []
for i in range(0, math.ceil(N / 8)):
x = 0
for j in range(0, 8):
index = i * 8 + j
if offset + index >= len(v):
break
x <<= 1
x |= int(v[offset + index])
byte_values.append(x)
return bytearray(x)
def sha(v, offset):
global M
x = encode(v, offset)
m = hashlib.sha256()
m.update(x)
result = m.digest()
return result[0] % M
def create_program_r(model, output_var):
global N, M
(constant, scalars, child) = model
program = 'int ' + output_var + ' = ' + str(constant) + ';\n'
scalars_part = ' + '.join([str(scalars[i]) + ' * x[gid * ' + str(N) + ' + ' + str(i) + ']' for i in range(0, len(scalars)) if scalars[i] > 0])
if len(scalars_part) > 0:
program += output_var + ' += ' + scalars_part + ';\n'
if not child is None:
left_output = output_var + '0'
right_output = output_var + '1'
(left, right) = child
program += create_program_r(left, left_output)
program += create_program_r(right, right_output)
program += output_var + ' += ' + left_output + ' * ' + right_output + ';\n'
program += output_var + ' %= ' + str(M) + ';\n'
return program
def create_program(model, name, offset):
output_var = 'output'
program = '__global__ void ' + name + '(const int *x, int *out) {\n'
program += 'int gid = threadIdx.x + blockIdx.x * blockDim.x;\n'
program += create_program_r(model, output_var)
program += 'out[' + str(offset) + ' + gid] = ' + output_var + ';\n'
program += '}\n'
return program
def distances_program():
global N, sample_size
program = "__global__ void p(const int *x, double *distances) {\n"
program += " int gid = threadIdx.x + blockIdx.x * blockDim.x;\n"
program += " int i = gid / " + str(sample_size) + ";\n"
program += " int j = gid % " + str(sample_size) + ";\n"
program += " if (i == j) {\n"
program += " distances[gid] = 0;\n"
program += " return;\n"
program += " }\n"
program += " int distance = 0;\n"
program += " for (int k = 0; k < " + str(N) + "; k++) {\n"
program += " distance += x[i * " + str(N) + " + k] ^ x[j * " + str(N) + " + k];\n"
program += " }\n"
program += " distances[gid] = pow((double)2.0, (double)-distance);\n"
program += "}\n"
return program
def coherence_program():
global sample_size
program = "__global__ void p(const int *y, const int *z, const double *distances, double *coherences) {\n"
program += " int gid = threadIdx.x + blockIdx.x * blockDim.x;\n"
program += " double numerator = 0;\n"
program += " double denominator = 0;\n"
program += " for (int i = 0; i < " + str(sample_size) + "; i++) {\n"
program += " int p = z[i] ^ y[gid * " + str(sample_size) + " + i];\n"
program += " for (int j = 0; j < " + str(sample_size) + "; j++) {\n"
program += " int q = z[j] ^ y[gid * " + str(sample_size) + " + j];\n"
program += " double distance = distances[i * " + str(sample_size) + " + j];\n"
program += " denominator += distance;\n"
program += " if (p == q) {\n"
program += " numerator += distance;\n"
program += " }\n"
program += " }\n"
program += " }\n"
program += " coherences[gid] = numerator / denominator;\n"
program += "}\n"
return program
def random_sample():
global N, sample_size
x = np.zeros((N * sample_size,)).astype(np.int32)
for i in range(0, len(x)):
x[i] = random.randint(0, 1)
return x
def clone_model(model, p_mutation):
global N, M
p_constant = p_mutation * random.random()
p_flip = p_mutation * random.random()
p_add_child = p_mutation * random.random()
p_drop_child = p_mutation * random.random()
(constant, xors, child) = model
if random.random() < p_constant:
constant += random.randint(0, M - 1)
constant %= M
clone_xors = np.zeros((N,))
np.copyto(clone_xors, xors)
for i in range(0, N):
if random.random() < p_flip:
offset = 1 if M == 2 else random.randint(1, M - 1)
clone_xors[i] += offset
clone_xors[i] %= M
if child is None:
if random.random() < p_add_child:
left = random_child(p_mutation)
right = random_child(p_mutation)
return (constant, clone_xors, (left, right))
return (constant, clone_xors, None)
if random.random() < p_drop_child:
return (constant, clone_xors, None)
(left, right) = child
clone_left = clone_model(left, p_mutation)
clone_right = clone_model(right, p_mutation)
return (constant, clone_xors, (clone_left, clone_right))
def random_child(p_mutation):
global N, M
constant = random.randint(0, M - 1)
xors = np.zeros((N,))
p_flip = p_mutation * random.random()
p_child = p_mutation * random.random()
index = random.randint(0, N - 1)
xors[index] = 1 if M == 2 else random.randint(1, M - 1)
for i in range(0, N):
if i != index and random.random() < p_flip:
xors[i] = 1 if M == 2 else random.randint(1, M - 1)
if random.random() < p_child:
left = random_child(p_mutation * random.random())
right = random_child(p_mutation * random.random())
return (constant, xors, (left, right))
return (constant, xors, None)
def null_candidate():
global N
return (0, np.zeros((N,)), None)
def main():
global N, M, sample_size
epochs = 1000
num_survivors = 100
num_offspring = 10
num_candidates = num_survivors + num_survivors * num_offspring
block_size = 1
x = random_sample()
z = np.zeros((sample_size,)).astype(np.int32)
coherences = np.zeros((num_candidates,)).astype(np.float64)
candidates = [null_candidate() for _ in range(0, num_candidates)]
for i in range(0, sample_size):
z[i] = sha(x, N * i)
# print(z)
x_gpu = cuda.mem_alloc(4 * N * sample_size)
cuda.memcpy_htod(x_gpu, x)
z_gpu = cuda.mem_alloc(4 * sample_size)
cuda.memcpy_htod(z_gpu, z)
distances_gpu = cuda.mem_alloc(8 * sample_size * sample_size)
coherences_gpu = cuda.mem_alloc(8 * num_candidates)
outputs_gpu = cuda.mem_alloc(4 * sample_size * num_candidates)
distances_kernel = SourceModule(distances_program()).get_function('p')
coherence_kernel = SourceModule(coherence_program()).get_function('p')
distances_kernel(x_gpu, distances_gpu, block=(block_size, 1, 1), grid=(int(sample_size * sample_size / block_size), 1, 1))
# distances = np.zeros((sample_size,sample_size)).astype(np.double)
# cuda.memcpy_dtoh(distances, distances_gpu)
# print(distances)
for epoch in range(0, epochs):
mod = SourceModule('\n'.join([create_program(candidates[i], 'k' + str(i), i * sample_size) for i in range(0, num_candidates)]))
stream = Stream()
for i in range(0, num_candidates):
f = mod.get_function('k' + str(i))
f(x_gpu, outputs_gpu, stream=stream, block=(block_size, 1, 1), grid=(int(sample_size / block_size), 1, 1))
stream.synchronize()
# outputs = np.zeros((sample_size * num_candidates,)).astype(np.int32)
# cuda.memcpy_dtoh(outputs, outputs_gpu)
# print(outputs)
coherence_kernel(outputs_gpu, z_gpu, distances_gpu, coherences_gpu, block=(block_size, 1, 1), grid=(int(num_candidates / block_size), 1, 1))
cuda.memcpy_dtoh(coherences, coherences_gpu)
top_n = sorted(range(len(coherences)), key=lambda i: coherences[i])[-num_survivors:]
survivors = [candidates[index] for index in top_n]
print(epoch, coherences[top_n[-1]])
for i in range(0, num_survivors):
candidate = survivors[i]
candidates[i] = candidate
for j in range(0, num_offspring):
index = num_survivors + j * num_survivors + i
candidates[index] = clone_model(candidate, random.random())
if __name__ == "__main__":
main()