From fd2045dfca50c20261356160655b87a23658ac3a Mon Sep 17 00:00:00 2001
From: Rob Taglang
Date: Sun, 1 Jan 2023 18:45:51 -0500
Subject: [PATCH] Add probabilities work to git

---
 2_point_plot.py         |   77 +++
 Dockerfile              |    6 +
 README.md               |   43 ++
 model.txt               |    0
 model_probabilities.py  |  171 +++++++
 model_probabilities2.py |  260 ++++++++++
 model_probabilities3.py |  463 +++++++++++++++++
 model_probabilities4.py |  208 ++++++++
 model_probabilities5.py |  219 ++++++++
 model_probabilities6.py |  201 ++++++++
 model_probabilities7.py |  249 +++++++++
 model_probabilities8.py |  219 ++++++++
 model_probabilities9.py |  310 ++++++++++++
 mutations.cl            |   96 ++++
 mutations.py            |  511 +++++++++++++++++++
 mutations10.py          |  425 ++++++++++++++++
 mutations11.py          |  535 ++++++++++++++++++++
 mutations12.py          |  391 +++++++++++++++
 mutations13.py          |  447 +++++++++++++++++
 mutations14.py          |  549 ++++++++++++++++++++
 mutations15.py          |  628 +++++++++++++++++++++++
 mutations16.py          |  663 ++++++++++++++++++++++++
 mutations17.py          |  669 +++++++++++++++++++++++++
 mutations18.py          |  845 +++++++++++++++++++++++++++++++
 mutations19.py          | 1052 +++++++++++++++++++++++++++++++++++++++
 mutations2.py           |  570 +++++++++++++++++++++
 mutations20.py          |  316 ++++++++++++
 mutations21.py          |  368 ++++++++++++++
 mutations22.py          |  405 +++++++++++++++
 mutations23.py          |  761 ++++++++++++++++++++++++++++
 mutations24.py          |  656 ++++++++++++++++++++++++
 mutations25.py          |  791 +++++++++++++++++++++++++++++
 mutations26.py          |  741 +++++++++++++++++++++++++++
 mutations3.py           |  541 ++++++++++++++++++++
 mutations4.py           |  591 ++++++++++++++++++++++
 mutations5.py           |  417 ++++++++++++++++
 mutations6.py           |  488 ++++++++++++++++++
 mutations7.py           |  455 +++++++++++++++++
 mutations8.py           |  451 +++++++++++++++++
 mutations9.py           |  414 +++++++++++++++
 mutations_cuda.py       |  269 ++++++++++
 mutations_gpu.py        |  207 ++++++++
 mutations_opencl.py     |    5 +
 shifts.py               |   29 ++
 space_analysis.py       |  142 ++++++
 space_analysis2.py      |  255 ++++++++++
 space_analysis3.py      |  385 ++++++++++++++
 space_analysis4.py      |  229 +++++++++
 train_generator.py      |  164 ++++++
 49 files changed, 18887 insertions(+)
 create mode 100755 2_point_plot.py
 create mode 100644 Dockerfile
 create mode 100644 README.md
 create mode 100644 model.txt
 create mode 100755 model_probabilities.py
 create mode 100755 model_probabilities2.py
 create mode 100755 model_probabilities3.py
 create mode 100755 model_probabilities4.py
 create mode 100755 model_probabilities5.py
 create mode 100755 model_probabilities6.py
 create mode 100755 model_probabilities7.py
 create mode 100755 model_probabilities8.py
 create mode 100755 model_probabilities9.py
 create mode 100644 mutations.cl
 create mode 100644 mutations.py
 create mode 100644 mutations10.py
 create mode 100644 mutations11.py
 create mode 100644 mutations12.py
 create mode 100644 mutations13.py
 create mode 100644 mutations14.py
 create mode 100644 mutations15.py
 create mode 100644 mutations16.py
 create mode 100644 mutations17.py
 create mode 100644 mutations18.py
 create mode 100644 mutations19.py
 create mode 100644 mutations2.py
 create mode 100644 mutations20.py
 create mode 100644 mutations21.py
 create mode 100644 mutations22.py
 create mode 100644 mutations23.py
 create mode 100644 mutations24.py
 create mode 100644 mutations25.py
 create mode 100644 mutations26.py
 create mode 100644 mutations3.py
 create mode 100644 mutations4.py
 create mode 100644 mutations5.py
 create mode 100644 mutations6.py
 create mode 100644 mutations7.py
 create mode 100644 mutations8.py
 create mode 100644 mutations9.py
 create mode 100644 mutations_cuda.py
 create mode 100644 
mutations_gpu.py create mode 100644 mutations_opencl.py create mode 100644 shifts.py create mode 100644 space_analysis.py create mode 100644 space_analysis2.py create mode 100644 space_analysis3.py create mode 100644 space_analysis4.py create mode 100644 train_generator.py diff --git a/2_point_plot.py b/2_point_plot.py new file mode 100755 index 0000000..1bd44d8 --- /dev/null +++ b/2_point_plot.py @@ -0,0 +1,77 @@ +import numpy as np +import matplotlib.pyplot as plt + +def flip(n, index): + return n ^ (1 << index) + +def distance(i, j): + return bin(i ^ j).count('1') + +def matrix_system_with_two_knowns(p, q, N): + S = 2 ** N + mat = np.zeros((S, S)) + val = np.zeros(S) + for i in range(0, S): + if i == p: + mat[i][i] = 1.0 + val[i] = 1.0 + elif i == q: + mat[i][i] = 1.0 + else: + mat[i][i] = -1.0 + for j in range(0, N): + mat[i][flip(i,j)] = 1.0 / N + return (mat, val) + +def main(): + final_values = [] + final_x = [] + final_y = [] + + for N in range(11, 12): + print(N) + S = 2 ** N + distances = np.zeros((S, S)) + for i in range(0, S): + for j in range(0, S): + distances[i][j] = distance(i,j) + + # final_values = [] + # final_basis = [] + visited_distances = set() + for p in range(0, S): + for q in range(p + 1, S): + pq_distance = distances[p, q] + if pq_distance in visited_distances: + continue + visited_distances.add(pq_distance) + (mat, val) = matrix_system_with_two_knowns(p, q, N) + solution = np.linalg.inv(mat).dot(val) + for i in range(0, len(solution)): + final_x.append(distances[i, p] / N) + final_y.append(distances[i, q] / N) + final_values.append(solution[i]) + + # values = list(set(solution)) + # values.sort() + # if len(values) <= 1: + # continue + # basis = [1.0 * i / (len(values) - 1) for i in range(len(values))] + + # final_values.extend(values) + # final_basis.extend(basis) + + # fig, ax = plt.subplots() + # ax.scatter(final_values, final_basis) + + # print(np.linalg.lstsq((final_x, final_y), final_values)) + + fig = plt.figure() + ax = fig.add_subplot(projection='3d') + ax.scatter(final_x, final_y, final_values) + + ax.grid(True) + plt.show() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..62464b8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,6 @@ +FROM nvidia/cuda:11.6.0-devel-ubuntu20.04 +RUN apt-get update && apt-get install -y python3 python3-pip +RUN pip install numpy pycuda +WORKDIR /app +COPY mutations_cuda.py /app/mutations_cuda.py +CMD ["python3", "-u", "mutations_cuda.py"] \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..bd4b1b7 --- /dev/null +++ b/README.md @@ -0,0 +1,43 @@ +Terminology: + +Sample space 'S' has 'n' bits, and there is a function 'f', that maps 'x' (an n-bit vector) in 'S' to 'y'. + +f(x) = y + +We can use PCA to generate candidates for some sub-sample of 'S', 'P'. Candidates that exhibit generalization +properties (score higher than the previous generation on a sub-sample they haven't seen before, 'Q') can be +cascaded into the input for training the next generation of candidates. + +This candidate generation process is 'G'. 'G' is considered to perform well if the candidates that it +generates exhibit generalization properties. + +To bootstrap, we can use PCA for 'G' and store the state machine instructions 'S_G' for creating the highest-performing +candidates on a particular problem 'f' as a sample space for training a new generator 'G_n'. + +Use 'G' to generate candidates for 'G_n'. 
Training samples come from 'S_G', but candidates should be evaluated +based on how well the candidates they generate perform on 'f'. + +So, we need to be able to score a particular g e G_n. We can evaluate for a fixed number of epochs and use some combination +of the average difficulty and evaluation score. + +A generator G is a state machine with input + +G(|j-bit step|m * n-bit inputs|) = y + +Where y is a bit in an instruction. + +'a' is an address in 'A' |log2(n)| + +|opcode 2-bit| +|00 - xor| +|01 - end| +|10 - and| +|11 - nand| + +xor is followed by an address 'a' for an input bit. + +This process can be repeated indefinitely, replacing 'G' with 'G_n' to create new generators that outperform the previous +generation for solving 'f'. + +A candidate is a state machine with input +f(|n-bit input|) = y diff --git a/model.txt b/model.txt new file mode 100644 index 0000000..e69de29 diff --git a/model_probabilities.py b/model_probabilities.py new file mode 100755 index 0000000..15c80af --- /dev/null +++ b/model_probabilities.py @@ -0,0 +1,171 @@ +import math +from statistics import median + +def count_one_bits(n): + return bin(n).count("1") + +def compute_distance(a, b): + distance = count_one_bits(a ^ b) + # return 1 / (8 ** distance) + return 1 / (2 ** distance) + +def xor_n(n): + return count_one_bits(n) % 2 + +def compute_distances(N): + return [[compute_distance(i, j) for j in range(N)] for i in range(N)] + +def compute_nn_probabilities(i, knowns, distances): + total = 0.0 + total_zero = 0.0 + total_one = 0.0 + for known in knowns: + j = known[0] + distance = distances[i][j] + total += distance + if known[1] == 0: + total_zero += distance + else: + total_one += distance + p_zero = total_zero / total + p_one = total_one / total + return (p_zero, p_one) + +def compute_est_coherence(i, knowns, coherences, distances): + total = 0.0 + coherence = 0.0 + for known in knowns: + j = known[0] + distance = distances[i][j] + total += distance + coherence += distance * coherences[j] + return coherence / total + +def compute_est_coherences(N, knowns, distances): + nn_probabilities = [None for i in range(N)] + est_coherences = [None for i in range(N)] + + # for known in knowns: + # i = known[0] + # nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances) + for known in knowns: + i = known[0] + nn_probabilities[i] = (1.0 - known[1], 1.0 * known[1]) + + for i in range(len(nn_probabilities)): + if not nn_probabilities[i] is None: + continue + nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances) + + print(nn_probabilities) + + for i in range(len(nn_probabilities)): + total = 0.0 + coherence = 0.0 + p_i = nn_probabilities[i] + for j in range(len(nn_probabilities)): + if i == j: + continue + p_j = nn_probabilities[j] + distance = distances[i][j] + total += distance + coherence += (p_i[0] * p_j[0] + p_i[1] * p_j[1]) * distance + # print(coherence, total) + est_coherences[i] = coherence / total + + # for known in knowns: + # i = known[0] + # est_coherences[i] = nn_probabilities[i][known[1]] + + # for i in range(len(est_coherences)): + # if not est_coherences[i] is None: + # continue + # est_coherences[i] = compute_est_coherence(i, knowns, est_coherences, distances) + + # print(est_coherences) + + return est_coherences + +def score(coherences): + # while len(coherences) > 1: + # coherences = [(coherences[i] + coherences[i + 1]) / 2 for i in range(0, len(coherences), 2)] + # return coherences[0] + + # return median(coherences) + return sum(coherences) / len(coherences) + +def 
xor_by_index(knowns, index): + mask = 1 << index + knowns = knowns[:] + for i in range(len(knowns)): + known = knowns[i] + if known[0] & mask: + knowns[i] = (known[0], known[1] ^ 1) + return knowns + +def main(): + n = 3 + N = 2 ** n + distances = compute_distances(N) + + knowns = [(i, xor_n(i)) for i in [ + 0, 3, 4, 5, 7 + # 3, 5, 6, 10, 12, 14 + # 1, 3, 7, 10, 14, 15 + # 0, 3, 5, 6, 10, 11, 12 + # 0, 3, 5, 6, 10, 11, 12, 24, 30 + ]] + print(knowns) + print() + + # knowns = [ + # (1, 1), + # (3, 0), + # (7, 1), + # (10, 0), + # (14, 1), + # (15, 0) + # ] + + # knowns = [ + # (0, 0), + # (3, 0), + # (4, 1), + # (5, 0), + # (7, 1) + # ] + + # knowns = [ + # (0, 0), + # (1, 1), + # (2, 1), + # (3, 0), + # (4, 1), + # (5, 0), + # (6, 0), + # (7, 1) + # ] + + coherences = compute_est_coherences(N, knowns, distances) + best_coherence = score(coherences) + print(best_coherence) + + while best_coherence < 1.0: + print() + # print(knowns) + # print() + best_index = -1 + for i in range(0, n): + coherences = compute_est_coherences(N, xor_by_index(knowns, i), distances) + coherence = score(coherences) + print(coherence) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + if best_index < 0: + break + knowns = xor_by_index(knowns, best_index) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/model_probabilities2.py b/model_probabilities2.py new file mode 100755 index 0000000..c73651b --- /dev/null +++ b/model_probabilities2.py @@ -0,0 +1,260 @@ +import math +from statistics import median, stdev + +def count_one_bits(n): + return bin(n).count("1") + +def compute_distance(a, b): + distance = count_one_bits(a ^ b) + # return 1 / (8 ** distance) + if distance == 0: + return 0 + # return 1 / (64 ** (distance - 1)) + return distance + +def xor_n(n): + return count_one_bits(n) % 2 + +def compute_distances(N): + return [[compute_distance(i, j) for j in range(N)] for i in range(N)] + +def compute_nn_probabilities(i, knowns, distances): + total = 0.0 + total_zero = 0.0 + total_one = 0.0 + for known in knowns: + j = known[0] + if i == j: + continue + distance = distances[i][j] + total += distance + if known[1] == 0: + total_zero += distance + else: + total_one += distance + p_zero = total_zero / total + p_one = total_one / total + return (p_zero, p_one) + +def interpolate_probabilities(i, knowns, distances, probabilities, dim): + total = 0.0 + total_dim = [0.0] * dim + for known in knowns: + j = known[0] + if i == j: + continue + distance = distances[i][j] + total += distance + probability = probabilities[j] + for index in range(dim): + total_dim[index] += distance * probability[index] + for index in range(dim): + total_dim[index] /= total + return total_dim + +def compute_est_coherence(i, knowns, coherences, distances): + total = 0.0 + coherence = 0.0 + for known in knowns: + j = known[0] + distance = distances[i][j] + total += distance + coherence += distance * coherences[j] + return coherence / total + +def compute_est_coherences(N, knowns, distances): + nn_probabilities = [None for i in range(N)] + nn_correct_probabilities = [None for i in range(N)] + coherences = [] + + for known in knowns: + i = known[0] + nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances) + + # for i in range(len(nn_probabilities)): + # if not nn_probabilities[i] is None: + # continue + # nn_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_probabilities, 2) + + for known in knowns: + i = known[0] + nn_correct_probabilities[i] = 
[nn_probabilities[i][known[1]]] + + # for i in range(len(nn_correct_probabilities)): + # if not nn_correct_probabilities[i] is None: + # continue + # nn_correct_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_correct_probabilities, 1) + + coherences_0 = [] + coherences_1 = [] + for known_i in knowns: + i = known_i[0] + coherence = 0.0 + total = 0.0 + for known_j in knowns: + j = known_j[0] + if i == j: + continue + + distance = distances[i][j] + total += distance + + nn_p_i_0 = nn_probabilities[i][0] + nn_p_i_1 = nn_probabilities[i][1] + nn_c_p_i = nn_correct_probabilities[i][0] + + nn_p_j_0 = nn_probabilities[j][0] + nn_p_j_1 = nn_probabilities[j][1] + nn_c_p_j = nn_correct_probabilities[j][0] + + p_i_0 = nn_p_i_0 * nn_c_p_i + nn_p_i_1 * (1 - nn_c_p_i) + p_i_1 = nn_p_i_1 * nn_c_p_i + nn_p_i_0 * (1 - nn_c_p_i) + + p_j_0 = nn_p_j_0 * nn_c_p_j + nn_p_j_1 * (1 - nn_c_p_j) + p_j_1 = nn_p_j_1 * nn_c_p_j + nn_p_j_0 * (1 - nn_c_p_j) + + coherence += distance * (p_i_0 * p_j_0 + p_i_1 * p_j_1) + coherences.append(coherence / total) + if known_i[1] == 0: + coherences_0.append(coherence / total) + else: + coherences_1.append(coherence / total) + + return coherences + +def score(coherences, knowns, distances): + # while len(coherences) > 1: + # coherences = [(coherences[i] + coherences[i + 1]) / 2 for i in range(0, len(coherences), 2)] + # return coherences[0] + + # return median(coherences) + # return sum(coherences) / len(coherences) + if len(coherences) == 0: + return 1.0 + numerator_0 = 0.0 + denominator_0 = 0.0 + numerator_1 = 0.0 + denominator_1 = 0.0 + count_0 = 0.0 + count_1 = 0.0 + for i in range(len(knowns)): + weight = 0 + for j in range(len(knowns)): + weight += distances[knowns[i][0]][knowns[j][0]] + print(weight, end=' ') + if knowns[i][1] == 0: + denominator_0 += weight + numerator_0 += weight * coherences[i] + count_0 += 1 + else: + denominator_1 += weight + numerator_1 += weight * coherences[i] + count_1 += 1 + # print() + if count_0 == 0 or count_1 == 0: + return 1.0 + + # return ((sum(coherences[0]) / len(coherences[0])) + (sum(coherences[1]) / len(coherences[1]))) / 2.0 + # return (sum(coherences[0]) + sum(coherences[1])) / (len(coherences[0]) + len(coherences[1])) + # div_0 = (numerator_0 / denominator_0 if denominator_0 > 0 else 1.0) * 0.5 + # div_1 = (numerator_1 / denominator_1 if denominator_1 > 0 else 1.0) * 0.5 + # return div_0 + div_1 + # aligned = 1.0 - abs(0.5 - max(count_0 / (count_0 + count_1), count_1 / (count_0 + count_1))) + # return ((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) * (aligned ** 0.1) + # return (((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) + 0.12 * aligned) * (1.0 / 1.12) + return (numerator_0 + numerator_1) / (denominator_0 + denominator_1) + +def xor_by_index(knowns, index): + mask = 1 << index + knowns = knowns[:] + for i in range(len(knowns)): + known = knowns[i] + if known[0] & mask: + knowns[i] = (known[0], known[1] ^ 1) + return knowns + +def main(): + n = 8 + N = 2 ** n + distances = compute_distances(N) + + knowns = [(i, xor_n(i)) for i in [ + # 0, 3, 4, 5, 7 + # 3, 5, 6, 10, 12, 14 + # 1, 3, 7, 10, 14, 15 + # 0, 3, 5, 6, 10, 11, 12 + 0, 3, 5, 6, 10, 11, 12, 24, 30 + # 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127 + # 128, 131, 248, 0, 7, 13, 17, 19 + ]] + + for known_i in knowns: + i = known_i[0] + for known_j in knowns: + j = known_j[0] + print(distances[i][j], end=' ') + print() + + print(knowns) + print() + + # knowns = [ + # (1, 1), + # (3, 0), + # (7, 1), + # (10, 0), + # 
(14, 1), + # (15, 0) + # ] + + # knowns = [ + # (0, 0), + # (3, 0), + # (4, 1), + # (5, 0), + # (7, 1) + # ] + + # knowns = [ + # (0, 0), + # (1, 1), + # (2, 1), + # (3, 0), + # (4, 1), + # (5, 0), + # (6, 0), + # (7, 1) + # ] + + coherences = compute_est_coherences(N, knowns, distances) + best_coherence = score(coherences, knowns, distances) + print(best_coherence) + + flipped = [] + while best_coherence < 1.0: + print() + # print(knowns) + # print() + best_index = -1 + # best_coherence = 0 + for i in range(0, n): + if i in flipped: + continue + mutated_knowns = xor_by_index(knowns, i) + coherences = compute_est_coherences(N, mutated_knowns, distances) + coherence = score(coherences, mutated_knowns, distances) + # print(coherence) + print(coherence, end=' ') + print(mutated_knowns) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + if best_index < 0: + break + knowns = xor_by_index(knowns, best_index) + # flipped.append(best_index) + print(knowns) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/model_probabilities3.py b/model_probabilities3.py new file mode 100755 index 0000000..a0980e2 --- /dev/null +++ b/model_probabilities3.py @@ -0,0 +1,463 @@ +import hashlib +import math +from statistics import median, stdev +import numpy as np + +def count_one_bits(n): + return bin(n).count("1") + +def compute_distance(a, b): + distance = count_one_bits(a ^ b) + # return 1 / (8 ** distance) + if distance == 0: + return 0 + # return 1 / (64 ** (distance - 1)) + return distance + +def xor_n(n): + return count_one_bits(n) % 2 + +def sha_n(n): + m = hashlib.sha256() + m.update(str(n).encode("utf-8")) + result = m.digest() + return result[0] & 0b1 + +def compute_distances(N): + return [[compute_distance(i, j) for j in range(N)] for i in range(N)] + +def compute_nn_probabilities(i, knowns, distances): + total = 0.0 + total_zero = 0.0 + total_one = 0.0 + for known in knowns: + j = known[0] + if i == j: + continue + distance = distances[i][j] + total += distance + if known[1] == 0: + total_zero += distance + else: + total_one += distance + p_zero = total_zero / total + p_one = total_one / total + return (p_zero, p_one) + +def interpolate_probabilities(i, knowns, distances, probabilities, dim): + total = 0.0 + total_dim = [0.0] * dim + for known in knowns: + j = known[0] + if i == j: + continue + distance = distances[i][j] + total += distance + probability = probabilities[j] + for index in range(dim): + total_dim[index] += distance * probability[index] + for index in range(dim): + total_dim[index] /= total + return total_dim + +def compute_est_coherence(i, knowns, coherences, distances): + total = 0.0 + coherence = 0.0 + for known in knowns: + j = known[0] + distance = distances[i][j] + total += distance + coherence += distance * coherences[j] + return coherence / total + +def compute_est_coherences(N, knowns, distances): + nn_probabilities = [None for i in range(N)] + nn_correct_probabilities = [None for i in range(N)] + coherences = [] + + for known in knowns: + i = known[0] + nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances) + + # for i in range(len(nn_probabilities)): + # if not nn_probabilities[i] is None: + # continue + # nn_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_probabilities, 2) + + for known in knowns: + i = known[0] + nn_correct_probabilities[i] = [nn_probabilities[i][known[1]]] + + # for i in range(len(nn_correct_probabilities)): + # if not nn_correct_probabilities[i] is None: + # 
continue + # nn_correct_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_correct_probabilities, 1) + + coherences_0 = [] + coherences_1 = [] + for known_i in knowns: + i = known_i[0] + coherence = 0.0 + total = 0.0 + for known_j in knowns: + j = known_j[0] + if i == j: + continue + + distance = distances[i][j] + total += distance + + nn_p_i_0 = nn_probabilities[i][0] + nn_p_i_1 = nn_probabilities[i][1] + nn_c_p_i = nn_correct_probabilities[i][0] + + nn_p_j_0 = nn_probabilities[j][0] + nn_p_j_1 = nn_probabilities[j][1] + nn_c_p_j = nn_correct_probabilities[j][0] + + p_i_0 = nn_p_i_0 * nn_c_p_i + nn_p_i_1 * (1 - nn_c_p_i) + p_i_1 = nn_p_i_1 * nn_c_p_i + nn_p_i_0 * (1 - nn_c_p_i) + + p_j_0 = nn_p_j_0 * nn_c_p_j + nn_p_j_1 * (1 - nn_c_p_j) + p_j_1 = nn_p_j_1 * nn_c_p_j + nn_p_j_0 * (1 - nn_c_p_j) + + coherence += distance * (p_i_0 * p_j_0 + p_i_1 * p_j_1) + coherences.append(coherence / total) + if known_i[1] == 0: + coherences_0.append(coherence / total) + else: + coherences_1.append(coherence / total) + + return coherences + +def score(coherences, knowns, distances): + # while len(coherences) > 1: + # coherences = [(coherences[i] + coherences[i + 1]) / 2 for i in range(0, len(coherences), 2)] + # return coherences[0] + + # return median(coherences) + # return sum(coherences) / len(coherences) + if len(coherences) == 0: + return 1.0 + numerator_0 = 0.0 + denominator_0 = 0.0 + numerator_1 = 0.0 + denominator_1 = 0.0 + count_0 = 0.0 + count_1 = 0.0 + for i in range(len(knowns)): + weight = 0 + for j in range(len(knowns)): + weight += distances[knowns[i][0]][knowns[j][0]] + print(weight, end=' ') + if knowns[i][1] == 0: + denominator_0 += weight + numerator_0 += weight * coherences[i] + count_0 += 1 + else: + denominator_1 += weight + numerator_1 += weight * coherences[i] + count_1 += 1 + # print() + if count_0 == 0 or count_1 == 0: + return 1.0 + + # return ((sum(coherences[0]) / len(coherences[0])) + (sum(coherences[1]) / len(coherences[1]))) / 2.0 + # return (sum(coherences[0]) + sum(coherences[1])) / (len(coherences[0]) + len(coherences[1])) + # div_0 = (numerator_0 / denominator_0 if denominator_0 > 0 else 1.0) * 0.5 + # div_1 = (numerator_1 / denominator_1 if denominator_1 > 0 else 1.0) * 0.5 + # return div_0 + div_1 + # aligned = 1.0 - abs(0.5 - max(count_0 / (count_0 + count_1), count_1 / (count_0 + count_1))) + # return ((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) * (aligned ** 0.1) + # return (((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) + 0.12 * aligned) * (1.0 / 1.12) + return (numerator_0 + numerator_1) / (denominator_0 + denominator_1) + +def xor_by_index(knowns, index, reverse=False): + mask = 1 << index + knowns = knowns[:] + for i in range(len(knowns)): + known = knowns[i] + if known[0] & mask or (not (known[0] & mask) and reverse): + knowns[i] = (known[0], known[1] ^ 1) + return knowns + +def flip(n, index): + return n ^ (1 << index) + +def matrix_from_knowns(knowns, N): + S = 2 ** N + mat = np.zeros((S, S)) + val = np.zeros(S) + unknowns = set([i for i in range(0, S)]) + for (i, value) in knowns: + mat[i][i] = 1.0 + val[i] = value + unknowns.remove(i) + for i in unknowns: + mat[i][i] = -1.0 + for j in range(0, N): + mat[i][flip(i,j)] = 1.0 / N + return (mat, val) + +def compute_splits(knowns, N): + splits = [] + for i in range(0, N): + mask = 1 << i + left_0 = 0 + left_1 = 0 + right_0 = 0 + right_1 = 0 + for (j, value) in knowns: + if j & mask == 0: + if value == 0: + left_0 += 1 + else: + left_1 += 1 + else: + if 
value == 0: + right_0 += 1 + else: + right_1 += 1 + print((left_0, left_1), (right_0, right_1)) + left_ratio = min(left_0, left_1) / (left_0 + left_1) + right_ratio = min(right_0, right_1) / (right_0 + right_1) + # print(left_ratio, right_ratio) + splits.append((left_ratio + right_ratio) / 2) + return splits + +def compute_coherence(knowns, N): + S = 2 ** N + # (mat, val) = matrix_from_knowns(knowns, N) + # solution = np.linalg.inv(mat).dot(val) + # for it in range(0, 1000): + # next = np.zeros(len(solution)) + # for i in range(0, len(solution)): + # sum = 0.0 + # for j in range(0, N): + # sum += solution[flip(i,j)] + # next[i] = sum / N + # solution = next + # return 0.0 + + # coherence_0 = 0.0 + # coherence_1 = 0.0 + # zeros = 0.0 + # ones = 0.0 + # lowest = 1.0 + # print() + (mat, val) = matrix_from_knowns(knowns, N) + A = np.linalg.inv(mat).dot(val) + knowns_nn = [] + for known_index in range(0, len(knowns)): + (mat, val) = matrix_from_knowns(knowns[:known_index] + knowns[known_index + 1:], N) + solution = np.linalg.inv(mat).dot(val) + (i, value) = knowns[known_index] + value_nn = solution[i] + knowns_nn.append((i, value_nn)) + (mat, val) = matrix_from_knowns(knowns_nn, N) + B = np.linalg.inv(mat).dot(val) + return 1.0 - (sum(abs(A - B)) / len(A)) + # # print(A) + # # print(B) + # A_sub_B = A - B + # print(A) + # print(B) + # print(A) + # print(B) + # print(np.dot(A, B) / len(A)) + # return 1.0 - (np.dot(A_sub_B, A_sub_B) / len(A)) + # print(i, value, value_nn, partial) + # coherence += ((value * value_nn) + ((1 - value) * (1 - value_nn))) / len(knowns) + # if value == 0: + # coherence_0 += partial + # zeros += 1 + # else: + # coherence_1 += partial + # ones += 1 + # if zeros == 0 or ones == 0: + # return 1.0 + # return 0.5 * coherence_0 / zeros + 0.5 * coherence_1 / ones + + # coherences = np.zeros(S) + # (mat, val) = matrix_from_knowns(knowns, N) + # solution = np.linalg.inv(mat).dot(val) + # print(solution) + # for i in range(0, S): + # p = solution[i] + # coherence = 0.0 + # for j in range(0, N): + # q = solution[flip(i,j)] + # coherence += ((p * q) + ((1 - p) * (1 - q))) / N + # coherences[i] = coherence + # print(coherences) + # return sum(coherences) / len(coherences) + +def compute_split_knowns(knowns, N): + sum = 0 + splits = [] + for i in range(0, N): + mask = 1 << i + left = [] + right = [] + for (j, value) in knowns: + k = (j & ((1 << i) - 1)) | ((j & ~((1 << (i + 1)) - 1)) >> 1) + masked_known = (k, value) + if j & mask == 0: + left.append(masked_known) + else: + right.append(masked_known) + left_coherence = compute_coherence(left, N - 1) + right_coherence = compute_coherence(right, N - 1) + splits.append((left_coherence, right_coherence)) + sum += min(left_coherence, right_coherence) * (1.0 - abs(left_coherence - right_coherence)) + # print() + # print(splits) + # print() + return sum / N + +def remove_bit(i, n): + return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1) + +def compute_split_knowns_r(knowns, N): + if len(knowns) == 0: + raise ValueError('This should never happen') + + hist = np.zeros(N) + for i in range(0, N): + mask = 1 << i + for (j, value) in knowns: + if j & mask == 0: + hist[i] += 1 + + constant_bits = [] + for i in range(0, N): + if hist[i] == 0 or hist[i] == len(knowns): + constant_bits.append(i) + + if len(constant_bits) > 0: + constant_bits.reverse() + for n in constant_bits: + reduced_knowns = [] + for (j, value) in knowns: + reduced_knowns.append((remove_bit(j, n), value)) + knowns = reduced_knowns + return 
compute_split_knowns_r(knowns, N - len(constant_bits)) + + if len(knowns) == 1: + return 1.0 + if len(knowns) == 2: + if knowns[0][1] == knowns[1][1]: + return 1.0 + else: + return 0.0 + + sum = 0 + for i in range(0, N): + mask = 1 << i + left = [] + right = [] + for (j, value) in knowns: + k = remove_bit(j, i) + masked_known = (k, value) + if j & mask == 0: + left.append(masked_known) + else: + right.append(masked_known) + + # left_correctness = max(left_0_count, left_1_count) / (left_0_count + left_1_count) if left_0_count > 0 and left_1_count > 0 else 1.0 + # right_correctness = max(right_0_count, right_1_count) / (right_0_count + right_1_count) if right_0_count > 0 and right_1_count > 0 else 1.0 + left_coherence = compute_split_knowns_r(left, N - 1) + right_coherence = compute_split_knowns_r(right, N - 1) + evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0 + # sum += min(left_coherence, right_coherence) * (evenness ** 2) + # delta = 1.0 - ((left_coherence - right_coherence) * (left_coherence - right_coherence)) + sum += 0.7 * min(left_coherence, right_coherence) + 0.3 * evenness ** 2 + # sum += min(left_coherence, right_coherence) * (1.0 - abs(left_coherence - right_coherence)) + return sum / N + +def main(): + N = 8 + S = 2 ** N + distances = compute_distances(S) + + knowns = [(i, sha_n(i)) for i in [ + 0, 1, 2, 3, 4, 5, 6, 7 + # 0, 3, 4, 5, 7 + # 3, 5, 6, 10, 12, 14 + # 1, 3, 7, 10, 14, 15 + # 0, 3, 5, 6, 10, 11, 12 + # 0, 3, 5, 6, 10, 11, 12, 24, 30 + # 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127 + # 128, 131, 248, 0, 7, 13, 17, 19 + ]] + + # best_coherence = compute_coherence(knowns, N) + best_coherence = compute_split_knowns_r(knowns, N) + print(best_coherence) + print(knowns) + print() + while best_coherence < 1.0: + best_index = -1 + best_reverse = False + # best_coherence = 0 + for i in range(0, N): + for reverse in [False, True]: + mutated_knowns = xor_by_index(knowns, i, reverse) + # coherence = compute_coherence(mutated_knowns, N) + coherence = compute_split_knowns_r(mutated_knowns, N) + print(i, reverse, coherence) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + best_reverse = reverse + if best_index < 0: + break + knowns = xor_by_index(knowns, best_index, best_reverse) + print() + print(best_index, best_reverse, best_coherence) + print(knowns) + print() + print(knowns) + + # for known_i in knowns: + # i = known_i[0] + # for known_j in knowns: + # j = known_j[0] + # print(distances[i][j], end=' ') + # print() + + # print(knowns) + # print() + + # coherences = compute_est_coherences(N, knowns, distances) + # best_coherence = score(coherences, knowns, distances) + # print(best_coherence) + + # flipped = [] + # while best_coherence < 1.0: + # print() + # # print(knowns) + # # print() + # best_index = -1 + # # best_coherence = 0 + # for i in range(0, n): + # if i in flipped: + # continue + # mutated_knowns = xor_by_index(knowns, i) + # coherences = compute_est_coherences(N, mutated_knowns, distances) + # coherence = score(coherences, mutated_knowns, distances) + # # print(coherence) + # print(coherence, end=' ') + # print(mutated_knowns) + # if coherence > best_coherence: + # best_coherence = coherence + # best_index = i + # if best_index < 0: + # break + # knowns = xor_by_index(knowns, best_index) + # # flipped.append(best_index) + # print(knowns) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git 
a/model_probabilities4.py b/model_probabilities4.py new file mode 100755 index 0000000..bc2821f --- /dev/null +++ b/model_probabilities4.py @@ -0,0 +1,208 @@ +import hashlib +import math +from statistics import median, stdev +import numpy as np +import random + +def count_one_bits(n): + return bin(n).count("1") + +def xor_n(n): + return count_one_bits(n) % 2 + +def sha_n(n): + m = hashlib.sha256() + m.update(str(n).encode("utf-8")) + result = m.digest() + return result[0] & 0b1 + +def xor_by_index(knowns, index, reverse=False): + mask = 1 << index + knowns = knowns[:] + for i in range(len(knowns)): + known = knowns[i] + if known[0] & mask or (not (known[0] & mask) and reverse): + knowns[i] = (known[0], known[1] ^ 1) + return knowns + +def remove_bit(i, n): + return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1) + +def split_at(knowns, N, i): + mask = 1 << i + left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0] + right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0] + return (left, right) + +def factor_at(knowns, N, i, identity_value=1): + mask = 1 << i + left = [(j, value) for (j, value) in knowns if value == identity_value or (j & mask) == 0] + right = [(j, value) for (j, value) in knowns if value == identity_value or not (j & mask) == 0] + return (left, right) + +def compute_coherence(pair, N, depth = 0): + (left, right) = pair + (left_depth, left_coherence) = compute_split_knowns_r(left, N, depth) + (right_depth, right_coherence) = compute_split_knowns_r(right, N, depth) + ratio = min(len(left), len(right)) / max(len(left), len(right)) + # evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0 + evenness = left_coherence - right_coherence + # return 0.75 * min(left_coherence, right_coherence) + 0.25 * evenness ** 2 + # return 0.8 * min(left_coherence, right_coherence) + 0.2 * evenness ** 2 + coherence = left_coherence if left_depth > right_depth else right_coherence if right_depth > left_depth else (left_coherence + right_coherence) / 2.0 + depth = max(left_depth, right_depth) + return (depth, 0.9 * coherence + 0.1 * (1.0 - (evenness ** 2))) + +def compute_split_knowns_r(knowns, N, depth = 0): + if len(knowns) == 0: + return (depth, 1.0) + + hist = np.zeros(N) + for i in range(0, N): + mask = 1 << i + for (j, value) in knowns: + if j & mask == 0: + hist[i] += 1 + + constant_bits = [i for i in range(0, N) if hist[i] == 0 or hist[i] == len(knowns)] + if len(constant_bits) > 0: + constant_bits.reverse() + for n in constant_bits: + knowns = [(remove_bit(j, n), value) for (j, value) in knowns] + return compute_split_knowns_r(knowns, N - len(constant_bits), depth) + + if len(knowns) == 1: + return (depth, 1.0) + if len(knowns) == 2: + if knowns[0][1] == knowns[1][1]: + return (depth, 1.0) + else: + return (depth, 0.0) + + sum = 0 + denominator = 0 + for i in range(0, N): + (left, right) = split_at(knowns, N, i) + (depth, partial) = compute_coherence((left, right), N, depth + 1) + sum += depth * partial + denominator += depth + return (depth, sum / denominator) + +def invert(knowns): + inverted_knowns = [] + for (i, value) in knowns: + inverted_knowns.append((i, 1 - value)) + return inverted_knowns + +def reduce(knowns, N): + flips = [] + (depth, best_coherence) = compute_split_knowns_r(knowns, N) + print(best_coherence) + print(knowns) + print() + while best_coherence < 1.0: + best_index = -1 + best_reverse = False + # best_coherence = 0 + 
for i in range(0, N): + for reverse in [False, True]: + mutated_knowns = xor_by_index(knowns, i, reverse) + # coherence = compute_coherence(mutated_knowns, N) + (depth, coherence) = compute_split_knowns_r(mutated_knowns, N) + print(i, reverse, coherence) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + best_reverse = reverse + if best_index < 0: + break + knowns = xor_by_index(knowns, best_index, best_reverse) + flips.append((best_index, best_reverse)) + print() + print(best_index, best_reverse, best_coherence) + print(knowns) + print() + return (knowns, best_coherence, flips) + +def solve(knowns, N): + (knowns, coherence, flips) = reduce(knowns, N) + if coherence == 1.0: + inverted = knowns[0][1] + return (inverted, flips, None) + + raise Exception('Stop') + + best_coherence = 0 + best_index = -1 + best_identity_value = False + print() + for i in range(0, N): + for identity_value in [0, 1]: + coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N) + print(i, identity_value, coherence) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + best_identity_value = identity_value + print() + (left, right) = factor_at(knowns, N, best_index, best_identity_value) + return (0, flips, (best_identity_value, solve(left, N), solve(right, N))) + +def evaluate(model, n, value = 0): + (inverted, flips, child) = model + for (i, invert) in flips: + mask = (1 << i) + masked_n = n & mask + if (masked_n > 0 and not invert) or (masked_n == 0 and invert): + value = 1 - value + if not child is None: + (identity, left_child, right_child) = child + left = evaluate(left_child, n, 1 - identity) + right = evaluate(right_child, n, 1 - identity) + if left and right: + value = 1 - value + if identity == 0: + value = 1 - value + if inverted: + value = 1 - value + return value + +def main(): + N = 8 + S = 2 ** N + train_size = 16 + test_size = 100 + f = xor_n + + knowns = [(i, f(i)) for i in [ + # 0, 1, 2, 3, 4, 5, 6, 7 + # 0, 3, 4, 5, 7 + # 3, 5, 6, 10, 12, 14 + # 1, 3, 7, 10, 14, 15 + # 0, 3, 5, 6, 10, 11, 12 + # 0, 3, 5, 6, 10, 11, 12, 24, 30 + # 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127 + # 128, 131, 248, 0, 7, 13, 17, 19 + 23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255 + ]] + + # f = xor_n + # knowns = [] + # train_samples = set() + # for i in range(0, train_size): + # k = random.randint(0, S) + # while k in train_samples: + # k = random.randint(0, S) + # knowns.append((k, f(i))) + # train_samples.add(k) + + model = solve(knowns, N) + # print(model) + correct = 0 + for i in range(0, test_size): + if f(i) == evaluate(model, i): + correct += 1 + print(str(correct) + "/" + str(test_size)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/model_probabilities5.py b/model_probabilities5.py new file mode 100755 index 0000000..8b02a17 --- /dev/null +++ b/model_probabilities5.py @@ -0,0 +1,219 @@ +import hashlib +import math +import numpy as np +import random + +def count_one_bits(n): + return bin(n).count("1") + +def xor_n(n): + return count_one_bits(n) % 2 + +def sha_n(n): + m = hashlib.sha256() + m.update(str(n).encode("utf-8")) + result = m.digest() + return result[0] & 0b1 + +def xor_by_index(knowns, index, reverse=False): + mask = 1 << index + knowns = knowns[:] + for i in range(len(knowns)): + known = knowns[i] + if known[0] & mask or (not (known[0] & mask) and reverse): + knowns[i] = (known[0], known[1] ^ 1) + return knowns + +def remove_bit(i, n): + return (i & ((1 << n) - 
1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1) + +def split_at(knowns, N, i): + mask = 1 << i + left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0] + right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0] + return (left, right) + +def factor_at(knowns, N, i, identity_value=1): + mask = 1 << i + left = [(j, value) for (j, value) in knowns if value == identity_value or (j & mask) == 0] + right = [(j, value) for (j, value) in knowns if value == identity_value or not (j & mask) == 0] + return (left, right) + +def compute_coherence(pair, N): + (left, right) = pair + left_coherence = compute_split_knowns_r(left, N) + right_coherence = compute_split_knowns_r(right, N) + ratio = min(len(left), len(right)) / max(len(left), len(right)) + # evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0 + # evenness = left_coherence - right_coherence + evenness = (1.0 - ((1.0 - left_coherence) - (1.0 - right_coherence)) ** 2) + # return 0.75 * min(left_coherence, right_coherence) + 0.25 * evenness ** 2 + # return 0.8 * min(left_coherence, right_coherence) + 0.2 * evenness ** 2 + # coherence = left_coherence if left_depth > right_depth else right_coherence if right_depth > left_depth else (left_coherence + right_coherence) / 2.0 + # depth = max(left_depth, right_depth) + # return (depth, 0.9 * coherence + 0.1 * (1.0 - (evenness ** 2))) + # return 0.8 * min(left_coherence, right_coherence) + 0.2 * (1.0 - (evenness ** 2)) + # return 0.75 * min(left_coherence, right_coherence) + 0.25 * (evenness ** 2) + # return ((left_coherence * len(left) + right_coherence * len(right)) / (len(left) +len(right))) * min(left_coherence, right_coherence) * evenness + # return min(left_coherence, right_coherence) * (evenness ** 2) + coherence = ((len(left) / (len(left) + len(right))) * left_coherence + (len(right) / (len(left) + len(right))) * right_coherence) + return min(left_coherence, right_coherence) * (evenness ** 2) + +def compute_split_knowns_r(knowns, N): + # if len(knowns) == 0: + # return 1.0 + + # hist = np.zeros(N) + # for i in range(0, N): + # mask = 1 << i + # for (j, value) in knowns: + # if j & mask == 0: + # hist[i] += 1 + + # constant_bits = [i for i in range(0, N) if hist[i] == 0 or hist[i] == len(knowns)] + # if len(constant_bits) > 0: + # constant_bits.reverse() + # for n in constant_bits: + # knowns = [(remove_bit(j, n), value) for (j, value) in knowns] + # return compute_split_knowns_r(knowns, N - len(constant_bits), depth) + + if len(knowns) == 1: + return 1.0 + if len(knowns) == 2: + if knowns[0][1] == knowns[1][1]: + return 1.0 + else: + return 0.0 + + sum = 0 + denominator = 0 + for i in range(0, N): + (left, right) = split_at(knowns, N, i) + weight = min(len(left), len(right)) / max(len(left), len(right)) + # weight = 1.0 - (abs(len(left) - len(right)) / (len(left) + len(right))) + if weight == 0: + continue + partial = compute_coherence((left, right), N - 1) + sum += weight * partial + denominator += weight + return sum / denominator + +def invert(knowns): + inverted_knowns = [] + for (i, value) in knowns: + inverted_knowns.append((i, 1 - value)) + return inverted_knowns + +def reduce(knowns, N): + flips = [] + best_coherence = compute_split_knowns_r(knowns, N) + print(best_coherence) + print(knowns) + print() + while best_coherence < 1.0: + best_index = -1 + best_reverse = False + # best_coherence = 0 + for i in range(0, N): + for reverse in [False, True]: + 
mutated_knowns = xor_by_index(knowns, i, reverse) + # coherence = compute_coherence(mutated_knowns, N) + coherence = compute_split_knowns_r(mutated_knowns, N) + print(i, reverse, coherence) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + best_reverse = reverse + if best_index < 0: + break + knowns = xor_by_index(knowns, best_index, best_reverse) + flips.append((best_index, best_reverse)) + print() + print(best_index, best_reverse, best_coherence) + print(knowns) + print() + return (knowns, best_coherence, flips) + +def solve(knowns, N): + (knowns, coherence, flips) = reduce(knowns, N) + if coherence == 1.0: + inverted = knowns[0][1] + return (inverted, flips, None) + + raise Exception('Stop') + + best_coherence = 0 + best_index = -1 + best_identity_value = False + print() + for i in range(0, N): + for identity_value in [0, 1]: + coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N) + print(i, identity_value, coherence) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + best_identity_value = identity_value + print() + (left, right) = factor_at(knowns, N, best_index, best_identity_value) + return (0, flips, (best_identity_value, solve(left, N), solve(right, N))) + +def evaluate(model, n, value = 0): + (inverted, flips, child) = model + for (i, invert) in flips: + mask = (1 << i) + masked_n = n & mask + if (masked_n > 0 and not invert) or (masked_n == 0 and invert): + value = 1 - value + if not child is None: + (identity, left_child, right_child) = child + left = evaluate(left_child, n, 1 - identity) + right = evaluate(right_child, n, 1 - identity) + if left and right: + value = 1 - value + if identity == 0: + value = 1 - value + if inverted: + value = 1 - value + return value + +def main(): + N = 8 + S = 2 ** N + train_size = 128 + test_size = 100 + f = xor_n + + knowns = [(i, f(i)) for i in [ + # 0, 1, 2, 3, 4, 5, 6, 7 + # 0, 3, 4, 5, 7 + # 3, 5, 6, 10, 12, 14 + # 1, 3, 7, 10, 14, 15 + # 0, 3, 5, 6, 10, 11, 12 + # 0, 3, 5, 6, 10, 11, 12, 24, 30 + # 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127 + 128, 131, 248, 0, 7, 13, 17, 19 + # 23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255 + ]] + + # knowns = [] + # train_samples = set() + # for i in range(0, train_size): + # k = random.randint(0, S) + # while k in train_samples: + # k = random.randint(0, S) + # knowns.append((k, f(i))) + # train_samples.add(k) + + model = solve(knowns, N) + print(model) + # print(model) + correct = 0 + for i in range(0, test_size): + k = random.randint(0, S - 1) + if f(k) == evaluate(model, k): + correct += 1 + print(str(correct) + "/" + str(test_size)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/model_probabilities6.py b/model_probabilities6.py new file mode 100755 index 0000000..faeea68 --- /dev/null +++ b/model_probabilities6.py @@ -0,0 +1,201 @@ +import hashlib +import math +import numpy as np +import random + +def count_one_bits(n): + return bin(n).count("1") + +def xor_n(n): + return count_one_bits(n) % 2 + +def sha_n(n): + m = hashlib.sha256() + m.update(str(n).encode("utf-8")) + result = m.digest() + return result[0] & 0b1 + +def xor_by_index(knowns, index, reverse=False): + mask = 1 << index + knowns = knowns[:] + for i in range(len(knowns)): + (g, j, value) = knowns[i] + if j & mask or (not (j & mask) and reverse): + knowns[i] = (g, j, value ^ 1) + return knowns + +def remove_bit(i, n): + return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1) + 
+def split_at(knowns, N, i): + mask = 1 << i + left = [(g, remove_bit(j, i), value) for (g, j, value) in knowns if (j & mask) == 0] + right = [(g, remove_bit(j, i), value) for (g, j, value) in knowns if not (j & mask) == 0] + return (left, right) + +def factor_at(knowns, N, i, identity_value=1): + mask = 1 << i + left = [(g, j, value) for (g, j, value) in knowns if value == identity_value or (j & mask) == 0] + right = [(g, j, value) for (g, j, value) in knowns if value == identity_value or not (j & mask) == 0] + return (left, right) + +def key_for_knowns(knowns): + return tuple([g for (g, _, _) in knowns]) + +primes = [1, 2, 3, 5, 7, 11, 13, 17, 19, 23] + +def compute_split_knowns_r(knowns, N): + stack = [(knowns, N)] + numerator = 0.0 + denominator = 0.0 + + while len(stack) > 0: + (s, n) = stack.pop() + depth = (N - n) + weight = depth ** 64 + + if len(s) == 1: + # numerator += weight + # denominator += weight + numerator += weight + denominator += weight + continue + if len(s) == 2: + (_, a, left_value) = s[0] + (_, b, right_value) = s[1] + distance = count_one_bits(a ^ b) + weight /= (2 ** distance) + if left_value == right_value: + numerator += weight + denominator += weight + else: + denominator += weight + continue + + for i in range(0, n): + (left, right) = split_at(s, n, i) + if len(left) == 0 or len(right) == 0: + continue + stack.append((left, n - 1)) + stack.append((right, n - 1)) + + return numerator / denominator + +def invert(knowns): + inverted_knowns = [] + for (i, value) in knowns: + inverted_knowns.append((i, 1 - value)) + return inverted_knowns + +def reduce(knowns, N): + flips = [] + best_coherence = compute_split_knowns_r(knowns, N) + print(best_coherence) + print(knowns) + print() + while best_coherence < 1.0: + best_index = -1 + best_reverse = False + # best_coherence = 0 + for i in range(0, N): + for reverse in [False, True]: + mutated_knowns = xor_by_index(knowns, i, reverse) + # coherence = compute_coherence(mutated_knowns, N) + coherence = compute_split_knowns_r(mutated_knowns, N) + print(i, reverse, coherence) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + best_reverse = reverse + if best_index < 0: + break + knowns = xor_by_index(knowns, best_index, best_reverse) + flips.append((best_index, best_reverse)) + print() + print(best_index, best_reverse, best_coherence) + print(knowns) + print() + return (knowns, best_coherence, flips) + +def solve(knowns, N): + (knowns, coherence, flips) = reduce(knowns, N) + if coherence == 1.0: + (_, _, inverted) = knowns[0] + return (inverted, flips, None) + + raise Exception('Stop') + + best_coherence = 0 + best_index = -1 + best_identity_value = False + print() + for i in range(0, N): + for identity_value in [0, 1]: + coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N) + print(i, identity_value, coherence) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + best_identity_value = identity_value + print() + (left, right) = factor_at(knowns, N, best_index, best_identity_value) + return (0, flips, (best_identity_value, solve(left, N), solve(right, N))) + +def evaluate(model, n, value = 0): + (inverted, flips, child) = model + for (i, invert) in flips: + mask = (1 << i) + masked_n = n & mask + if (masked_n > 0 and not invert) or (masked_n == 0 and invert): + value = 1 - value + if not child is None: + (identity, left_child, right_child) = child + left = evaluate(left_child, n, 1 - identity) + right = evaluate(right_child, n, 1 - identity) + if left 
and right: + value = 1 - value + if identity == 0: + value = 1 - value + if inverted: + value = 1 - value + return value + +def main(): + N = 8 + S = 2 ** N + train_size = 128 + test_size = 100 + f = xor_n + + knowns = [(i, i, f(i)) for i in [ + # 0, 1, 2, 3, 4, 5, 6, 7 + # 0, 3, 4, 5, 7 + # 3, 5, 6, 10, 12, 14 + # 1, 3, 7, 10, 14, 15 + # 0, 3, 5, 6, 10, 11, 12 + 0, 3, 5, 6, 10, 11, 12, 24, 30 + # 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127 + # 128, 131, 248, 0, 7, 13, 17, 19 + # 23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255 + ]] + + # knowns = [] + # train_samples = set() + # for i in range(0, train_size): + # k = random.randint(0, S) + # while k in train_samples: + # k = random.randint(0, S) + # knowns.append((k, f(i))) + # train_samples.add(k) + + model = solve(knowns, N) + # print(model) + correct = 0 + for i in range(0, test_size): + k = random.randint(0, S - 1) + if f(k) == evaluate(model, k): + correct += 1 + print(str(correct) + "/" + str(test_size)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/model_probabilities7.py b/model_probabilities7.py new file mode 100755 index 0000000..97d71e3 --- /dev/null +++ b/model_probabilities7.py @@ -0,0 +1,249 @@ +import hashlib +import math +import numpy as np +import random + +def count_one_bits(n): + return bin(n).count("1") + +def xor_n(n): + return count_one_bits(n) % 2 + +def sha_n(n): + m = hashlib.sha256() + m.update(str(n).encode("utf-8")) + result = m.digest() + return result[0] & 0b1 + +def xor_by_index(knowns, index, reverse=False): + mask = 1 << index + knowns = knowns[:] + for i in range(len(knowns)): + known = knowns[i] + if known[0] & mask or (not (known[0] & mask) and reverse): + knowns[i] = (known[0], known[1] ^ 1) + return knowns + +def remove_bit(i, n): + return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1) + +def split_at(knowns, N, i): + mask = 1 << i + left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0] + right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0] + return (left, right) + +def factor_at(knowns, N, i, identity_value=1): + mask = 1 << i + left = [(j, value) for (j, value) in knowns if value == identity_value or (j & mask) == 0] + right = [(j, value) for (j, value) in knowns if value == identity_value or not (j & mask) == 0] + return (left, right) + +def span(s, N): + lower_bound = (1 << N) - 1 + upper_bound = 0 + for (x, _) in s: + upper_bound |= x + lower_bound &= x + return 2 ** count_one_bits(lower_bound ^ upper_bound) + +def compute_coherence(pair, N): + (left, right) = pair + left_coherence = compute_split_knowns_r(left, N) + right_coherence = compute_split_knowns_r(right, N) + + ratio = min(len(left), len(right)) / max(len(left), len(right)) + # evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0 + # evenness = left_coherence - right_coherence + evenness = (1.0 - ((1.0 - left_coherence) - (1.0 - right_coherence)) ** 2) + # return 0.75 * min(left_coherence, right_coherence) + 0.25 * evenness ** 2 + # return 0.8 * min(left_coherence, right_coherence) + 0.2 * evenness ** 2 + # coherence = left_coherence if left_depth > right_depth else right_coherence if right_depth > left_depth else (left_coherence + right_coherence) / 2.0 + # depth = max(left_depth, right_depth) + # return (depth, 0.9 * coherence + 0.1 * (1.0 - (evenness ** 2))) + # return 0.8 * min(left_coherence, right_coherence) 
+ 0.2 * (1.0 - (evenness ** 2)) + # return 0.75 * min(left_coherence, right_coherence) + 0.25 * (evenness ** 2) + # return ((left_coherence * len(left) + right_coherence * len(right)) / (len(left) +len(right))) * min(left_coherence, right_coherence) * evenness + # return min(left_coherence, right_coherence) * (evenness ** 2) + # coherence = ((len(left) / (len(left) + len(right))) * left_coherence + (len(right) / (len(left) + len(right))) * right_coherence) + # return min(left_coherence, right_coherence) * (evenness ** 2) + span_left = span(left, N) + span_right = span(right, N) + weighted_left_coherence = span_left * left_coherence / (span_left + span_right) + weighted_right_coherence = span_right * right_coherence / (span_left + span_right) + return (weighted_left_coherence + weighted_right_coherence) * (evenness ** 2) + +def compute_split_knowns_r(knowns, N): + # if len(knowns) == 0: + # return 1.0 + + # hist = np.zeros(N) + # for i in range(0, N): + # mask = 1 << i + # for (j, value) in knowns: + # if j & mask == 0: + # hist[i] += 1 + + # constant_bits = [i for i in range(0, N) if hist[i] == 0 or hist[i] == len(knowns)] + # if len(constant_bits) > 0: + # constant_bits.reverse() + # for n in constant_bits: + # knowns = [(remove_bit(j, n), value) for (j, value) in knowns] + # return compute_split_knowns_r(knowns, N - len(constant_bits), depth) + + if len(knowns) == 1: + return 1.0 + if len(knowns) == 2: + if knowns[0][1] == knowns[1][1]: + return 1.0 + else: + return 0.0 + + sum = 0 + denominator = 0 + for i in range(0, N): + (left, right) = split_at(knowns, N, i) + if len(left) == 0 or len(right) == 0: + continue + weight = min(span(left, N), span(right, N)) + # weight = max(span(left, N), span(right, N)) / min(span(left, N), span(right, N)) + # weight = 1.0 - (abs(len(left) - len(right)) / (len(left) + len(right))) + if weight == 0: + continue + partial = compute_coherence((left, right), N - 1) + sum += weight * partial + denominator += weight + return sum / denominator + +def invert(knowns): + inverted_knowns = [] + for (i, value) in knowns: + inverted_knowns.append((i, 1 - value)) + return inverted_knowns + +def reduce(knowns, N): + flips = [] + best_coherence = compute_split_knowns_r(knowns, N) + print(best_coherence) + print(knowns) + print() + while best_coherence < 1.0: + best_index = -1 + best_reverse = False + # best_coherence = 0 + for i in range(0, N): + for reverse in [False, True]: + mutated_knowns = xor_by_index(knowns, i, reverse) + # coherence = compute_coherence(mutated_knowns, N) + coherence = compute_split_knowns_r(mutated_knowns, N) + print(i, reverse, coherence) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + best_reverse = reverse + if best_index < 0: + break + knowns = xor_by_index(knowns, best_index, best_reverse) + flips.append((best_index, best_reverse)) + print() + print(best_index, best_reverse, best_coherence) + print(knowns) + print() + return (knowns, best_coherence, flips) + +def solve(knowns, N): + (knowns, coherence, flips) = reduce(knowns, N) + if coherence == 1.0: + inverted = knowns[0][1] + return (inverted, flips, None) + + raise Exception('Stop') + + best_coherence = 0 + best_index = -1 + best_identity_value = False + print() + for i in range(0, N): + for identity_value in [0, 1]: + coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N) + print(i, identity_value, coherence) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + best_identity_value = identity_value + 
print() + (left, right) = factor_at(knowns, N, best_index, best_identity_value) + return (0, flips, (best_identity_value, solve(left, N), solve(right, N))) + +def evaluate(model, n, value = 0): + (inverted, flips, child) = model + for (i, invert) in flips: + mask = (1 << i) + masked_n = n & mask + if (masked_n > 0 and not invert) or (masked_n == 0 and invert): + value = 1 - value + if not child is None: + (identity, left_child, right_child) = child + left = evaluate(left_child, n, 1 - identity) + right = evaluate(right_child, n, 1 - identity) + if left and right: + value = 1 - value + if identity == 0: + value = 1 - value + if inverted: + value = 1 - value + return value + +def run_for_input(input): + N = 8 + S = 2 ** N + train_size = 128 + test_size = 100 + f = xor_n + + knowns = [(i, f(i)) for i in input] + + # knowns = [] + # train_samples = set() + # for i in range(0, train_size): + # k = random.randint(0, S) + # while k in train_samples: + # k = random.randint(0, S) + # knowns.append((k, f(i))) + # train_samples.add(k) + + model = solve(knowns, N) + print(model) + # print(model) + correct = 0 + for i in range(0, test_size): + k = random.randint(0, S - 1) + if f(k) == evaluate(model, k): + correct += 1 + print(str(correct) + "/" + str(test_size)) + +def run(): + inputs = [ + # [0, 1, 2, 3, 4, 5, 6, 7], + # [0, 3, 4, 5, 7], + # [3, 5, 6, 10, 12, 14], + # [1, 3, 7, 10, 14, 15], + # [0, 3, 5, 6, 10, 11, 12], + # [0, 3, 5, 6, 10, 11, 12, 24, 30], + [0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127], + # [128, 131, 248, 0, 7, 13, 17, 19], + # [23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255] + ] + results = [] + for i, input in enumerate(inputs): + success = False + try: + run_for_input(input) + success = True + except: + pass + results.append(success) + print(results) + +if __name__ == "__main__": + run() \ No newline at end of file diff --git a/model_probabilities8.py b/model_probabilities8.py new file mode 100755 index 0000000..7ae83ce --- /dev/null +++ b/model_probabilities8.py @@ -0,0 +1,219 @@ +import hashlib +import math +import numpy as np +import random + +def count_one_bits(n): + return bin(n).count("1") + +def xor_n(n): + return count_one_bits(n) % 2 + +def sha_n(n): + m = hashlib.sha256() + m.update(str(n).encode("utf-8")) + result = m.digest() + return result[0] & 0b1 + +def xor_by_index(knowns, index, reverse=False): + mask = 1 << index + knowns = knowns[:] + for i in range(len(knowns)): + known = knowns[i] + if known[0] & mask or (not (known[0] & mask) and reverse): + knowns[i] = (known[0], known[1] ^ 1) + return knowns + +def remove_bit(i, n): + return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1) + +def split_at(knowns, N, i): + mask = 1 << i + left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0] + right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0] + return (left, right) + +def factor_at(knowns, N, i, identity_value=1): + mask = 1 << i + left = [(j, value) for (j, value) in knowns if value == identity_value or (j & mask) == 0] + right = [(j, value) for (j, value) in knowns if value == identity_value or not (j & mask) == 0] + return (left, right) + +def compute_coherence(pair, N): + (left, right) = pair + left_coherence = compute_split_knowns_r(left, N) + right_coherence = compute_split_knowns_r(right, N) + ratio = min(len(left), len(right)) / max(len(left), len(right)) + # evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if 
left_coherence > 0 and right_coherence > 0 else 1.0 + # evenness = left_coherence - right_coherence + evenness = (1.0 - ((1.0 - left_coherence) - (1.0 - right_coherence)) ** 2) + # return 0.75 * min(left_coherence, right_coherence) + 0.25 * evenness ** 2 + # return 0.8 * min(left_coherence, right_coherence) + 0.2 * evenness ** 2 + # coherence = left_coherence if left_depth > right_depth else right_coherence if right_depth > left_depth else (left_coherence + right_coherence) / 2.0 + # depth = max(left_depth, right_depth) + # return (depth, 0.9 * coherence + 0.1 * (1.0 - (evenness ** 2))) + # return 0.8 * min(left_coherence, right_coherence) + 0.2 * (1.0 - (evenness ** 2)) + # return 0.75 * min(left_coherence, right_coherence) + 0.25 * (evenness ** 2) + # return ((left_coherence * len(left) + right_coherence * len(right)) / (len(left) +len(right))) * min(left_coherence, right_coherence) * evenness + # return min(left_coherence, right_coherence) * (evenness ** 2) + coherence = ((len(left) / (len(left) + len(right))) * left_coherence + (len(right) / (len(left) + len(right))) * right_coherence) + return min(left_coherence, right_coherence) * (evenness ** 2) + +def compute_split_knowns_r(knowns, N): + # if len(knowns) == 0: + # return 1.0 + + # hist = np.zeros(N) + # for i in range(0, N): + # mask = 1 << i + # for (j, value) in knowns: + # if j & mask == 0: + # hist[i] += 1 + + # constant_bits = [i for i in range(0, N) if hist[i] == 0 or hist[i] == len(knowns)] + # if len(constant_bits) > 0: + # constant_bits.reverse() + # for n in constant_bits: + # knowns = [(remove_bit(j, n), value) for (j, value) in knowns] + # return compute_split_knowns_r(knowns, N - len(constant_bits), depth) + + if len(knowns) == 1: + return 1.0 + if len(knowns) == 2: + if knowns[0][1] == knowns[1][1]: + return 1.0 + else: + return 0.0 + + sum = 0 + denominator = 0 + for i in range(0, N): + (left, right) = split_at(knowns, N, i) + weight = min(len(left), len(right)) / max(len(left), len(right)) + # weight = 1.0 - (abs(len(left) - len(right)) / (len(left) + len(right))) + if weight == 0: + continue + partial = compute_coherence((left, right), N - 1) + sum += weight * partial + denominator += weight + return sum / denominator + +def invert(knowns): + inverted_knowns = [] + for (i, value) in knowns: + inverted_knowns.append((i, 1 - value)) + return inverted_knowns + +def reduce(knowns, N): + flips = [] + best_coherence = compute_split_knowns_r(knowns, N) + print(best_coherence) + print(knowns) + print() + while best_coherence < 1.0: + best_index = -1 + best_reverse = False + # best_coherence = 0 + for i in range(0, N): + for reverse in [False, True]: + mutated_knowns = xor_by_index(knowns, i, reverse) + # coherence = compute_coherence(mutated_knowns, N) + coherence = compute_split_knowns_r(mutated_knowns, N) + print(i, reverse, coherence) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + best_reverse = reverse + if best_index < 0: + break + knowns = xor_by_index(knowns, best_index, best_reverse) + flips.append((best_index, best_reverse)) + print() + print(best_index, best_reverse, best_coherence) + print(knowns) + print() + return (knowns, best_coherence, flips) + +def solve(knowns, N): + (knowns, coherence, flips) = reduce(knowns, N) + if coherence == 1.0: + inverted = knowns[0][1] + return (inverted, flips, None) + + raise Exception('Stop') + + best_coherence = 0 + best_index = -1 + best_identity_value = False + print() + for i in range(0, N): + for identity_value in [0, 1]: + coherence = 
compute_coherence(factor_at(knowns, N, i, identity_value), N) + print(i, identity_value, coherence) + if coherence > best_coherence: + best_coherence = coherence + best_index = i + best_identity_value = identity_value + print() + (left, right) = factor_at(knowns, N, best_index, best_identity_value) + return (0, flips, (best_identity_value, solve(left, N), solve(right, N))) + +def evaluate(model, n, value = 0): + (inverted, flips, child) = model + for (i, invert) in flips: + mask = (1 << i) + masked_n = n & mask + if (masked_n > 0 and not invert) or (masked_n == 0 and invert): + value = 1 - value + if not child is None: + (identity, left_child, right_child) = child + left = evaluate(left_child, n, 1 - identity) + right = evaluate(right_child, n, 1 - identity) + if left and right: + value = 1 - value + if identity == 0: + value = 1 - value + if inverted: + value = 1 - value + return value + +def main(): + N = 8 + S = 2 ** N + train_size = 128 + test_size = 100 + f = xor_n + + knowns = [(i, f(i)) for i in [ + # 0, 1, 2, 3, 4, 5, 6, 7 + # 0, 3, 4, 5, 7 + # 3, 5, 6, 10, 12, 14 + # 1, 3, 7, 10, 14, 15 + # 0, 3, 5, 6, 10, 11, 12 + # 0, 3, 5, 6, 10, 11, 12, 24, 30 + # 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127 + # 128, 131, 248, 0, 7, 13, 17, 19 + 23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255 + ]] + + # knowns = [] + # train_samples = set() + # for i in range(0, train_size): + # k = random.randint(0, S) + # while k in train_samples: + # k = random.randint(0, S) + # knowns.append((k, f(i))) + # train_samples.add(k) + + model = solve(knowns, N) + print(model) + # print(model) + correct = 0 + for i in range(0, test_size): + k = random.randint(0, S - 1) + if f(k) == evaluate(model, k): + correct += 1 + print(str(correct) + "/" + str(test_size)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/model_probabilities9.py b/model_probabilities9.py new file mode 100755 index 0000000..8fdb636 --- /dev/null +++ b/model_probabilities9.py @@ -0,0 +1,310 @@ +import hashlib +import math +import numpy as np +import random +import secrets +from struct import pack, pack_into, unpack_from + +def bit_at_index(buffer, index): + offset = (index >> 3) % len(buffer) + return buffer[offset] & (1 << (index & 0b111)) != 0 + +def count_one_bits(n): + return bin(n).count("1") + +def hamming_distance(a, b): + distance = 0 + for i in range(0, len(a)): + distance += count_one_bits(a[i] ^ b[i]) + return distance + +def xor_n(n): + return count_one_bits(n) % 2 + +def sha(x): + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def apply_flips(samples, inputs, flips): + samples = samples[:] + for i in range(len(samples)): + (key, old_value) = samples[i] + new_value = old_value + for index in flips: + if bit_at_index(inputs[key], index): + new_value = new_value ^ 1 + if not new_value == old_value: + samples[i] = (key, new_value) + return samples + +def coherence_for_knowns(knowns, distances, N): + if len(knowns) == 1: + return 1.0 + coherences = [] + for i in range(0, len(knowns)): + (a_key, a_value) = knowns[i] + numerator = 0 + denominator = 0 + for j in range(0, len(knowns)): + if i == j: + continue + (b_key, b_value) = knowns[j] + distance = distances[a_key][b_key] + weight = 1.0 / (2 ** distance) + denominator += weight + if a_value == b_value: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return sum(coherences) / len(coherences) + +def 
iterate_indices(indices, N): + carry_index = -1 + for i in range(0, len(indices)): + j = len(indices) - i - 1 + if indices[j] + 1 + i < N: + carry_index = j + break + if carry_index < 0: + return None + base_value = indices[carry_index] + for i in range(0, len(indices) - carry_index): + new_value = base_value + i + 1 + if new_value >= N: + return None + indices[carry_index + i] = new_value + return indices + +def compute_indices(samples, inputs, N): + zero_buckets = [False for i in range(0, N)] + one_buckets = [False for i in range(0, N)] + for (key, _) in samples: + for index in range(0, N): + if bit_at_index(inputs[key], index): + one_buckets[index] = True + else: + zero_buckets[index] = True + return [index for index in range(0, N) if zero_buckets[index] and one_buckets[index]] + +def compute_distances(inputs, distances): + for i in range(0, len(inputs)): + a = inputs[i] + for j in range(i, len(inputs)): + b = inputs[j] + distance = hamming_distance(a, b) if j != i else 0 + distances[i][j] = distance + distances[j][i] = distance + +def reduce(samples, inputs, distances, N): + available_indices = compute_indices(samples, inputs, N) + flips = [] + best_coherence = coherence_for_knowns(samples, distances, N) + # print(best_coherence) + # print(knowns) + # print() + depth = 1 + while depth <= len(available_indices) and depth < 2: + while best_coherence < 1.0: + best_flip = None + try_indices = [i for i in range(0, depth)] + while not try_indices is None: + try_flip = [available_indices[i] for i in try_indices] + mutated_samples = apply_flips(samples, inputs, try_flip) + coherence = coherence_for_knowns(mutated_samples, distances, N) + # print(try_flip, coherence) + if coherence > best_coherence: + best_coherence = coherence + best_flip = try_flip + try_indices = iterate_indices(try_indices, len(available_indices)) + + if best_flip is None: + depth += 1 + # print(depth) + break + samples = apply_flips(samples, inputs, best_flip) + flips += best_flip + available_indices = [index for index in available_indices if index not in best_flip] + depth = 1 + # print() + # print(best_flip, best_coherence) + # print(knowns) + # print() + # print(depth) + if len(available_indices) == 0: + break + if best_coherence == 1.0: + break + return (samples, best_coherence, flips) + +def dominant_value(knowns, M=2): + buckets = [0 for i in range(0, M)] + for (_, value) in knowns: + buckets[value] += 1 + return buckets.index(max(buckets)) + +def solve(samples, inputs, distances, N): + (samples, coherence, flips) = reduce(samples, inputs, distances, N) + if coherence == 1.0: + inverted = samples[0][1] + return (inverted, flips, None) + + identity = dominant_value(samples) + left = [(key, 1) for (key, value) in samples if value != identity] + right = [(key, 1) for (key, value) in samples if value != identity] + for (key, value) in samples: + if value == identity: + if random.random() > 0.5: + left.append((key, 0)) + else: + right.append((key, 0)) + + return (0, flips, (identity, solve(left, inputs, distances, N), solve(right, inputs, distances, N))) + +def evaluate(model, x, value = 0): + (inverted, flips, child) = model + for i in flips: + if bit_at_index(x, i) != 0: + value ^= 1 + if not child is None: + (identity, left_child, right_child) = child + left = evaluate(left_child, x) + right = evaluate(right_child, x) + if left & right != identity: + value ^= 1 + if inverted: + value ^= 1 + return value + +def transform(x, layers): + x[0] = 0 + for layer in layers: + prefix = 0 + for i in range(0, len(layer)): + model 
= layer[i] + value = evaluate(model, x) + prefix <<= 1 + prefix |= value + x[0] = prefix + +def encode_f(f, buffer, offset=0): + (inverted, flips, residual) = f + pack_into('B', buffer, offset, inverted) + offset += 1 + for index in flips: + pack_into('B', buffer, offset, 0) + offset += 1 + pack_into('I', buffer, offset, index) + offset += 4 + if residual is None: + pack_into('B', buffer, offset, 1) + offset += 1 + return offset + (inverted, left, right) = residual + pack_into('B', buffer, offset, 2 if not inverted else 3) + offset += 1 + offset = encode_f(left, buffer, offset) + offset = encode_f(right, buffer, offset) + return offset + +def decode_f(buffer, offset = 0): + [inverted] = unpack_from('B', buffer, offset) + offset += 1 + inverted &= 0b1 + flips = [] + while offset < len(buffer): + [opcode] = unpack_from('B', buffer, offset) + offset += 1 + opcode &= 0b11 + if opcode == 0: + [index] = unpack_from('I', buffer, offset) + offset += 4 + flips.append(index) + elif opcode == 1: + return (offset, (inverted, flips, None)) + else: + (offset, left) = decode_f(buffer, offset) + (offset, right) = decode_f(buffer, offset) + gate_inverted = 0 if opcode == 2 else 1 + return (offset, (gate_inverted, flips, (left, right))) + return (offset, (inverted, [], None)) + +def random_input(): + return bytearray(1) + secrets.token_bytes(3) + +def main(): + N = 32 + S = 2 ** N + train_size = 64 + test_size = 1000 + f = sha + num_epochs = 4 + num_layers = 7 + layers_samples = [] + layers = [] + score = 0.5 + distances = np.zeros((train_size, train_size)) + + for epoch in range(0, num_epochs): + layer = [] + layer_samples = [] + total_correct = 0.0 + layer_index = 0 + total_difficulty = 0 + difficulty = 0 + while layer_index < num_layers: + inputs = [] + samples = [] + raw_samples = [] + for i in range(0, train_size): + x = random_input() + y = f(x) + transform(x, layers) + inputs.append(x) + samples.append((i, y)) + raw_samples.append((x, y)) + + compute_distances(inputs, distances) + model = solve(samples, inputs, distances, N) + # print(model) + # encoded = bytearray(1024) + # offset = encode_f(model, encoded) + # decoded_model = decode_f(encoded) + # print() + # print(decoded_model) + + # correct = 0 + # for (x, y) in samples: + # if evaluate(model, inputs[x]) == y: + # correct += 1 + # print(str(correct) + "/" + str(train_size)) + + correct = 0 + for _ in range(0, test_size): + x = random_input() + y = f(x) + transform(x, layers) + if evaluate(model, x) == y: + correct += 1 + difficulty += 1 + local_score = correct / test_size + if local_score < score - 0.0001 * difficulty: + continue + # print_score = round(local_score * 10000.0) / 100.0 + # print('Layer ' + str(layer_index) + ': ' + str(candidates) + ' ' + str(print_score) + '%') + layer_index += 1 + total_correct += correct + total_difficulty += difficulty + difficulty = 0 + layer.append(model) + layer_samples.append(raw_samples) + score = total_correct / (test_size * num_layers) + average_difficulty = round(total_difficulty * 100.0 / num_layers) / 100.0 + print_score = round(score * 10000.0) / 100.0 + print('Epoch ' + str(epoch) + ': ' + str(average_difficulty) + ' ' + str(print_score) + '%') + layers.append(layer) + layers_samples.append(layer_samples) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations.cl b/mutations.cl new file mode 100644 index 0000000..631e2cd --- /dev/null +++ b/mutations.cl @@ -0,0 +1,96 @@ +__kernel void compute_distances(__global const uchar* x, __global float* distances) { + int i = 
get_global_id(0); + int j = get_global_id(1); + int index = i * get_global_size(1) + j; + if (i == j) { + distances[index] = 0; + return; + } + float distance = 0; + for (int k = 0; k < {N}; k++) { + distance += x[i * {N} + k] ^ x[j * {N} + k]; + } + distances[index] = pow(2, -distance); +} + +__kernel void evaluate(__global const uchar* program, __global const uchar* x, __global uchar* scratch, __global uchar* y) { + int program_index = get_global_id(0) * {MAX_PROGRAM_SIZE} * (1 + {N} + 2); + int scratch_index = get_global_id(0) * {MAX_PROGRAM_SIZE}; + int input_index = get_global_id(1) * {N}; + int output_index = get_global_id(1); + + scratch[scratch_index] = 0; + + for (int i = 0; i < {MAX_PROGRAM_SIZE}; i++) { + uchar output = program[program_index++]; + + for (int j = 0; j < {N}; j++) { + output += program[program_index++] * x[input_index + j]; + } + int left_index = program[program_index++]; + int right_index = program[program_index++]; + + output += scratch[scratch_index + left_index] * scratch[scratch_index + right_index]; + output %= {M}; + + if (program[program_index] == 255) { + y[output_index] = output; + return; + } else { + scratch[scratch_index + i + 1] = output; + } + } +} + +__kernel void compute_coherences(__global const uchar* y, __global const uchar* z, __global const float* distances, __global float* coherences) { + int index = get_global_id(0); + int sample_size = get_global_size(0); + + float numerator = 0; + float denominator = 0; + for (int i = 0; i < sample_size; i++) { + int p = z[i] ^ y[index * sample_size + i]; + for (int j = 0; j < sample_size; j++) { + int q = z[j] ^ y[index * sample_size + j]; + float distance = distances[i * sample_size + j]; + denominator += distance; + if (p == q) { + numerator += distance; + } + } + } + coherences[index] = numerator / denominator; +} + +__kernel void initialize_sort(__global uint* indices, __global uint* offset) { + uint index = get_global_id(0); + indices[index] = index; + if (index == 0) { + *offset = 0; + } +} + +__kernel void increment_offset(__global uint* offset) { + uint x = *offset; + if (x == 0) { + *offset = 1; + } else { + *offset = 0; + } +} + +__kernel void sort(__global const float* coherences, __global uint* indices, __global uint* offset) { + uint index = get_global_id(0) * 2 + *offset; + uint a = indices[index]; + uint b = indices[index + 1]; + float coherence_a = coherences[a]; + float coherence_b = coherences[b]; + if (coherence_a < coherence_b) { + indices[index] = b; + indices[index + 1] = a; + } +} + +__kernel void evolve(__global const uchar* program, __global float* coherences) { + int index_a = get_global_id(0); +} \ No newline at end of file diff --git a/mutations.py b/mutations.py new file mode 100644 index 0000000..bc0fa00 --- /dev/null +++ b/mutations.py @@ -0,0 +1,511 @@ +import hashlib +import math +import numpy as np +import random +from struct import pack, pack_into, unpack_from +import secrets + +from numpy import hamming + +N = 8 + +def bit_at_index(buffer, index): + offset = (index >> 3) % len(buffer) + return buffer[offset] & (1 << (index & 0b111)) != 0 + +def count_one_bits(n): + return bin(n).count("1") + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def encode_f(f, buffer, offset=0): + (inverted, flips, child) = f + pack_into('I', buffer, offset, inverted) + offset += 4 + for index in flips: + pack_into('I', buffer, offset, 0) + offset += 4 + pack_into('I', buffer, offset, index) + offset += 4 + if child is None: + pack_into('I', 
buffer, offset, 1) + offset += 4 + return offset + (inverted, left, right) = child + pack_into('I', buffer, offset, 2 if not inverted else 3) + offset += 4 + offset = encode_f(left, buffer, offset) + offset = encode_f(right, buffer, offset) + return offset + +def generate_random_branch(p_mutation): + global N + + p_add_indices = p_mutation * random.random() + p_add_children = p_mutation * random.random() + + inverted = random.randint(0, 1) + indices = set() + children = [] + + # randomly add indices + while random.random() < p_add_indices and len(indices) < N: + available_indices = [i for i in range(0, N) if i not in indices] + if len(available_indices) == 1: + indices.add(available_indices[0]) + continue + indices.add(available_indices[random.randint(0, len(available_indices) - 1)]) + # randomly add children + while random.random() < p_add_children: + child_inverted = random.randint(0, 1) + left = generate_random_branch(p_add_children) + right = generate_random_branch(p_add_children) + children.append((child_inverted, left, right)) + return (inverted, indices, children) + +def mutate_f(f, p_mutation): + global N + (inverted, indices, children) = f + mutated_indices = set(indices) + mutated_children = children[:] + + p_invert = p_mutation * random.random() + p_drop_indices = p_mutation * random.random() + p_add_indices = p_mutation * random.random() + p_drop_children = p_mutation * random.random() + p_mutate_child = p_mutation * random.random() + p_clone_child = p_mutation * random.random() + p_invert_child = p_mutation * random.random() + p_add_children = p_mutation * random.random() + + # randomly invert + if random.random() < p_invert: + inverted ^= 1 + # randomly drop indices + while random.random() < p_drop_indices and len(mutated_indices) > 0: + mutated_indices.pop() + # randomly add indices + while random.random() < p_add_indices and len(mutated_indices) < N: + available_indices = [i for i in range(0, N) if i not in mutated_indices] + if len(available_indices) == 1: + mutated_indices.add(available_indices[0]) + continue + mutated_indices.add(available_indices[random.randint(0, len(available_indices) - 1)]) + # randomly drop children + while random.random() < p_drop_children and len(mutated_children) > 0: + if len(mutated_children) == 1: + del mutated_children[0] + break + del mutated_children[random.randint(0, len(mutated_children) - 1)] + # randomly clone children + while random.random() < p_clone_child and len(mutated_children) > 0: + index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1) + (child_inverted, left, right) = mutated_children[index] + if random.random() < p_invert_child: + child_inverted ^= 1 + clone = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation)) + mutated_children.append(clone) + # randomly mutate children + while random.random() < p_mutate_child and len(mutated_children) > 0: + index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1) + (child_inverted, left, right) = mutated_children[index] + if random.random() < p_invert_child: + child_inverted ^= 1 + mutated_children[index] = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation)) + # randomly add children + while random.random() < p_add_children: + child_inverted = random.randint(0, 1) + left = generate_random_branch(p_mutation) + right = generate_random_branch(p_mutation) + mutated_children.append((child_inverted, left, right)) + return (inverted, mutated_indices, mutated_children) + +def 
decode_f(buffer, mutate = False, offset = 0, skip_invert = False): + global N + inverted = 0 + if not skip_invert: + [inverted] = unpack_from('I', buffer, offset) + offset += 4 + # random invert + if mutate and random.random() < 0.01: + inverted ^= 1 + inverted &= 0b1 + flips = set() + # random add flip + while mutate and random.random() < 0.5 and len(flips) < N: + available_indices = [i for i in range(0, N) if i not in flips] + if len(available_indices) == 1: + flips.add(available_indices[0]) + continue + flips.add(available_indices[random.randint(0, len(available_indices) - 1)]) + while offset < len(buffer): + # random create branch + if mutate and random.random() < 0.01: + gate_inverted = random.randint(0, 1) + left = generate_random_branch() + (offset, right) = decode_f(buffer, mutate, offset, True) + return (offset, (inverted, flips, (gate_inverted, left, right))) + [opcode] = unpack_from('I', buffer, offset) + offset += 4 + opcode &= 0b11 + if opcode == 0: + [index] = unpack_from('I', buffer, offset) + offset += 4 + # random skip flip + if mutate and random.random() < 0.01: + continue + if index in flips: + flips.remove(index) + else: + flips.add(index) + elif opcode == 1: + return (offset, (inverted, flips, None)) + else: + (offset, left) = decode_f(buffer, mutate, offset) + (offset, right) = decode_f(buffer, mutate, offset) + gate_inverted = 0 if opcode == 2 else 1 + # random invert + if mutate and random.random() < 0.01: + gate_inverted ^= 1 + # random skip branch + if mutate and random.random() < 0.01: + return (offset, (inverted, flips, None)) + return (offset, (inverted, flips, (gate_inverted, left, right))) + return (offset, (inverted, [], None)) + +def generate_program(f): + statement = "" + (inverted, indices, children) = f + if inverted: + statement += "1^" + statement += "(" + for i in indices: + statement += "(x[" + str(i) + ">>3]&(1<<(" + str(i) + "&0b111))!=0)^" + for child in children: + (gate_inverted, left, right) = child + if gate_inverted: + statement += "1^" + statement += "((" + generate_program(left) + ")&(" + generate_program(right) + "))^" + statement += "0)" + return statement + +def compile_f(f): + program = 'def f(x):\n\treturn ' + generate_program(f) + scope = {} + exec(program, scope) + return scope['f'] + +def evaluate(model, x, value = 0): + (inverted, indices, children) = model + for i in indices: + if bit_at_index(x, i) != 0: + value ^= 1 + for child in children: + (child_inverted, left, right) = child + left = evaluate(left, x) + right = evaluate(right, x) + if left & right != child_inverted: + value ^= 1 + if inverted: + value ^= 1 + return value + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(N / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(x) + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def xor(x): + num_one_bits = 0 + for n in x: + num_one_bits += count_one_bits(n) + return num_one_bits % 2 + +def random_sample(m, n): + inputs = np.zeros((m, n)) + for i in range(0, m): + for j in range(0, n): + inputs[i][j] = random.randint(0, 1) + return inputs + +def update_sample(sample, index): + global N + for j in range(0, N): + sample[index][j] = random.randint(0, 1) + +def coherence(inputs, outputs): + coherences = [] + for i in range(0, len(inputs)): + x_a = inputs[i] + y_a = outputs[i] + numerator = 0 + denominator = 0 + for j in range(0, len(inputs)): + if i == j: + continue + 
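+ # Sketch of the formula this loop is computing: for sample i,
+ #   coherence_i = sum_{j != i} w_ij * [y_i == y_j] / sum_{j != i} w_ij
+ # with w_ij = 2 ** -hamming(x_i, x_j), and the function returns the mean of
+ # coherence_i over all samples, so nearby inputs count far more than distant ones.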
x_b = inputs[j] + y_b = outputs[j] + distance = hamming_distance(x_a, x_b) + weight = 1.0 / (2 ** distance) + denominator += weight + if y_a == y_b: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return sum(coherences) / len(coherences) + +def score(f, sample, distances): + return coherence([(x, f(x) ^ y) for (x, y) in sample], distances) + +def compute_distances(inputs, distances, scratch): + for i in range(0, len(inputs)): + a = inputs[i] + for j in range(i, len(inputs)): + if i == j: + distances[i][j] = 0 + continue + b = inputs[j] + distance = 2 ** -hamming_distance(a, b, scratch) + distances[i][j] = distance + distances[j][i] = distance + +def update_distances(inputs, distances, i, scratch): + a = inputs[i] + for j in range(0, len(inputs)): + if i == j: + distances[i][j] = 0 + continue + b = inputs[j] + distance = 2 ** -hamming_distance(a, b, scratch) + distances[i][j] = distance + distances[j][i] = distance + +def evaluate_sample(model, sample, output): + stack = [model] + (_, _, _, root_scratch, _) = model + while len(stack) > 0: + layer = stack.pop() + (inverted, xors, child, scratch, touched) = layer + if child is None: + np.matmul(sample, xors, scratch) + np.mod(scratch, 2, scratch) + if inverted == 1: + np.logical_xor(1, scratch, scratch) + touched[0] = 1 + else: + (child_inverted, left, right) = child + (_, _, _, left_scratch, left_touched) = left + (_, _, _, right_scratch, right_touched) = right + if left_touched[0] and right_touched[0]: + np.multiply(left_scratch, right_scratch, output) + np.matmul(sample, xors, scratch) + np.mod(scratch, 2, scratch) + if inverted: + np.logical_xor(scratch, 1, scratch) + if child_inverted: + np.logical_xor(output, 1, output) + np.logical_xor(scratch, output, scratch) + touched[0] = 1 + else: + stack.insert(0, layer) + stack.insert(0, left) + stack.insert(0, right) + np.copyto(output, root_scratch) + reset_model(model) + +def reset_model(model): + stack = [model] + while len(stack) > 0: + layer = stack.pop() + (_, _, child, _, touched) = layer + touched[0] = 0 + if not child is None: + (_, left, right) = child + stack.append(left) + stack.append(right) + +def clone_model(model, p_mutation): + global N + + p_invert = p_mutation * random.random() + p_invert_child = p_mutation * random.random() + p_flip = p_mutation * random.random() + p_add_child = p_mutation * random.random() + # p_drop_child = p_mutation * random.random() * 0.5 + p_drop_child = 0 + + (inverted, xors, child, scratch, touched) = model + if random.random() < p_invert: + inverted ^= 1 + clone_xors = np.zeros((N,)) + np.copyto(clone_xors, xors) + for i in range(0, N): + if random.random() < p_flip: + clone_xors[i] = int(clone_xors[i]) ^ 1 + clone_scratch = np.zeros(np.shape(scratch)) + clone_touched = np.zeros(np.shape(touched)) + if child is None: + if random.random() < p_add_child: + sample_size = len(scratch) + child_inverted = random.randint(0, 1) + left = random_child(sample_size, p_mutation) + right = random_child(sample_size, p_mutation) + return (inverted, clone_xors, (child_inverted, left, right), clone_scratch, clone_touched) + return (inverted, clone_xors, None, clone_scratch, clone_touched) + if random.random() < p_drop_child: + return (inverted, clone_xors, None, clone_scratch, clone_touched) + (child_inverted, left, right) = child + if random.random() < p_invert_child: + inverted ^= 1 + clone_left = clone_model(left, p_mutation) + clone_right = clone_model(right, p_mutation) + return (inverted, 
clone_xors, (child_inverted, clone_left, clone_right), clone_scratch, clone_touched) + +def random_child(sample_size, p_mutation): + global N + inverted = random.randint(0, 1) + xors = np.zeros((N,)) + scratch = np.zeros((sample_size,)) + touched = np.zeros((1,)) + + p_flip = p_mutation * random.random() + p_child = p_mutation * random.random() + + index = random.randint(0, N - 1) + xors[index] = 1 + for i in range(0, N): + if random.random() < p_flip: + xors[i] = 1 + # if random.random() < p_child: + # child_inverted = random.randint(0, 1) + # left = random_child(sample_size, p_mutation * random.random()) + # right = random_child(sample_size, p_mutation * random.random()) + # return (inverted, xors, (child_inverted, left, right), scratch, touched) + return (inverted, xors, None, scratch, touched) + +def size(model): + (_, xors, child, _, _) = model + xor_size = np.sum(xors) + if not child is None: + (_, left, right) = child + return xor_size + size(left) * size(right) + return xor_size + +def null_candidate(sample_size): + global N + return (0, np.zeros((N,)), None, np.zeros((sample_size,)), np.zeros((1,))) + +def main(): + global N + epochs = 10000 + num_survivors = 100 + num_offspring = 10 + num_candidates = num_survivors + num_survivors * num_offspring + sample_size = 32 + eval_size = 100 + p_mutation = 0.5 + g = sha + current_generation = [null_candidate(sample_size) for _ in range(0, num_candidates)] + + distances = np.zeros((sample_size, sample_size)) + output_equality = np.zeros((sample_size, sample_size)) + inputs = random_sample(sample_size, N) + scratch = np.zeros(N,) + compute_distances(inputs, distances, scratch) + expected_outputs = np.zeros((sample_size,)) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + outputs = np.zeros((sample_size,)) + output_xor = np.zeros((sample_size,)) + ones = np.ones((sample_size,)) + numerators = np.zeros((sample_size,)) + denominators = np.zeros((sample_size,)) + coherences = np.zeros((sample_size,)) + np.matmul(ones, distances, denominators) + scores = np.zeros((num_candidates,)) + max_score = 0 + last_score = 0 + streak = 0 + + for epoch in range(0, epochs): + for i in range(0, num_candidates): + candidate = current_generation[i] + evaluate_sample(candidate, inputs, outputs) + np.logical_xor(outputs, expected_outputs, output_xor) + for p in range(0, sample_size): + for q in range(0, sample_size): + m = int(output_xor[p]) + n = int(output_xor[q]) + output_equality[p][q] = 1 ^ m ^ n + np.multiply(output_equality, distances, output_equality) + np.matmul(ones, output_equality, numerators) + np.divide(numerators, denominators, coherences) + score = np.average(coherences) + scores[i] = score + + top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:] + survivors = [current_generation[index] for index in top_n] + + # f = lambda x: evaluate(current_generation[0], x) + # correct = 0 + # for i in range(0, eval_size): + # x = random_input() + # if f(x) == g(x): + # correct += 1 + + top_score = scores[top_n[-1]] + print(epoch, top_score, size(survivors[-1])) + if top_score <= max_score: + p_mutation += 0.01 + else: + p_mutation = 0.5 + max_score = top_score + + for i in range(0, num_survivors): + current_generation[i] = survivors[i] + + for i in range(0, num_survivors): + candidate = survivors[i] + for j in range(0, num_offspring): + index = num_survivors + j * num_survivors + i + current_generation[index] = clone_model(candidate, random.random()) + + # while random.random() < 0.5: + if last_score == top_score: 
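+ # If the top score has not improved this epoch, the sample below is redrawn
+ # in full (inputs, distances, expected outputs) so survivors can't just
+ # overfit one fixed sample.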
+ # streak += 1 + # else: + # streak = 0 + # if streak >= 4: + # streak = 0 + inputs = random_sample(sample_size, N) + compute_distances(inputs, distances, scratch) + np.matmul(ones, distances, denominators) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + # expected_outputs = np.zeros((sample_size,)) + # for i in range(0, sample_size): + # expected_outputs[i] = g(inputs[i]) + # index = random.randint(0, sample_size - 1) + # update_sample(inputs, index) + # expected_outputs[index] = g(inputs[index]) + # update_distances(inputs, distances, index, scratch) + # np.matmul(ones, distances, denominators) + last_score = top_score + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations10.py b/mutations10.py new file mode 100644 index 0000000..1d514d1 --- /dev/null +++ b/mutations10.py @@ -0,0 +1,425 @@ +from enum import unique +import hashlib +import math +import numpy as np +import random +import time + +N = 8 +M = 2 + +def vec_to_int(x): + z = 0 + for i in range(0, len(x)): + z <<= 1 + z |= x[i] + return z + +def timeit(f): + def timed(*args, **kw): + ts = time.time() + result = f(*args, **kw) + te = time.time() + + print('func:%r took: %2.4f sec' % (f.__name__, te-ts)) + return result + return timed + +class Candidate: + def __init__(self, layer, slots): + global N + self.layer = layer + self.node_count = layer + self.offsets = np.zeros((self.node_count, N + 1 + slots)).astype(np.int32) + +class Probabilities: + def __init__(self, layer, slots): + global N + self.layer = layer + self.slots = slots + self.node_count = layer + self.p_offsets = np.zeros((self.node_count, N + 1 + slots)) + self.p_offsets.fill(0.5) + self.offset_coherences = np.zeros((2, self.node_count, N + 1 + slots, 2, self.node_count, N + 1 + slots)) + self.offset_coherences.fill(-1) + self.deltas = np.zeros((self.node_count, N + 1 + slots, 2, self.node_count, N + 1 + slots)) + + def has_converged(self): + for i in range(0,self.node_count): + for j in range(0, len(self.p_offsets[i])): + if self.p_offsets[i][j] > 0.05 and self.p_offsets[i][j] < 0.95: + return False + return True + + def flatten(self): + candidate = Candidate(self.layer, self.slots) + for i in range(0, self.node_count): + for j in range(0, len(self.p_offsets[i])): + candidate.offsets[i][j] = 1 if self.p_offsets[i][j] >= 0.5 else 0 + return candidate + +def clamp(x, min_value = 0.01, max_value = 1): + return min(max(x, min_value), max_value) + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +# 00100111 x4 +# 00000110 x1 +def sha(v): + global M + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] % M + +def xor(x): + num_one_bits = 0 + for i in range(0, len(x)): + if i == 0: + continue + num_one_bits += x[i] + return num_one_bits % 2 + + +# 0 ^ 1 ^ (2 ^ (4 * (5 ^ 0 * 7))) * (3 ^ 6 * 7) +# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * (5 ^ 0 * 7)) ^ 4 * 6 * 7 * (5 ^ 0 * 7) +# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * 5 ^ 0 * 3 * 4 * 7 ^ 4 * 5 * 6 * 7 ^ 0 * 4 * 6 * 7 + +# 0 ^ 1 ^ 2*3 ^ 2*6*7 ^ 3*4*5 ^ 0*3*4*7 ^ 4*5*6*7 ^ 0*4*6*7 +# What about strictly SOP? +# That is, 1-Hot of increasing complexity? +# How would that work? 
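+# One rough way it could work (hypothetical sketch, not used by the code in
+# this file): score every offset for a node, softmax the scores, and sample
+# exactly one offset so each AND-term stays 1-hot.
+def pick_one_hot(term_scores):
+    # term_scores: 1-D numpy array of per-offset scores for a single node (assumed shape)
+    p = np.exp(term_scores - np.max(term_scores))  # numerically stable softmax
+    p /= np.sum(p)
+    return np.random.choice(len(term_scores), p=p)  # index of the single chosen offset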
+# Candidate generation could apply some kind of softmax to filter down to one +# +def test_fn(x): + # 0 1 + # 2 | 3 + # 4 | 5 | 6 | 7 + # | | 0 | 7 | | | | + return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7]))) + +def candidate_fn(x): + return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2]) + +def true_fn(x): + return x[0] ^ x[1] ^ (x[3] * x[2]) + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def coherence(outputs, distances): + coherences = [] + for i in range(0, len(outputs)): + y_a = outputs[i] + numerator = 0 + denominator = 0 + for j in range(0, len(outputs)): + if i == j: + continue + y_b = outputs[j] + weight = distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return sum(coherences) / len(coherences) + +def random_sample(m, n, layers, g, compute_scratch): + inputs = np.zeros((m, n)).astype(np.int32) + augmented_inputs = np.zeros((m, n + len(layers) + 1)).astype(np.int32) + outputs = np.zeros((m,)).astype(np.int32) + for i in range(0, m): + for j in range(0, n): + val = random.randint(0, 1) + inputs[i][j] = val + augmented_inputs[i][j] = val + outputs[i] = g(inputs[i]) + augmented_inputs[i][n] = 1 + for j in range(0, len(layers)): + augmented_inputs[i][n + j] = evaluate_candidate(layers[j], augmented_inputs[i], compute_scratch) + return (inputs, augmented_inputs, outputs) + +def populate_distances(inputs, distances, scratch): + for i in range(0, len(inputs)): + x_a = inputs[i] + for j in range(0, len(inputs)): + if i == j: + continue + x_b = inputs[j] + distance = hamming_distance(x_a, x_b, scratch) + distances[i][j] = 1.0 / (2 ** distance) + +def evaluate(layers, candidate, x, compute_scratch): + z = evaluate_layers(layers, x, compute_scratch) + z ^= evaluate_candidate(candidate, x, compute_scratch) + return z + +def evaluate_layers(layers, x, compute_scratch): + z = 0 + for layer in layers: + z ^= evaluate_candidate(layer, x, compute_scratch) + return z + +def evaluate_candidate(candidate, x, compute_scratch): + y = 1 + for j in range(0, candidate.node_count): + value = 0 + compute_scratch.fill(0) + compute_scratch[0:len(candidate.offsets[j])] = candidate.offsets[j] + np.multiply(compute_scratch, x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + y &= value + return y + +@timeit +def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch): + global M + + for i in range(0, sample_size): + outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch) + for j in range(1, num_candidates): + np.copyto(outputs[j], outputs[0]) + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + base_score = coherence(output_xor, distances) + + scores.fill(0) + unique_candidates = {} + for j in range(0, num_candidates): + create_candidate(probabilities, candidates[j]) + unique_candidates[candidate_str(candidates[j])] = j + + for i in range(0, sample_size): + for _, j in unique_candidates.items(): + candidate = candidates[j] + outputs[j][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch) + for _, j in unique_candidates.items(): + candidate = candidates[j] + np.subtract(outputs[j], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + scores[j] = score + 
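+ # At this point scores[j] holds the coherence of the current layer stack with
+ # candidate j XOR-ed in, while base_score is the coherence of the stack alone,
+ # so each candidate can be judged against the no-candidate baseline.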
return base_score + + +def compute_uplift(candidate, layers, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch): + global M + + for i in range(0, sample_size): + outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch) + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + base_score = coherence(output_xor, distances) + + for i in range(0, sample_size): + outputs[0][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch) + + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + return (base_score, score) + +@timeit +def update_probabilities(probabilities, candidates, inputs, base_score, scores, scale): + num_candidates = len(candidates) + + probabilities.offset_coherences.fill(-1) + for p in range(0, num_candidates): + candidate = candidates[p] + if scores[p] == 0: + continue + # score = max(scores[p], base_score) + score = scores[p] + for j in range(0, probabilities.node_count): + for k in range(0, len(candidate.offsets[j])): + i = candidate.offsets[j][k] + for m in range(0, probabilities.node_count): + for n in range(0, len(candidate.offsets[m])): + l = candidate.offsets[m][n] + probabilities.offset_coherences[i][j][k][l][m][n] = max(score, probabilities.offset_coherences[i][j][k][l][m][n]) + + p_offsets_next = np.empty_like(probabilities.p_offsets) + inertia = 0 + for j in range(0, probabilities.node_count): + for k in range(0, len(p_offsets_next[j])): + delta = 0 + count = 0 + for m in range(0, probabilities.node_count): + for n in range(0, len(p_offsets_next[m])): + # if j == m and k == n: + # continue + p_j1_if_m0 = probabilities.offset_coherences[1][j][k][0][m][n] + p_j0_if_m0 = probabilities.offset_coherences[0][j][k][0][m][n] + p_j1_if_m1 = probabilities.offset_coherences[1][j][k][1][m][n] + p_j0_if_m1 = probabilities.offset_coherences[0][j][k][1][m][n] + if p_j1_if_m0 >= 0 and p_j0_if_m0 >= 0: + # delta_if_m0 = (p_j1_if_m0 - base_score) - (p_j0_if_m0 - base_score) + delta_if_m0 = p_j1_if_m0 - p_j0_if_m0 + delta += delta_if_m0 * (1.0 - probabilities.p_offsets[m][n]) * scale + count += 1 + if p_j1_if_m1 >= 0 and p_j0_if_m1 >= 0: + # delta_if_m1 = (p_j1_if_m1 - base_score) - (p_j0_if_m1 - base_score) + delta_if_m1 = p_j1_if_m1 - p_j0_if_m1 + delta += delta_if_m1 * probabilities.p_offsets[m][n] * scale + count += 1 + if count > 0: + delta /= count + p_offsets_next[j][k] = clamp(probabilities.p_offsets[j][k] + delta, 0, 1) + inertia += abs(p_offsets_next[j][k] - probabilities.p_offsets[j][k]) + + for j in range(0, probabilities.node_count): + for k in range(0, len(probabilities.p_offsets[j])): + p_offset_next = 0.9 * probabilities.p_offsets[j][k] + 0.1 * p_offsets_next[j][k] + # if p_offset_next <= 0.05: + # p_offset_next = 0.0 + # elif p_offset_next >= 0.95: + # p_offset_next = 1.0 + probabilities.p_offsets[j][k] = p_offset_next + + return inertia + +def create_candidate(probabilities, candidate): + candidate.offsets.fill(0) + for i in range(0, probabilities.node_count): + max_value = -1 + max_index = -1 + for j in range(0, len(probabilities.p_offsets[i])): + value = random.random() + probabilities.p_offsets[i][j] + if value > max_value: + max_value = value + max_index = j + # candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0 + candidate.offsets[i][max_index] = 1 + +def copy_candidate(src, dest): + for i in range(0, src.node_count): + for j in range(0, len(src.offsets[i])): + 
dest.offsets[i][j] = src.offsets[i][j] + +def p(x): + return math.ceil(x * 100) / 100 + +def p_a(x): + return [p(z) for z in x] + +def print_probabilities(probabilities): + print('=====================') + for i in range(0, probabilities.node_count): + print(i, p_a(probabilities.p_offsets[i])) + print('=====================') + +def candidate_str(candidate): + build_str = '' + for i in range(0, candidate.node_count): + for j in range(0, len(candidate.offsets[i])): + build_str += str(candidate.offsets[i][j]) + return build_str + +def main(): + global N, M + sample_size = 64 + num_candidates = 100 + num_survivors = 1 + uplift_sample_size = 100 + output_xor = np.zeros(sample_size,) + scratch = np.zeros((N,)) + int_scratch = np.zeros((N,)).astype(np.int32) + g = test_fn + layers = [] + augment_layers = [] + layer = 1 + (inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, int_scratch) + distances = np.zeros((sample_size, sample_size)) + populate_distances(inputs, distances, scratch) + outputs = np.zeros((num_candidates + num_survivors, sample_size,)).astype(np.int32) + scores = np.zeros((num_candidates + num_survivors,)) + + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + + while score < 1: + probabilities = Probabilities(layer, len(augment_layers)) + candidates = [Candidate(layer, len(augment_layers)) for _ in range(0, num_candidates + num_survivors)] + augmented_int_scratch = np.zeros((N + 1 + len(augment_layers),)).astype(np.int32) + (inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, augmented_int_scratch) + populate_distances(inputs, distances, scratch) + + inertia = 1 + epoch = 1 + while inertia > 0.001 and epoch < 1000 and not probabilities.has_converged(): + base_score = compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, augmented_inputs, outputs, output_xor, expected_outputs, sample_size, augmented_int_scratch) + round_inertia = update_probabilities(probabilities, candidates, augmented_inputs, base_score, scores, 1 + 0.01 * epoch) + inertia = 0.9 * inertia + 0.1 * round_inertia + + print_probabilities(probabilities) + for candidate in layers: + print(candidate.offsets) + max_score = np.max(scores) + print(base_score, max_score,round_inertia, inertia) + + top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:] + + for i in range(0, num_survivors): + src_index = top_n[i] + dest_index = num_candidates + i + if src_index == dest_index: + continue + src = candidates[src_index] + dest = candidates[dest_index] + candidates[dest_index] = src + candidates[src_index] = dest + + (inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, augmented_int_scratch) + populate_distances(inputs, distances, scratch) + epoch += 1 + + candidate = probabilities.flatten() + print(candidate.offsets) + for j in range(0, sample_size): + outputs[0][j] = evaluate(layers, candidate, augmented_inputs[j], augmented_int_scratch) + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + + average_base_score = 0 + average_score = 0 + for i in range(0, uplift_sample_size): + (inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, augmented_int_scratch) + populate_distances(inputs, distances, scratch) + (base_score, score) = 
compute_uplift(candidate, layers, distances, augmented_inputs, outputs, output_xor, expected_outputs, sample_size, augmented_int_scratch) + average_base_score += base_score + average_score += score + average_base_score /= uplift_sample_size + average_score /= uplift_sample_size + uplift = average_score - average_base_score + print(uplift) + + if uplift <= 0: + layer += 1 + # augment_layers = layers[1:] + continue + + layers.append(candidate) + # if layer == 1: + # layer += 1 + + for candidate in layers: + print(candidate.offsets) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations11.py b/mutations11.py new file mode 100644 index 0000000..9251c24 --- /dev/null +++ b/mutations11.py @@ -0,0 +1,535 @@ +from enum import unique +import hashlib +import math +import numpy as np +import random +import time + +N = 8 +N_ACTUAL = 2 * ((N - 1) + 8) +M = 2 + +def vec_to_int(x): + z = 0 + for i in range(0, len(x)): + z <<= 1 + z |= x[i] + return z + +def timeit(f): + def timed(*args, **kw): + ts = time.time() + result = f(*args, **kw) + te = time.time() + + print('func:%r took: %2.4f sec' % (f.__name__, te-ts)) + return result + return timed + +class Candidate: + def __init__(self, layer): + global N_ACTUAL + self.layer = layer + self.offsets = np.zeros((N_ACTUAL)).astype(np.int32) + +class Probabilities: + def __init__(self, layer): + global N_ACTUAL + self.layer = layer + self.p_offsets = np.zeros((N_ACTUAL)) + self.p_offsets.fill(0.5) + self.p_offsets_next = np.zeros((N_ACTUAL)) + self.offset_coherences = np.zeros((N_ACTUAL)) + self.offset_coherences.fill(-1) + self.knowns = set() + + def snap(self): + reset = False + for j in range(0, len(self.p_offsets)): + if self.p_offsets[j] > 0.6 and self.p_offsets[j] < 0.95: + self.p_offsets[j] = 1.0 + self.knowns.add(j) + flip = j ^ 0b1 + self.p_offsets[flip] = 0.0 + reset = True + break + elif self.p_offsets[j] < 0.05: + self.p_offsets[j] = 0.0 + if reset: + for j in range(0, len(self.p_offsets)): + flip = j ^ 0b1 + if self.p_offsets[j] < 0.95 and self.p_offsets[flip] < 0.95: + self.p_offsets[j] = 0.5 + + def eliminate_random_known(self): + if len(self.knowns) == 0: + return False + index = random.sample(self.knowns, 1)[0] + self.knowns.remove(index) + return True + + def reset(self): + self.p_offsets.fill(0.5) + for index in self.knowns: + flip = index ^ 0b1 + self.p_offsets[index] = 1.0 + self.p_offsets[flip] = 0.0 + + def all_zeros(self): + for j in range(0, len(self.p_offsets)): + if self.p_offsets[j] > 0.05 and self.p_offsets[j] < 0.95: + return False + return True + + def has_converged(self): + if self.all_zeros(): + return True + + top_n = sorted(range(len(self.p_offsets)), key=lambda i: self.p_offsets[i])[-self.layer:] + for i in top_n: + if self.p_offsets[i] < 0.95: + return False + + return True + + def flatten(self): + candidate = Candidate(self.layer) + top_n = sorted(range(len(self.p_offsets)), key=lambda i: self.p_offsets[i])[-self.layer:] + for i in top_n: + if self.p_offsets[i] < 0.95: + return None + candidate.offsets[i] = 1 + + return candidate + +def clamp(x, min_value = 0.01, max_value = 1): + return min(max(x, min_value), max_value) + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +# 00100111 x4 +# 00000110 x1 +def sha(v): + global M + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = 
m.digest() + return result[0] % M + +def sha_byte(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result + +def xor(x): + num_one_bits = 0 + for i in range(0, len(x)): + if i == 0: + continue + num_one_bits += x[i] + return num_one_bits % 2 + + +# 0 ^ 1 ^ (2 ^ (4 * (5 ^ 0 * 7))) * (3 ^ 6 * 7) +# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * (5 ^ 0 * 7)) ^ 4 * 6 * 7 * (5 ^ 0 * 7) +# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * 5 ^ 0 * 3 * 4 * 7 ^ 4 * 5 * 6 * 7 ^ 0 * 4 * 6 * 7 + +# 0 ^ 1 ^ 2*3 ^ 2*6*7 ^ 3*4*5 ^ 0*3*4*7 ^ 4*5*6*7 ^ 0*4*6*7 +# What about strictly SOP? +# That is, 1-Hot of increasing complexity? +# How would that work? +# Candidate generation could apply some kind of softmax to filter down to one +# +def test_fn(x): + # 0 1 + # 2 | 3 + # 4 | 5 | 6 | 7 + # | | 0 | 7 | | | | + return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7]))) + +def candidate_fn(x): + return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2]) + +def true_fn(x): + return x[0] ^ x[1] ^ (x[3] * x[2]) + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def coherence(outputs, distances): + coherences = [] + for i in range(0, len(outputs)): + y_a = outputs[i] + numerator = 0 + denominator = 0 + for j in range(0, len(outputs)): + if i == j: + continue + y_b = outputs[j] + weight = distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return sum(coherences) / len(coherences) + +def random_sample(m, inputs, augmented_inputs, outputs): + global N, N_ACTUAL + for i in range(0, m): + for j in range(0, N): + val = random.randint(0, 1) + inputs[i][j] = val + if j > 0: + augmented_inputs[i][(j - 1) * 2] = val + augmented_inputs[i][(j - 1) * 2 + 1] = 1 - val + # augmented_inputs[i][j * 2] = val + # augmented_inputs[i][j * 2 + 1] = 1 - val + output = sha_byte(inputs[i]) + outputs[i] = inputs[i][0] + for k in range(0, 1): + output_byte = output[k] + for j in range(0, 8): + val = (output_byte >> j) & 0b1; + inputs[i][k * 8 + j] = val + augmented_inputs[i][(N - 1 + k * 8 + j) * 2] = val + augmented_inputs[i][(N - 1 + k * 8 + j) * 2 + 1] = 1 - val + # outputs[i] = g(inputs[i]) + return (inputs, augmented_inputs, outputs) + +def populate_distances(inputs, distances, scratch): + for i in range(0, len(inputs)): + x_a = inputs[i] + for j in range(0, len(inputs)): + if i == j: + continue + x_b = inputs[j] + distance = hamming_distance(x_a, x_b, scratch) + distances[i][j] = 1.0 / (2 ** distance) + +def evaluate(layers, candidate, x, compute_scratch): + z = evaluate_layers(layers, x, compute_scratch) + z ^= evaluate_candidate(candidate, x, compute_scratch) + return z + +def evaluate_layers(layers, x, compute_scratch): + z = 0 + for layer in layers: + z ^= evaluate_candidate(layer, x, compute_scratch) + return z + +def evaluate_candidate(candidate, x, compute_scratch): + compute_scratch.fill(0) + compute_scratch[0:len(candidate.offsets)] = candidate.offsets + np.multiply(compute_scratch, x, compute_scratch) + return 1 if np.sum(compute_scratch) - np.sum(candidate.offsets) == 0 else 0 + +def layer_str(layer): + parts = [] + for i in range(0, len(layer.offsets)): + if layer.offsets[i] == 1: + parts.append('x[' + str(i) + ']') + return '*'.join(parts) + +def cache_layers(layers): + expr = 'def f(x):\n\tresult=0\n' + for i in range(0, len(layers)): + layer = layers[i] + expr += '\tresult^=' + 
layer_str(layer) + '\n' + expr += '\treturn result\n' + scope = {} + exec(expr, scope) + return scope['f'] + +@timeit +def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch, cached_f): + global M + + for i in range(0, sample_size): + outputs[0][i] = cached_f(inputs[i]) + # outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch) + # check = cached_f(inputs[i]) + # if check != outputs[0][i]: + # raise Exception('Mistake') + for j in range(1, num_candidates): + np.copyto(outputs[j], outputs[0]) + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + base_score = coherence(output_xor, distances) + + scores.fill(0) + unique_candidates = {} + for j in range(0, num_candidates): + create_candidate(probabilities, candidates[j]) + unique_candidates[candidate_str(candidates[j])] = j + + for i in range(0, sample_size): + for _, j in unique_candidates.items(): + candidate = candidates[j] + outputs[j][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch) + for _, j in unique_candidates.items(): + candidate = candidates[j] + np.subtract(outputs[j], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + scores[j] = score + # for j in range(0, num_candidates): + # candidate = candidates[j] + # create_candidate(probabilities, candidate) + + # for i in range(0, sample_size): + # for j in range(0, num_candidates): + # candidate = candidates[j] + # outputs[j][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch) + + # for j in range(0, num_candidates): + # candidate = candidates[j] + # np.subtract(outputs[j], expected_outputs, output_xor) + # np.mod(output_xor, M, output_xor) + # score = coherence(output_xor, distances) + # scores[j] = score + + return base_score + + +def compute_uplift(candidate, layers, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch): + global M + + for i in range(0, sample_size): + outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch) + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + base_score = coherence(output_xor, distances) + + for i in range(0, sample_size): + outputs[0][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch) + + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + return (base_score, score) + +@timeit +def update_probabilities(probabilities, candidates, inputs, base_score, scores, scale): + num_candidates = len(candidates) + + probabilities.offset_coherences.fill(-1) + for p in range(0, num_candidates): + score = scores[p] + if score == 0: + continue + candidate = candidates[p] + + for j in range(0, len(candidate.offsets)): + if candidate.offsets[j] == 0: + continue + probabilities.offset_coherences[j] = max(score, probabilities.offset_coherences[j]) + + inertia = 0 + for j in range(0, len(probabilities.p_offsets_next)): + p = probabilities.offset_coherences[j] + delta = p - base_score if p >= 0 else 0 + probabilities.p_offsets_next[j] = clamp(probabilities.p_offsets[j] + delta, 0, 1) + inertia += abs(probabilities.p_offsets_next[j] - probabilities.p_offsets[j]) + + for j in range(0, len(probabilities.p_offsets_next)): + p_offset_next = 0.9 * probabilities.p_offsets[j] + 0.1 * probabilities.p_offsets_next[j] + # if p_offset_next <= 0.05: + # p_offset_next = 0.0 + # elif 
p_offset_next >= 0.95: + # p_offset_next = 1.0 + probabilities.p_offsets[j] = p_offset_next + # total = np.sum(probabilities.p_offsets[j]) + # probabilities.p_offsets[j] *= 1.0 / total + + probabilities.snap() + + return inertia + +def create_candidate(probabilities, candidate): + candidate.offsets.fill(0) + scores = np.empty_like(candidate.offsets).astype(np.float32) + for j in range(0, len(probabilities.p_offsets)): + if probabilities.p_offsets[j] == 1.0: + scores[j] = 1000 + elif probabilities.p_offsets[j] == 0.0: + scores[j] = -1000 + else: + scores[j] = random.random() + probabilities.p_offsets[j] + top = sorted(range(len(scores)), key=lambda i: scores[i], reverse = True) + picked = set() + for i in top: + flip = i ^ 0b1 + if flip in picked: + continue + candidate.offsets[i] = 1 + picked.add(i) + if len(picked) == candidate.layer: + return + +def copy_candidate(src, dest): + for j in range(0, len(src.offsets)): + dest.offsets[j] = src.offsets[j] + +def p(x): + return math.ceil(x * 100) / 100 + +def p_a(x): + return [p(z) for z in x] + +def print_probabilities(probabilities): + print('=====================') + print(p_a(probabilities.p_offsets)) + print('=====================') + +def candidate_str(candidate): + build_str = '' + for j in range(0, len(candidate.offsets)): + build_str += str(candidate.offsets[j]) + return build_str + +def main(): + global N, N_ACTUAL, M + sample_size = 64 + num_candidates = 100 + num_survivors = 1 + uplift_sample_size = 128 + output_xor = np.zeros(sample_size,) + scratch = np.zeros((N,)) + int_scratch = np.zeros((N,)).astype(np.int32) + g = sha + layers = [] + unique_layers = set() + augment_layers = [] + layer = 1 + inputs = np.zeros((sample_size, N)).astype(np.int32) + augmented_inputs = np.zeros((sample_size, N_ACTUAL)).astype(np.int32) + expected_outputs = np.zeros((sample_size,)).astype(np.int32) + random_sample(sample_size, inputs, augmented_inputs, expected_outputs) + distances = np.zeros((sample_size, sample_size)) + populate_distances(inputs, distances, scratch) + outputs = np.zeros((num_candidates + num_survivors, sample_size,)).astype(np.int32) + scores = np.zeros((num_candidates + num_survivors,)) + cached_f = cache_layers(layers) + probabilities = Probabilities(1) + + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + + with open('model.txt', 'w') as f: + while score < 1: + probabilities.layer = layer + candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)] + augmented_int_scratch = np.zeros((N_ACTUAL,)).astype(np.int32) + random_sample(sample_size, inputs, augmented_inputs, expected_outputs) + populate_distances(inputs, distances, scratch) + + inertia = 1 + epoch = 1 + while inertia > 0.001 and epoch < 2000 and not probabilities.has_converged(): + base_score = compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, augmented_inputs, outputs, output_xor, expected_outputs, sample_size, augmented_int_scratch, cached_f) + round_inertia = update_probabilities(probabilities, candidates, augmented_inputs, base_score, scores, 1 + 0.01 * epoch) + inertia = 0.9 * inertia + 0.1 * round_inertia + + print_probabilities(probabilities) + # for candidate in layers: + # print(candidate.offsets) + max_score = np.max(scores) + print(base_score, max_score,round_inertia, inertia) + + top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:] + + for i in range(0, num_survivors): + src_index = top_n[i] + 
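+ # The top-scoring candidates are swapped into the last num_survivors slots of
+ # the candidates list; the next round only regenerates slots
+ # 0..num_candidates-1, so these survivors are carried over intact.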
dest_index = num_candidates + i + if src_index == dest_index: + continue + src = candidates[src_index] + dest = candidates[dest_index] + candidates[dest_index] = src + candidates[src_index] = dest + + random_sample(sample_size, inputs, augmented_inputs, expected_outputs) + populate_distances(inputs, distances, scratch) + epoch += 1 + + candidate = probabilities.flatten() + # uplift = -1 + # if not candidate is None: + # print(candidate.offsets) + # for j in range(0, sample_size): + # outputs[0][j] = evaluate(layers, candidate, augmented_inputs[j], augmented_int_scratch) + # np.subtract(outputs[0], expected_outputs, output_xor) + # np.mod(output_xor, M, output_xor) + # score = coherence(output_xor, distances) + + # average_base_score = 0 + # average_score = 0 + # for i in range(0, uplift_sample_size): + # (inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, augmented_int_scratch) + # populate_distances(inputs, distances, scratch) + # (base_score, score) = compute_uplift(candidate, layers, distances, augmented_inputs, outputs, output_xor, expected_outputs, sample_size, augmented_int_scratch) + # average_base_score += base_score + # average_score += score + # average_base_score /= uplift_sample_size + # average_score /= uplift_sample_size + # uplift = average_score - average_base_score + # print(uplift) + + # if uplift <= 0: + # layer += 1 + # # augment_layers = layers[1:] + # continue + if candidate is None: + if probabilities.eliminate_random_known(): + probabilities.reset() + continue + layer += 1 + continue + + layer_id = candidate_str(candidate) + if layer_id in unique_layers: + if probabilities.eliminate_random_known(): + if probabilities.eliminate_random_known(): + probabilities.reset() + continue + layer += 1 + continue + + unique_layers.add(layer_id) + layers.append(candidate) + cached_f = cache_layers(layers) + probabilities.eliminate_random_known() + probabilities.reset() + + for i in range(0, len(candidate.offsets)): + if candidate.offsets[i] == 1: + f.write(str(i)) + f.write(' ') + f.write('\n') + f.flush() + + # if layer == 1: + # layer += 1 + + for candidate in layers: + print(candidate.offsets) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations12.py b/mutations12.py new file mode 100644 index 0000000..052d8d1 --- /dev/null +++ b/mutations12.py @@ -0,0 +1,391 @@ +import bisect +from email.mime import base +import hashlib +import math +import numpy as np +import random + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def index_hash(indices): + return ','.join([str(index) for index in sorted(indices)]) + +class Candidate(): + def __init__(self, indices): + self.indices = indices[:] + + def evaluate(self, x): + if len(x) in self.indices: + return 0 + value = 1 + for index in self.indices: + value *= x[index] + return value + + def id(self): + return index_hash(self.indices) + + def eval_str(self): + parts = [] + for index in self.indices: + parts.append('x[' + str(index) + ']') + return '*'.join(parts) + +class Probabilities(): + def __init__(self): + self.N = 8 + self.actual_N = self.N * 2 + 
self.num_terms = 1 + self.num_candidates = 100 + self.sample_size = 64 + self.p = np.zeros((self.actual_N + 1,)) + self.p_temp = np.empty_like(self.p) + self.next_p = np.empty_like(self.p) + self.knowns = [] + self.stops = set() + self.reset_p() + self.epoch = 0 + + self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32) + self.distances = np.zeros((self.sample_size, self.sample_size)) + self.xor_square = np.zeros((self.sample_size, self.sample_size)) + self.base_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.outputs = np.zeros((self.sample_size)).astype(np.int32) + self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.output_xor = np.zeros((self.sample_size)).astype(np.int32) + self.max_coherences = np.zeros((self.actual_N + 1)) + self.max_candidates = [None for _ in range(0, self.actual_N)] + + self.layers = [] + self.base = None + self.rings = [] + + self.scratch = np.zeros((self.actual_N,)) + + self.last_value = -1 + self.rounds = 0 + self.average_delta_over_null = 0 + + def randomize_inputs(self): + for i in range(0, self.sample_size): + for j in range(0, self.N): + val = random.randint(0, 1) + self.inputs[i][j * 2] = val + self.inputs[i][j * 2 + 1] = val ^ 1 + + def populate_distances(self): + for i in range(0, len(self.inputs)): + x_a = self.inputs[i] + for j in range(0, len(self.inputs)): + if i == j: + continue + x_b = self.inputs[j] + distance = hamming_distance(x_a, x_b, self.scratch) + self.distances[i][j] = 1.0 / (2 ** distance) + + def compute_rings(self): + self.rings = [] + for i in range(0, len(self.inputs)): + x_a = self.inputs[i] + min_distance = self.actual_N + indices = [] + for j in range(0, len(self.inputs)): + if i == j: + continue + x_b = self.inputs[j] + distance = hamming_distance(x_a, x_b, self.scratch) + if distance < min_distance: + min_distance = distance + indices = [j] + elif distance == min_distance: + indices.append(j) + self.rings.append(indices) + + def compute_expected_outputs(self): + for i in range(0, len(self.inputs)): + self.expected_outputs[i] = sha(self.inputs[i]) + + def compute_base_outputs(self): + if self.base is None: + self.base_outputs.fill(0) + return + for i in range(0, len(self.inputs)): + self.base_outputs[i] = self.base(self.inputs[i]) + + def coherence(self, outputs=None): + if outputs is None: + outputs = self.outputs + np.logical_xor(outputs, self.expected_outputs, self.output_xor) + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + + return sum(coherences) / len(coherences) + + def ring_coherence(self, outputs=None): + if outputs is None: + outputs = self.outputs + np.logical_xor(outputs, self.expected_outputs, self.output_xor) + total = 0 + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + indices = self.rings[i] + coherence = sum([1 if self.output_xor[j] == y_a else 0 for j in indices]) / len(indices) + total += coherence + return total / len(self.output_xor) + + def normalize_p(self): + check = self.knowns[:] + for i in range(0, len(self.p)): + if self.p[i] < 0: + self.p[i] = 0 + for i in range(0, len(self.p)): + if i in self.knowns: + flip = i ^ 0b1 + self.p[i] = 
0.0 + self.p[flip] = 0.0 + else: + check.append(i) + stop_id = index_hash(check) + check.pop() + if stop_id in self.stops: + self.p[i] = 0.0 + total = np.sum(self.p) + if total > 0: + for i in range(0, len(self.p)): + self.p[i] = self.p[i] / total + + def reset_p(self): + self.p.fill(1.0) + self.normalize_p() + + def threshold(self): + # return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100) + return 1.0 - (self.epoch / 100) + + def get_converged_index(self): + for i in range(0, len(self.p)): + if self.p[i] > self.threshold(): + return i + return None + + def add_layer(self): + self.add_stop() + layer = Candidate(self.knowns) + self.layers.append(layer) + self.base = self.cache_layers() + self.knowns.pop() + self.reset_p() + + def random_sample(self): + self.randomize_inputs() + self.populate_distances() + # self.compute_rings() + self.compute_expected_outputs() + self.compute_base_outputs() + return self.coherence(self.base_outputs) + # return self.ring_coherence(self.base_outputs) + + def random_candidate(self): + indices = self.knowns[:] + np.copyto(self.p_temp, self.p) + self.p_temp[self.actual_N] = 0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + for _ in range(0, self.num_terms - len(self.knowns)): + index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0] + indices.append(index) + flip = index ^ 0b1 + self.p_temp[index] = 0 + self.p_temp[flip] = 0 + for i in range(0, len(self.p_temp)): + if i not in indices: + indices.append(i) + stop_id = index_hash(indices) + indices.pop() + if stop_id in self.stops: + self.p_temp[i] = 0.0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + return Candidate(indices) + + def add_stop(self): + stop_id = index_hash(self.knowns) + self.stops.add(stop_id) + + def update(self): + self.epoch += 1 + base_coherence = self.random_sample() + self.max_coherences.fill(0) + for i in range(0, self.actual_N): + self.max_candidates[i] = None + visited = set() + has_candidate = False + # np.copyto(self.next_p, self.p) + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in visited: + continue + visited.add(candidate_id) + if self.actual_N in candidate.indices: + continue + has_candidate = True + for i in range(0, len(self.inputs)): + self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i]) + # coherence = self.ring_coherence() + coherence = self.coherence() + # if coherence <= base_coherence: + # continue + # for index in candidate.indices: + # self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0) + # self.p_temp[index] += 0 + for index in candidate.indices: + if coherence > self.max_coherences[index]: + self.max_coherences[index] = coherence + self.max_candidates[index] = candidate + # self.max_coherences[index] = max(self.max_coherences[index], coherence) + # np.copyto(self.p, self.next_p) + + # np.copyto(self.p_temp, self.p) + for i in range(0, self.actual_N): + candidate = self.max_candidates[i] + if candidate is None: + continue + for index in candidate.indices: + self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0) + # print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id()) + self.normalize_p() + # print(self.p) + + # np.subtract(self.p_temp, self.p, self.p_temp) + # np.abs(self.p_temp, self.p_temp) + # delta = np.sum(self.p_temp) / 
len(self.p_temp) + # print(delta, np.argmax(self.p)) + # np.copyto(self.p_temp, self.p) + # for i in range(0, len(self.p_temp)): + # self.p_temp[i] = round(self.p_temp[i] * 100) / 100 + # print(self.p_temp) + + index = np.argmax(self.p) + delta_over_null = self.p[index] - self.p[self.actual_N] + if self.epoch == 0: + self.average_delta_over_null = delta_over_null + else: + self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null + diff = self.num_terms - len(self.knowns) + + print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p)) + + # Always iterate for a minimum number of epochs + if self.epoch < 15: + return + if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300: + return + if self.average_delta_over_null < 0.001: + index = self.actual_N + else: + index = np.argmax(self.p) + + # index = np.argmax(self.p) + # if index == self.last_value: + # self.rounds += 1 + # else: + # self.rounds = 0 + # self.last_value = index + + # if self.rounds < 10 and self.epoch < 100: + # return + + # if self.epoch < 5 or (delta > 0.001 and self.epoch < 50): + # return + + # index = np.argmax(self.p) + + # print(self.p) + # print(self.threshold()) + # print(self.p) + # index = self.get_converged_index() + if not index is None or not has_candidate: + # print(index, delta, np.argmax(self.p)) + self.epoch = 0 + if index == self.actual_N or not has_candidate: + if len(self.knowns) > 0: + self.add_stop() + self.knowns.pop() + print('Backtrack: ' + str(self.knowns)) + self.reset_p() + return + self.num_terms += 1 + self.knowns = [] + self.stops = set() + self.reset_p() + print(self.num_terms) + return + self.knowns.append(index) + # bisect.insort(self.knowns, index) + if len(self.knowns) == self.num_terms: + print('Add layer: ' + str(self.knowns)) + self.add_layer() + else: + print('Found term: ' + str(self.knowns)) + self.reset_p() + print(base_coherence) + return + + def cache_layers(self): + expr = 'def f(x):\n\tresult=0\n' + for layer in self.layers: + expr += '\tresult^=' + layer.eval_str() + '\n' + expr += '\treturn result\n' + scope = {} + exec(expr, scope) + return scope['f'] + +def main(): + probabilities = Probabilities() + while probabilities.num_terms <= probabilities.N: + probabilities.update() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations13.py b/mutations13.py new file mode 100644 index 0000000..5f95b3f --- /dev/null +++ b/mutations13.py @@ -0,0 +1,447 @@ +import bisect +from email.mime import base +import hashlib +import math +import numpy as np +import random + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def index_hash(indices): + return ','.join([str(index) for index in sorted(indices)]) + +class Candidate(): + def __init__(self, indices): + self.indices = indices[:] + self.uplift = 0 + + def evaluate(self, x): + if len(x) in self.indices: + return 0 + value = 1 + for index in self.indices: + value *= x[index] + return value + + def id(self): + return index_hash(self.indices) + + def eval_str(self): + parts = [] + for index 
in self.indices: + parts.append('x[' + str(index) + ']') + return '*'.join(parts) + +class Probabilities(): + def __init__(self): + self.N = 8 + self.actual_N = self.N * 2 + self.num_terms = 1 + self.num_candidates = 100 + self.sample_size = 64 + self.p = np.zeros((self.actual_N + 1,)) + self.p_temp = np.empty_like(self.p) + self.next_p = np.empty_like(self.p) + self.knowns = [] + self.stops = set() + self.reset_p() + self.epoch = 0 + + self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32) + self.distances = np.zeros((self.sample_size, self.sample_size)) + self.xor_square = np.zeros((self.sample_size, self.sample_size)) + self.base_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.outputs = np.zeros((self.sample_size)).astype(np.int32) + self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.output_xor = np.zeros((self.sample_size)).astype(np.int32) + self.max_coherences = np.zeros((self.actual_N + 1)) + self.max_candidates = [None for _ in range(0, self.actual_N)] + + self.layers = [] + self.base = None + self.rings = [] + + self.scratch = np.zeros((self.actual_N,)) + + self.last_value = -1 + self.rounds = 0 + self.average_delta_over_null = 0 + + self.candidate_pool = [] + self.candidate_ids = set() + + def randomize_inputs(self): + for i in range(0, self.sample_size): + for j in range(0, self.N): + val = random.randint(0, 1) + self.inputs[i][j * 2] = val + self.inputs[i][j * 2 + 1] = val ^ 1 + + def populate_distances(self): + for i in range(0, len(self.inputs)): + x_a = self.inputs[i] + for j in range(0, len(self.inputs)): + if i == j: + continue + x_b = self.inputs[j] + distance = hamming_distance(x_a, x_b, self.scratch) + self.distances[i][j] = 1.0 / (2 ** distance) + + def compute_rings(self): + self.rings = [] + for i in range(0, len(self.inputs)): + x_a = self.inputs[i] + min_distance = self.actual_N + indices = [] + for j in range(0, len(self.inputs)): + if i == j: + continue + x_b = self.inputs[j] + distance = hamming_distance(x_a, x_b, self.scratch) + if distance < min_distance: + min_distance = distance + indices = [j] + elif distance == min_distance: + indices.append(j) + self.rings.append(indices) + + def compute_expected_outputs(self): + for i in range(0, len(self.inputs)): + self.expected_outputs[i] = sha(self.inputs[i]) + + def compute_base_outputs(self): + if self.base is None: + self.base_outputs.fill(0) + return + for i in range(0, len(self.inputs)): + self.base_outputs[i] = self.base(self.inputs[i]) + + def coherence(self, outputs=None): + if outputs is None: + outputs = self.outputs + np.logical_xor(outputs, self.expected_outputs, self.output_xor) + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + + return sum(coherences) / len(coherences) + + def ring_coherence(self, outputs=None): + if outputs is None: + outputs = self.outputs + np.logical_xor(outputs, self.expected_outputs, self.output_xor) + total = 0 + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + indices = self.rings[i] + coherence = sum([1 if self.output_xor[j] == y_a else 0 for j in indices]) / len(indices) + total += coherence + return total / 
len(self.output_xor) + + def normalize_p(self): + check = self.knowns[:] + for i in range(0, len(self.p)): + if self.p[i] < 0: + self.p[i] = 0 + for i in range(0, len(self.p)): + if i in self.knowns: + flip = i ^ 0b1 + self.p[i] = 0.0 + self.p[flip] = 0.0 + else: + check.append(i) + stop_id = index_hash(check) + check.pop() + if stop_id in self.stops: + self.p[i] = 0.0 + total = np.sum(self.p) + if total > 0: + for i in range(0, len(self.p)): + self.p[i] = self.p[i] / total + + def reset_p(self): + self.p.fill(1.0) + self.normalize_p() + + def threshold(self): + # return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100) + return 1.0 - (self.epoch / 100) + + def get_converged_index(self): + for i in range(0, len(self.p)): + if self.p[i] > self.threshold(): + return i + return None + + def add_layer(self): + self.add_stop() + layer = Candidate(self.knowns) + self.layers.append(layer) + self.base = self.cache_layers() + self.knowns.pop() + self.reset_p() + + def random_sample(self): + self.randomize_inputs() + self.populate_distances() + # self.compute_rings() + self.compute_expected_outputs() + self.compute_base_outputs() + return self.coherence(self.base_outputs) + # return self.ring_coherence(self.base_outputs) + + def random_candidate(self): + indices = self.knowns[:] + np.copyto(self.p_temp, self.p) + self.p_temp[self.actual_N] = 0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + for _ in range(0, self.num_terms - len(self.knowns)): + index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0] + indices.append(index) + flip = index ^ 0b1 + self.p_temp[index] = 0 + self.p_temp[flip] = 0 + for i in range(0, len(self.p_temp)): + if i not in indices: + indices.append(i) + stop_id = index_hash(indices) + indices.pop() + if stop_id in self.stops: + self.p_temp[i] = 0.0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + return Candidate(indices) + + def seed_candidate_pool(self): + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in self.candidate_ids: + continue + self.candidate_pool.append(candidate) + self.candidate_ids.add(candidate_id) + + def add_stop(self): + stop_id = index_hash(self.knowns) + self.stops.add(stop_id) + + def update(self): + self.epoch += 1 + base_coherence = self.random_sample() + self.seed_candidate_pool() + for candidate in self.candidate_pool: + for i in range(0, len(self.inputs)): + self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i]) + coherence = self.coherence() + candidate.uplift += coherence - base_coherence + self.candidate_pool.sort(key=lambda x: x.uplift, reverse=True) + for drop_candidate in self.candidate_pool[self.num_candidates:]: + self.candidate_ids.remove(drop_candidate.id()) + self.candidate_pool = self.candidate_pool[:self.num_candidates] + # print('======') + # print(self.epoch, base_coherence) + # print('======') + + # if len(self.candidate_pool) == 0: + # print(self.p) + + # for i in range(0, min(5, len(self.candidate_pool))): + # candidate = self.candidate_pool[i] + # print(candidate.id(), candidate.uplift) + + # if self.epoch < 15: + # return + + if self.candidate_pool[0].uplift > 0.3: + candidate = self.candidate_pool[0] + candidate_id = candidate.id() + self.candidate_ids.remove(candidate_id) + print(candidate_id) + self.knowns = candidate.indices + self.add_layer() + 
self.knowns = [] + self.reset_p() + self.epoch = 0 + self.candidate_pool = [] + self.candidate_ids = set() + elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200: + self.epoch = 0 + self.num_terms += 1 + self.candidate_pool = [] + self.candidate_ids = set() + self.knowns = [] + self.stops = set() + self.reset_p() + return + + # np.copyto(self.next_p, self.p) + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in visited: + continue + visited.add(candidate_id) + if self.actual_N in candidate.indices: + continue + has_candidate = True + for i in range(0, len(self.inputs)): + self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i]) + # coherence = self.ring_coherence() + coherence = self.coherence() + # if coherence <= base_coherence: + # continue + # for index in candidate.indices: + # self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0) + # self.p_temp[index] += 0 + for index in candidate.indices: + if coherence > self.max_coherences[index]: + self.max_coherences[index] = coherence + self.max_candidates[index] = candidate + # self.max_coherences[index] = max(self.max_coherences[index], coherence) + # np.copyto(self.p, self.next_p) + + # np.copyto(self.p_temp, self.p) + for i in range(0, self.actual_N): + candidate = self.max_candidates[i] + if candidate is None: + continue + for index in candidate.indices: + self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0) + # print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id()) + self.normalize_p() + # print(self.p) + + # np.subtract(self.p_temp, self.p, self.p_temp) + # np.abs(self.p_temp, self.p_temp) + # delta = np.sum(self.p_temp) / len(self.p_temp) + # print(delta, np.argmax(self.p)) + # np.copyto(self.p_temp, self.p) + # for i in range(0, len(self.p_temp)): + # self.p_temp[i] = round(self.p_temp[i] * 100) / 100 + # print(self.p_temp) + + index = np.argmax(self.p) + delta_over_null = self.p[index] - self.p[self.actual_N] + if self.epoch == 0: + self.average_delta_over_null = delta_over_null + else: + self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null + diff = self.num_terms - len(self.knowns) + + print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p)) + + # Always iterate for a minimum number of epochs + if self.epoch < 15: + return + if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300: + return + if self.average_delta_over_null < 0.001: + index = self.actual_N + else: + index = np.argmax(self.p) + + # index = np.argmax(self.p) + # if index == self.last_value: + # self.rounds += 1 + # else: + # self.rounds = 0 + # self.last_value = index + + # if self.rounds < 10 and self.epoch < 100: + # return + + # if self.epoch < 5 or (delta > 0.001 and self.epoch < 50): + # return + + # index = np.argmax(self.p) + + # print(self.p) + # print(self.threshold()) + # print(self.p) + # index = self.get_converged_index() + if not index is None or not has_candidate: + # print(index, delta, np.argmax(self.p)) + self.epoch = 0 + if index == self.actual_N or not has_candidate: + if len(self.knowns) > 0: + self.add_stop() + self.knowns.pop() + print('Backtrack: ' + str(self.knowns)) + self.reset_p() + return + self.num_terms += 1 + self.knowns = [] + self.stops = set() + self.reset_p() + print(self.num_terms) + return + 
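# a converged index was found: record it as a known term; once num_terms terms have been collected they are committed as a layer via add_layer() +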
self.knowns.append(index) + # bisect.insort(self.knowns, index) + if len(self.knowns) == self.num_terms: + print('Add layer: ' + str(self.knowns)) + self.add_layer() + else: + print('Found term: ' + str(self.knowns)) + self.reset_p() + print(base_coherence) + return + + def cache_layers(self): + expr = 'def f(x):\n\tresult=0\n' + for layer in self.layers: + expr += '\tresult^=' + layer.eval_str() + '\n' + expr += '\treturn result\n' + scope = {} + exec(expr, scope) + return scope['f'] + +def main(): + probabilities = Probabilities() + while probabilities.num_terms <= probabilities.N: + probabilities.update() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations14.py b/mutations14.py new file mode 100644 index 0000000..d9c7c44 --- /dev/null +++ b/mutations14.py @@ -0,0 +1,549 @@ +import bisect +from email.mime import base +import hashlib +import math +import numpy as np +import random + +from pkg_resources import get_distribution + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def index_hash(indices): + return ','.join([str(index) for index in sorted(indices)]) + +def bin_div(a, b): + if a == 0 and b == 0: + return 2 + if a == 1 and b == 0: + return -1 + if a == 0 and b == 1: + return 0 + return 1 + +class Candidate(): + def __init__(self, indices): + self.indices = indices[:] + self.uplift = 0 + + def evaluate(self, x): + if len(x) in self.indices: + return 0 + value = 1 + for index in self.indices: + value *= x[index] + return value + + def id(self): + return index_hash(self.indices) + + def eval_str(self): + parts = [] + for index in self.indices: + parts.append('x[' + str(index) + ']') + return '*'.join(parts) + +class Probabilities(): + def __init__(self): + self.N = 16 + self.actual_N = self.N * 2 + self.num_terms = 1 + self.num_candidates = 100 + self.sample_size = 64 + self.p = np.zeros((self.actual_N + 1,)) + self.p_temp = np.empty_like(self.p) + self.next_p = np.empty_like(self.p) + self.knowns = [] + self.stops = set() + self.reset_p() + self.epoch = 0 + + self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32) + self.masked_distances = np.zeros((self.sample_size, self.sample_size)) + self.distances = np.zeros((self.sample_size, self.sample_size)) + self.xor_square = np.zeros((self.sample_size, self.sample_size)) + self.base_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.outputs = np.zeros((self.sample_size)).astype(np.int32) + self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.output_xor = np.zeros((self.sample_size)).astype(np.int32) + self.mask = np.zeros((self.sample_size)) + self.numerators = np.zeros((self.sample_size)) + self.denominators = np.zeros((self.sample_size)) + self.coherences = np.zeros((self.sample_size)) + self.max_coherences = np.zeros((self.actual_N + 1)) + self.max_candidates = [None for _ in range(0, self.actual_N)] + self.uplifts = np.zeros((self.actual_N)) + self.subspace_uplifts = np.zeros((self.actual_N)) + + self.layers = [] + self.base = None + + self.scratch = np.zeros((self.actual_N,)) + + self.last_value = -1 + self.rounds = 0 + 
self.average_delta_over_null = 0 + self.visited = set() + + self.candidate_pool = [] + self.candidate_ids = set() + + def randomize_inputs(self): + for i in range(0, self.sample_size): + for j in range(0, self.N): + val = random.randint(0, 1) + self.inputs[i][j * 2] = val + self.inputs[i][j * 2 + 1] = val ^ 1 + + def populate_distances(self): + for i in range(0, len(self.inputs)): + x_a = self.inputs[i] + for j in range(0, len(self.inputs)): + if i == j: + continue + x_b = self.inputs[j] + distance = hamming_distance(x_a, x_b, self.scratch) + self.distances[i][j] = 1.0 / (2 ** distance) + + def compute_expected_outputs(self): + for i in range(0, len(self.inputs)): + self.expected_outputs[i] = sha(self.inputs[i]) + + def compute_base_outputs(self): + if self.base is None: + self.base_outputs.fill(0) + return + for i in range(0, len(self.inputs)): + self.base_outputs[i] = self.base(self.inputs[i]) + + def mat_coherence(self): + np.abs(self.output_xor, self.mask) + np.subtract(self.output_xor, self.mask, self.mask) + np.divide(self.mask, 2.0, self.mask) + np.add(1.0, self.mask, self.mask) + self.xor_square.fill(0) + np.copyto(self.masked_distances, self.distances) + masked_distances_t = self.masked_distances.transpose() + for i in range(0, len(self.xor_square)): + self.xor_square[i] = self.output_xor + np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i]) + np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i]) + np.sum(self.masked_distances, axis=0, out=self.denominators) + self.xor_square = self.xor_square.transpose() + np.logical_xor(self.xor_square, self.output_xor, self.xor_square) + np.multiply(self.xor_square, self.masked_distances, self.xor_square) + np.sum(self.xor_square, axis=0, out=self.numerators) + np.divide(self.numerators, self.denominators, self.coherences) + return 1.0 - np.nanmean(self.coherences) + + def coherence(self, outputs=None): + if outputs is None: + outputs = self.outputs + np.logical_xor(outputs, self.expected_outputs, self.output_xor) + return self.mat_coherence() + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + + raw_coherence = sum(coherences) / len(coherences) + check_coherence = self.mat_coherence() + + return raw_coherence + + def div_coherence(self): + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + if y_a < 0: + continue + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + if y_b < 0: + continue + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + # if y_a < 0 or y_b < 0: + # numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + if len(coherences) == 0: + return 1.0 + return sum(coherences) / len(coherences) + + def normalize_p(self): + check = self.knowns[:] + for i in range(0, len(self.p)): + if self.p[i] < 0: + self.p[i] = 0 + for i in range(0, len(self.p)): + if i in self.knowns: + flip = i ^ 0b1 + self.p[i] = 0.0 + self.p[flip] = 0.0 + else: + check.append(i) + stop_id = 
index_hash(check) + check.pop() + if stop_id in self.stops: + self.p[i] = 0.0 + total = np.sum(self.p) + if total > 0: + for i in range(0, len(self.p)): + self.p[i] = self.p[i] / total + + def reset_p(self): + self.p.fill(1.0) + self.normalize_p() + + def threshold(self): + # return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100) + return 1.0 - (self.epoch / 100) + + def get_converged_index(self): + for i in range(0, len(self.p)): + if self.p[i] > self.threshold(): + return i + return None + + def add_layer(self): + self.add_stop() + layer = Candidate(self.knowns) + self.layers.append(layer) + self.base = self.cache_layers() + self.knowns.pop() + self.reset_p() + + def random_sample(self): + self.randomize_inputs() + self.populate_distances() + self.compute_expected_outputs() + self.compute_base_outputs() + return self.coherence(self.base_outputs) + + def random_candidate(self): + indices = self.knowns[:] + np.copyto(self.p_temp, self.p) + self.p_temp[self.actual_N] = 0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + for _ in range(0, self.num_terms - len(self.knowns)): + index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0] + indices.append(index) + flip = index ^ 0b1 + self.p_temp[index] = 0 + self.p_temp[flip] = 0 + for i in range(0, len(self.p_temp)): + if i not in indices: + indices.append(i) + stop_id = index_hash(indices) + indices.pop() + if stop_id in self.stops: + self.p_temp[i] = 0.0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + return Candidate(indices) + + def seed_candidate_pool(self): + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in self.candidate_ids: + continue + self.candidate_pool.append(candidate) + self.candidate_ids.add(candidate_id) + + def add_stop(self): + stop_id = index_hash(self.knowns) + self.stops.add(stop_id) + + def get_distribution(self, candidate, half = 1): + count = 0 + for i in range(0, len(self.inputs)): + value = candidate.evaluate(self.inputs[i]) + if value == half: + self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i] + count += 1 + else: + self.output_xor[i] = -1 + return (count, self.mat_coherence()) + + def update(self): + self.epoch += 1 + base_coherence = self.random_sample() + candidate = Candidate(self.knowns[:]) + + index = -1 + subspace_index = -1 + bar = 1.0 - (self.epoch / 1000.0) + for i in range(0, self.actual_N): + if i in self.knowns: + continue + candidate.indices.append(i) + (count_0, subspace_coherence_0) = self.get_distribution(candidate, 0) + # (_, subspace_coherence) = self.get_distribution(candidate, 0) + # subspace_coherence = subspace_coherence_0 * count_0 / (count_0 + count_1) + subspace_coherence_1 * count_1 / (count_0 + count_1) + # subspace_coherence = subspace_coherence_0 + # delta = (subspace_coherence_0 - base_coherence) * count_0 / (count_0 + count_1) + (subspace_coherence_1 - base_coherence) * count_1 / (count_0 + count_1) + delta = (subspace_coherence_0 - base_coherence) * count_0 / len(self.inputs) + self.subspace_uplifts[i] += delta + if self.subspace_uplifts[i] > bar: + if subspace_index < 0 or self.subspace_uplifts[i] > self.subspace_uplifts[subspace_index]: + subspace_index = i + + if index_hash(candidate.indices) not in self.stops: + for j in range(0, len(self.inputs)): + self.outputs[j] = self.base_outputs[j] ^ 
candidate.evaluate(self.inputs[j]) + coherence = self.coherence() + delta = coherence - base_coherence + # self.uplifts[i] = 0.9 * self.uplifts[i] + 0.1 * coherence + self.uplifts[i] += delta + if self.uplifts[i] > bar: + if index < 0 or self.uplifts[i] > self.uplifts[index]: + index = i + candidate.indices.pop() + + # print('=====' + str(base_coherence)) + # print(self.uplifts) + # print(self.subspace_uplifts) + + if index >= 0: + self.knowns.append(index) + print(base_coherence) + print(self.knowns, bar) + # print(self.uplifts) + # print(self.subspace_uplifts) + self.add_layer() + self.knowns = [] + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.epoch = 0 + return + + if subspace_index >= 0: + self.knowns.append(subspace_index) + print(self.knowns, bar) + # print(self.uplifts) + # print(self.subspace_uplifts) + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.epoch = 0 + return + + # print('======') + # print(self.epoch, base_coherence) + # print('======') + + # if len(self.candidate_pool) == 0: + # print(self.p) + + # for i in range(0, min(5, len(self.candidate_pool))): + # candidate = self.candidate_pool[i] + # print(candidate.id(), candidate.uplift) + + # if self.epoch < 15: + # return + + if self.candidate_pool[0].uplift > 0.3: + candidate = self.candidate_pool[0] + candidate_id = candidate.id() + self.candidate_ids.remove(candidate_id) + print(candidate_id) + self.knowns = candidate.indices + self.add_layer() + self.knowns = [] + self.reset_p() + self.epoch = 0 + self.candidate_pool = [] + self.candidate_ids = set() + elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200: + self.epoch = 0 + self.num_terms += 1 + self.candidate_pool = [] + self.candidate_ids = set() + self.knowns = [] + self.stops = set() + self.reset_p() + return + + # np.copyto(self.next_p, self.p) + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in visited: + continue + visited.add(candidate_id) + if self.actual_N in candidate.indices: + continue + has_candidate = True + for i in range(0, len(self.inputs)): + self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i]) + # coherence = self.ring_coherence() + coherence = self.coherence() + # if coherence <= base_coherence: + # continue + # for index in candidate.indices: + # self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0) + # self.p_temp[index] += 0 + for index in candidate.indices: + if coherence > self.max_coherences[index]: + self.max_coherences[index] = coherence + self.max_candidates[index] = candidate + # self.max_coherences[index] = max(self.max_coherences[index], coherence) + # np.copyto(self.p, self.next_p) + + # np.copyto(self.p_temp, self.p) + for i in range(0, self.actual_N): + candidate = self.max_candidates[i] + if candidate is None: + continue + for index in candidate.indices: + self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0) + # print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id()) + self.normalize_p() + # print(self.p) + + # np.subtract(self.p_temp, self.p, self.p_temp) + # np.abs(self.p_temp, self.p_temp) + # delta = np.sum(self.p_temp) / len(self.p_temp) + # print(delta, np.argmax(self.p)) + # np.copyto(self.p_temp, self.p) + # for i in range(0, len(self.p_temp)): + # self.p_temp[i] = round(self.p_temp[i] * 100) / 100 + # print(self.p_temp) + + index = np.argmax(self.p) + delta_over_null = self.p[index] - 
self.p[self.actual_N] + if self.epoch == 0: + self.average_delta_over_null = delta_over_null + else: + self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null + diff = self.num_terms - len(self.knowns) + + print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p)) + + # Always iterate for a minimum number of epochs + if self.epoch < 15: + return + if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300: + return + if self.average_delta_over_null < 0.001: + index = self.actual_N + else: + index = np.argmax(self.p) + + # index = np.argmax(self.p) + # if index == self.last_value: + # self.rounds += 1 + # else: + # self.rounds = 0 + # self.last_value = index + + # if self.rounds < 10 and self.epoch < 100: + # return + + # if self.epoch < 5 or (delta > 0.001 and self.epoch < 50): + # return + + # index = np.argmax(self.p) + + # print(self.p) + # print(self.threshold()) + # print(self.p) + # index = self.get_converged_index() + if not index is None or not has_candidate: + # print(index, delta, np.argmax(self.p)) + self.epoch = 0 + if index == self.actual_N or not has_candidate: + if len(self.knowns) > 0: + self.add_stop() + self.knowns.pop() + print('Backtrack: ' + str(self.knowns)) + self.reset_p() + return + self.num_terms += 1 + self.knowns = [] + self.stops = set() + self.reset_p() + print(self.num_terms) + return + self.knowns.append(index) + # bisect.insort(self.knowns, index) + if len(self.knowns) == self.num_terms: + print('Add layer: ' + str(self.knowns)) + self.add_layer() + else: + print('Found term: ' + str(self.knowns)) + self.reset_p() + print(base_coherence) + return + + def cache_layers(self): + expr = 'def f(x):\n\tresult=0\n' + for layer in self.layers: + expr += '\tresult^=' + layer.eval_str() + '\n' + expr += '\treturn result\n' + scope = {} + exec(expr, scope) + return scope['f'] + +def main(): + probabilities = Probabilities() + # probabilities.knowns = [14] + # probabilities.add_layer() + # probabilities.knowns = [8] + # probabilities.add_layer() + # probabilities.knowns = [4] + # probabilities.add_layer() + while probabilities.num_terms <= probabilities.N: + probabilities.update() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations15.py b/mutations15.py new file mode 100644 index 0000000..b6ac03e --- /dev/null +++ b/mutations15.py @@ -0,0 +1,628 @@ +import bisect +from email.mime import base +import hashlib +import math +import numpy as np +import random +import statistics + +from pkg_resources import get_distribution + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def index_hash(indices): + return ','.join([str(index) for index in sorted(indices)]) + +def bin_div(a, b): + if a == 0 and b == 0: + return 2 + if a == 1 and b == 0: + return -1 + if a == 0 and b == 1: + return 0 + return 1 + +class Candidate(): + def __init__(self, indices): + self.indices = indices[:] + self.uplift = 0 + + def evaluate(self, x): + if len(x) in self.indices: + return 0 + value = 1 + for index in self.indices: + value *= x[index] + 
return value + + def id(self): + return index_hash(self.indices) + + def eval_str(self): + parts = [] + for index in self.indices: + parts.append('x[' + str(index) + ']') + return '*'.join(parts) + +class Probabilities(): + def __init__(self): + self.N = 8 + self.actual_N = self.N * 2 + self.num_terms = 1 + self.num_candidates = 100 + self.sample_size = 64 + self.p = np.zeros((self.actual_N + 1,)) + self.p_temp = np.empty_like(self.p) + self.next_p = np.empty_like(self.p) + self.knowns = [] + self.stops = set() + self.reset_p() + self.epoch = 0 + + self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32) + self.masked_distances = np.zeros((self.sample_size, self.sample_size)) + self.distances = np.zeros((self.sample_size, self.sample_size)) + self.xor_square = np.zeros((self.sample_size, self.sample_size)) + self.base_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.outputs = np.zeros((self.sample_size)).astype(np.int32) + self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.output_xor = np.zeros((self.sample_size)).astype(np.int32) + self.mask = np.zeros((self.sample_size)) + self.numerators = np.zeros((self.sample_size)) + self.denominators = np.zeros((self.sample_size)) + self.coherences = np.zeros((self.sample_size)) + self.max_coherences = np.zeros((self.actual_N + 1)) + self.max_candidates = [None for _ in range(0, self.actual_N)] + self.uplifts = np.zeros((self.actual_N)) + self.uplift_means = np.zeros((self.actual_N)) + self.uplift_medians = np.zeros((self.actual_N)) + self.uplift_convergences = np.zeros((self.actual_N)) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.subspace_uplifts = np.zeros((self.actual_N)) + self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)] + self.uplift_stddevs = np.zeros((self.actual_N)) + + self.layers = [] + self.base = None + + self.scratch = np.zeros((self.actual_N,)) + + self.last_value = -1 + self.rounds = 0 + self.average_delta_over_null = 0 + self.visited = set() + + self.candidate_pool = [] + self.candidate_ids = set() + self.has_added_layer = False + + def randomize_inputs(self): + for i in range(0, self.sample_size): + for j in range(0, self.N): + val = random.randint(0, 1) + self.inputs[i][j * 2] = val + self.inputs[i][j * 2 + 1] = val ^ 1 + + def populate_distances(self): + for i in range(0, len(self.inputs)): + x_a = self.inputs[i] + for j in range(0, len(self.inputs)): + if i == j: + continue + x_b = self.inputs[j] + distance = hamming_distance(x_a, x_b, self.scratch) + self.distances[i][j] = 1.0 / (2 ** distance) + + def compute_expected_outputs(self): + for i in range(0, len(self.inputs)): + self.expected_outputs[i] = sha(self.inputs[i]) + + def compute_base_outputs(self): + if self.base is None: + self.base_outputs.fill(0) + return + for i in range(0, len(self.inputs)): + self.base_outputs[i] = self.base(self.inputs[i]) + + def mat_coherence(self): + np.abs(self.output_xor, self.mask) + np.subtract(self.output_xor, self.mask, self.mask) + np.divide(self.mask, 2.0, self.mask) + np.add(1.0, self.mask, self.mask) + self.xor_square.fill(0) + np.copyto(self.masked_distances, self.distances) + masked_distances_t = self.masked_distances.transpose() + for i in range(0, len(self.xor_square)): + self.xor_square[i] = self.output_xor + np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i]) + np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i]) + np.sum(self.masked_distances, axis=0, out=self.denominators) + self.xor_square = 
self.xor_square.transpose() + np.logical_xor(self.xor_square, self.output_xor, self.xor_square) + np.multiply(self.xor_square, self.masked_distances, self.xor_square) + np.sum(self.xor_square, axis=0, out=self.numerators) + np.divide(self.numerators, self.denominators, self.coherences) + return 1.0 - np.nanmean(self.coherences) + + def coherence(self, outputs=None): + if outputs is None: + outputs = self.outputs + np.logical_xor(outputs, self.expected_outputs, self.output_xor) + return self.mat_coherence() + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + + raw_coherence = sum(coherences) / len(coherences) + check_coherence = self.mat_coherence() + + return raw_coherence + + def div_coherence(self): + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + if y_a < 0: + continue + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + if y_b < 0: + continue + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + # if y_a < 0 or y_b < 0: + # numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + if len(coherences) == 0: + return 1.0 + return sum(coherences) / len(coherences) + + def normalize_p(self): + check = self.knowns[:] + for i in range(0, len(self.p)): + if self.p[i] < 0: + self.p[i] = 0 + for i in range(0, len(self.p)): + if i in self.knowns: + flip = i ^ 0b1 + self.p[i] = 0.0 + self.p[flip] = 0.0 + else: + check.append(i) + stop_id = index_hash(check) + check.pop() + if stop_id in self.stops: + self.p[i] = 0.0 + total = np.sum(self.p) + if total > 0: + for i in range(0, len(self.p)): + self.p[i] = self.p[i] / total + + def reset_p(self): + self.p.fill(1.0) + self.normalize_p() + + def threshold(self): + # return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100) + return 1.0 - (self.epoch / 1000) + + def get_converged_index(self): + for i in range(0, len(self.p)): + if self.p[i] > self.threshold(): + return i + return None + + def add_layer(self): + self.has_added_layer = True + self.add_stop() + layer = Candidate(self.knowns) + self.layers.append(layer) + self.base = self.cache_layers() + self.knowns.pop() + self.reset_p() + + def random_sample(self): + self.randomize_inputs() + self.populate_distances() + self.compute_expected_outputs() + self.compute_base_outputs() + return self.coherence(self.base_outputs) + + def random_candidate(self): + indices = self.knowns[:] + np.copyto(self.p_temp, self.p) + self.p_temp[self.actual_N] = 0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + for _ in range(0, self.num_terms - len(self.knowns)): + index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0] + indices.append(index) + flip = index ^ 0b1 + self.p_temp[index] = 0 + self.p_temp[flip] = 0 + for i in range(0, len(self.p_temp)): + if i not in indices: + indices.append(i) + stop_id = index_hash(indices) + indices.pop() + if stop_id in self.stops: + self.p_temp[i] = 0.0 + total = 
np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + return Candidate(indices) + + def seed_candidate_pool(self): + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in self.candidate_ids: + continue + self.candidate_pool.append(candidate) + self.candidate_ids.add(candidate_id) + + def add_stop(self): + stop_id = index_hash(self.knowns) + self.stops.add(stop_id) + + def get_distribution(self, candidate, half = 1): + count = 0 + for i in range(0, len(self.inputs)): + value = candidate.evaluate(self.inputs[i]) + if value == half: + self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i] + count += 1 + else: + self.output_xor[i] = -1 + return (count, self.mat_coherence()) + + def update(self): + self.epoch += 1 + + index = -1 + subspace_index = -1 + # bar = 1.0 - (self.epoch / 10000.0) + if self.epoch >= 200: + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.epoch = 0 + # if len(self.knowns) > 0 and not self.has_added_layer: + # self.add_stop() + # self.knowns.pop() + self.has_added_layer = False + if len(self.knowns) == 0: + self.num_terms += 1 + self.stops = set() + else: + self.add_stop() + self.knowns.pop() + self.update() + return + + base_coherence = self.random_sample() + candidate = Candidate(self.knowns[:]) + + for i in range(0, self.actual_N): + # if i in self.knowns: + # continue + candidate.indices.append(i) + try: + if i in self.knowns: + continue + if index_hash(candidate.indices) in self.stops: + continue + + if len(candidate.indices) < self.num_terms: + (count_0, subspace_coherence_0) = self.get_distribution(candidate, 0) + delta_0 = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size + (count_1, subspace_coherence_1) = self.get_distribution(candidate, 1) + delta_1 = (subspace_coherence_1 - base_coherence) * count_1 / self.sample_size + self.uplift_samples[i].append(delta_0) + self.uplift_samples[i].append(delta_1) + mean = statistics.mean(self.uplift_samples[i]) + median = statistics.median(self.uplift_samples[i]) + self.uplift_convergences[i] = abs(self.uplift_medians[i] - median) + self.uplift_means[i] = mean + self.uplift_medians[i] = median + if self.epoch > 20 and self.uplift_convergences[i] < 1e-5 and self.uplift_medians[i] > 0: + if subspace_index < 0 or self.uplift_medians[i] > self.uplift_medians[subspace_index]: + subspace_index = i + # if self.uplift_convergences[i] < 1e-6 and self.uplift_means[i] > 0: + # if subspace_index < 0 or self.uplift_means[i] > self.uplift_means[subspace_index]: + # subspace_index = i + # self.subspace_uplifts[i] += delta + # if self.subspace_uplifts[i] > bar: + # if subspace_index < 0 or self.subspace_uplifts[i] > self.subspace_uplifts[subspace_index]: + # subspace_index = i + else: + for j in range(0, len(self.inputs)): + self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j]) + coherence = self.coherence() + delta = coherence - base_coherence + self.uplift_samples[i].append(delta) + self.uplift_ranges[i][0] = max(self.uplift_samples[i]) + self.uplift_ranges[i][1] = min(self.uplift_samples[i]) + mean = statistics.mean(self.uplift_samples[i]) + median = statistics.median(self.uplift_samples[i]) + if len(self.uplift_samples[i]) >= 2: + stddev = statistics.stdev(self.uplift_samples[i]) + self.uplift_stddevs[i] = stddev + self.uplift_convergences[i] = 
abs(self.uplift_medians[i] - median) + self.uplift_means[i] = mean + self.uplift_medians[i] = median + # self.uplifts[i] = 0.9 * self.uplifts[i] + 0.1 * coherence + self.uplifts[i] += delta + middle = self.uplift_ranges[i][1] + (self.uplift_ranges[i][0] - self.uplift_ranges[i][1]) / 2 + + if self.epoch > 20 and self.uplift_convergences[i] < 1e-5 and self.uplift_medians[i] > 0: + if index < 0 or self.uplift_medians[i] > self.uplift_medians[index]: + index = i + # if self.epoch > 100 and max(self.uplift_samples[i]) + min(self.uplift_samples[i]) > 0.01: + # if index < 0 or max(self.uplift_samples[i]) + min(self.uplift_samples[i]) > max(self.uplift_samples[index]) + min(self.uplift_samples[index]): + # index = i + # if self.uplift_convergences[i] < 1e-6 and self.uplift_means[i] > 0: + # if index < 0 or self.uplift_means[i] > self.uplift_means[index]: + # index = i + # if self.uplifts[i] > bar: + # if index < 0 or self.uplifts[i] > self.uplifts[index]: + # index = i + finally: + candidate.indices.pop() + + # print('=====' + str(base_coherence)) + # print(self.uplifts) + # print(self.uplift_means) + # print(self.uplift_medians) + # print(self.uplift_stddevs) + # print(self.uplift_ranges) + # print(self.uplift_convergences) + # print(self.subspace_uplifts) + + if index >= 0: + self.knowns.append(index) + print(base_coherence) + print(self.knowns, self.epoch) + # print(self.uplift_medians) + # print(self.uplifts) + # print(self.subspace_uplifts) + self.add_layer() + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.uplift_medians.fill(0) + self.uplift_convergences.fill(0) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.epoch = 0 + return + + if subspace_index >= 0: + self.knowns.append(subspace_index) + print(self.knowns, self.epoch) + # print(self.uplifts) + # print(self.subspace_uplifts) + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.uplift_medians.fill(0) + self.uplift_convergences.fill(0) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.epoch = 0 + return + + # print('======') + # print(self.epoch, base_coherence) + # print('======') + + # if len(self.candidate_pool) == 0: + # print(self.p) + + # for i in range(0, min(5, len(self.candidate_pool))): + # candidate = self.candidate_pool[i] + # print(candidate.id(), candidate.uplift) + + # if self.epoch < 15: + # return + + if self.candidate_pool[0].uplift > 0.3: + candidate = self.candidate_pool[0] + candidate_id = candidate.id() + self.candidate_ids.remove(candidate_id) + print(candidate_id) + self.knowns = candidate.indices + self.add_layer() + self.knowns = [] + self.reset_p() + self.epoch = 0 + self.candidate_pool = [] + self.candidate_ids = set() + elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200: + self.epoch = 0 + self.num_terms += 1 + self.candidate_pool = [] + self.candidate_ids = set() + self.knowns = [] + self.stops = set() + self.reset_p() + return + + # np.copyto(self.next_p, self.p) + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in visited: + continue + visited.add(candidate_id) + if self.actual_N in candidate.indices: + continue + has_candidate = True + for i in range(0, len(self.inputs)): + self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i]) + # coherence = self.ring_coherence() + coherence = self.coherence() + # if coherence <= base_coherence: + # continue + # for index in candidate.indices: + # 
self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0) + # self.p_temp[index] += 0 + for index in candidate.indices: + if coherence > self.max_coherences[index]: + self.max_coherences[index] = coherence + self.max_candidates[index] = candidate + # self.max_coherences[index] = max(self.max_coherences[index], coherence) + # np.copyto(self.p, self.next_p) + + # np.copyto(self.p_temp, self.p) + for i in range(0, self.actual_N): + candidate = self.max_candidates[i] + if candidate is None: + continue + for index in candidate.indices: + self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0) + # print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id()) + self.normalize_p() + # print(self.p) + + # np.subtract(self.p_temp, self.p, self.p_temp) + # np.abs(self.p_temp, self.p_temp) + # delta = np.sum(self.p_temp) / len(self.p_temp) + # print(delta, np.argmax(self.p)) + # np.copyto(self.p_temp, self.p) + # for i in range(0, len(self.p_temp)): + # self.p_temp[i] = round(self.p_temp[i] * 100) / 100 + # print(self.p_temp) + + index = np.argmax(self.p) + delta_over_null = self.p[index] - self.p[self.actual_N] + if self.epoch == 0: + self.average_delta_over_null = delta_over_null + else: + self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null + diff = self.num_terms - len(self.knowns) + + print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p)) + + # Always iterate for a minimum number of epochs + if self.epoch < 15: + return + if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300: + return + if self.average_delta_over_null < 0.001: + index = self.actual_N + else: + index = np.argmax(self.p) + + # index = np.argmax(self.p) + # if index == self.last_value: + # self.rounds += 1 + # else: + # self.rounds = 0 + # self.last_value = index + + # if self.rounds < 10 and self.epoch < 100: + # return + + # if self.epoch < 5 or (delta > 0.001 and self.epoch < 50): + # return + + # index = np.argmax(self.p) + + # print(self.p) + # print(self.threshold()) + # print(self.p) + # index = self.get_converged_index() + if not index is None or not has_candidate: + # print(index, delta, np.argmax(self.p)) + self.epoch = 0 + if index == self.actual_N or not has_candidate: + if len(self.knowns) > 0: + self.add_stop() + self.knowns.pop() + print('Backtrack: ' + str(self.knowns)) + self.reset_p() + return + self.num_terms += 1 + self.knowns = [] + self.stops = set() + self.reset_p() + print(self.num_terms) + return + self.knowns.append(index) + # bisect.insort(self.knowns, index) + if len(self.knowns) == self.num_terms: + print('Add layer: ' + str(self.knowns)) + self.add_layer() + else: + print('Found term: ' + str(self.knowns)) + self.reset_p() + print(base_coherence) + return + + def cache_layers(self): + expr = 'def f(x):\n\tresult=0\n' + for layer in self.layers: + expr += '\tresult^=' + layer.eval_str() + '\n' + expr += '\treturn result\n' + scope = {} + exec(expr, scope) + return scope['f'] + +def main(): + probabilities = Probabilities() + # probabilities.knowns = [14] + # probabilities.add_layer() + # probabilities.knowns = [8] + # probabilities.add_layer() + # probabilities.knowns = [4] + # probabilities.add_layer() + while probabilities.num_terms <= probabilities.N: + probabilities.update() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations16.py b/mutations16.py new file mode 100644 index 0000000..ace7f9f 
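For orientation between these near-duplicate experiments: the mutationsN.py variants in this patch score a candidate by the same distance-weighted agreement of residuals, implemented as coherence() above (mat_coherence is the vectorized form and additionally supports masking samples out with -1). Below is a minimal standalone sketch; the function name is illustrative only, and the 1 / 2**hamming weighting mirrors populate_distances.

def coherence_sketch(output_xor, distances):
    # output_xor[i] is the residual bit (model output XOR expected output) for sample i;
    # distances[i][j] = 1.0 / 2**hamming(x_i, x_j), with the diagonal skipped below.
    n = len(output_xor)
    total = 0.0
    for i in range(n):
        agree = sum(distances[i][j] for j in range(n) if j != i and output_xor[j] == output_xor[i])
        weight = sum(distances[i][j] for j in range(n) if j != i)
        total += agree / weight if weight > 0 else 0.0
    # 1.0 means every residual agrees with every other, i.e. the remaining error is constant
    return total / n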
--- /dev/null +++ b/mutations16.py @@ -0,0 +1,663 @@ +import bisect +from cmath import isnan +from email.mime import base +import matplotlib.pyplot as plt +import hashlib +import math +import numpy as np +import random +import statistics + +from pkg_resources import get_distribution +from scipy import stats + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def index_hash(indices): + return ','.join([str(index) for index in sorted(indices)]) + +def bin_div(a, b): + if a == 0 and b == 0: + return 2 + if a == 1 and b == 0: + return -1 + if a == 0 and b == 1: + return 0 + return 1 + +class Candidate(): + def __init__(self, indices): + self.indices = indices[:] + self.uplift = 0 + + def evaluate(self, x): + if len(x) in self.indices: + return 0 + value = 1 + for index in self.indices: + value *= x[index] + return value + + def id(self): + return index_hash(self.indices) + + def eval_str(self): + parts = [] + for index in self.indices: + parts.append('x[' + str(index) + ']') + return '*'.join(parts) + +class Probabilities(): + def __init__(self): + self.N = 16 + self.actual_N = self.N * 2 + self.num_terms = 1 + self.num_candidates = 100 + self.sample_size = 64 + self.p = np.zeros((self.actual_N + 1,)) + self.p_temp = np.empty_like(self.p) + self.next_p = np.empty_like(self.p) + self.knowns = [] + self.stops = set() + self.reset_p() + self.epoch = 0 + + self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32) + self.masked_distances = np.zeros((self.sample_size, self.sample_size)) + self.distances = np.zeros((self.sample_size, self.sample_size)) + self.xor_square = np.zeros((self.sample_size, self.sample_size)) + self.base_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.outputs = np.zeros((self.sample_size)).astype(np.int32) + self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.output_xor = np.zeros((self.sample_size)).astype(np.int32) + self.mask = np.zeros((self.sample_size)) + self.numerators = np.zeros((self.sample_size)) + self.denominators = np.zeros((self.sample_size)) + self.coherences = np.zeros((self.sample_size)) + self.max_coherences = np.zeros((self.actual_N + 1)) + self.max_candidates = [None for _ in range(0, self.actual_N)] + self.uplifts = np.zeros((self.actual_N)) + self.uplift_means = np.zeros((self.actual_N)) + self.uplift_medians = np.zeros((self.actual_N)) + self.uplift_convergences = np.zeros((self.actual_N)) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)] + self.superspace_uplift_samples = [] + self.subspace_uplifts = np.zeros((self.actual_N)) + self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)] + self.uplift_stddevs = np.zeros((self.actual_N)) + + self.layers = [] + self.layer_confidence = {} + self.base = None + + self.scratch = np.zeros((self.actual_N,)) + + self.last_value = -1 + self.rounds = 0 + self.average_delta_over_null = 0 + self.visited = set() + + self.candidate_pool = [] + self.candidate_ids = set() + self.has_added_layer = False + + def randomize_inputs(self): + for i in range(0, self.sample_size): 
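+            # Each random bit j is stored as a complementary pair: x[2j] = bit, x[2j+1] = bit ^ 1,
+            # so a product term can reference either polarity of the variable.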
+ for j in range(0, self.N): + val = random.randint(0, 1) + self.inputs[i][j * 2] = val + self.inputs[i][j * 2 + 1] = val ^ 1 + + def populate_distances(self): + for i in range(0, len(self.inputs)): + x_a = self.inputs[i] + for j in range(0, len(self.inputs)): + if i == j: + continue + x_b = self.inputs[j] + distance = hamming_distance(x_a, x_b, self.scratch) + self.distances[i][j] = 1.0 / (2 ** distance) + + def compute_expected_outputs(self): + for i in range(0, len(self.inputs)): + self.expected_outputs[i] = sha(self.inputs[i]) + + def compute_base_outputs(self): + if self.base is None: + self.base_outputs.fill(0) + return + for i in range(0, len(self.inputs)): + self.base_outputs[i] = self.base(self.inputs[i]) + + def mat_coherence(self): + np.abs(self.output_xor, self.mask) + np.subtract(self.output_xor, self.mask, self.mask) + np.divide(self.mask, 2.0, self.mask) + np.add(1.0, self.mask, self.mask) + self.xor_square.fill(0) + np.copyto(self.masked_distances, self.distances) + masked_distances_t = self.masked_distances.transpose() + for i in range(0, len(self.xor_square)): + self.xor_square[i] = self.output_xor + np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i]) + np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i]) + np.sum(self.masked_distances, axis=0, out=self.denominators) + self.xor_square = self.xor_square.transpose() + np.logical_xor(self.xor_square, self.output_xor, self.xor_square) + np.multiply(self.xor_square, self.masked_distances, self.xor_square) + np.sum(self.xor_square, axis=0, out=self.numerators) + np.divide(self.numerators, self.denominators, self.coherences) + mean = np.nanmean(self.coherences) + if isnan(mean): + mean = 1.0 + return 1.0 - mean + + def coherence(self, outputs=None): + if outputs is None: + outputs = self.outputs + np.logical_xor(outputs, self.expected_outputs, self.output_xor) + return self.mat_coherence() + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + + raw_coherence = sum(coherences) / len(coherences) + check_coherence = self.mat_coherence() + + return raw_coherence + + def div_coherence(self): + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + if y_a < 0: + continue + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + if y_b < 0: + continue + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + # if y_a < 0 or y_b < 0: + # numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + if len(coherences) == 0: + return 1.0 + return sum(coherences) / len(coherences) + + def normalize_p(self): + check = self.knowns[:] + for i in range(0, len(self.p)): + if self.p[i] < 0: + self.p[i] = 0 + for i in range(0, len(self.p)): + if i in self.knowns: + flip = i ^ 0b1 + self.p[i] = 0.0 + self.p[flip] = 0.0 + else: + check.append(i) + stop_id = index_hash(check) + check.pop() + if stop_id in self.stops: + self.p[i] = 0.0 + total = np.sum(self.p) + if total > 0: + for i in range(0, 
len(self.p)): + self.p[i] = self.p[i] / total + + def reset_p(self): + self.p.fill(1.0) + self.normalize_p() + + def threshold(self): + # return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100) + return 1.0 - (self.epoch / 1000) + + def get_converged_index(self): + for i in range(0, len(self.p)): + if self.p[i] > self.threshold(): + return i + return None + + def add_layer(self): + self.has_added_layer = True + self.add_stop() + layer = Candidate(self.knowns) + self.layers.append(layer) + self.base = self.cache_layers() + self.knowns.pop() + self.reset_p() + + def random_sample(self): + self.randomize_inputs() + self.populate_distances() + self.compute_expected_outputs() + self.compute_base_outputs() + return self.coherence(self.base_outputs) + + def random_candidate(self): + indices = self.knowns[:] + np.copyto(self.p_temp, self.p) + self.p_temp[self.actual_N] = 0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + for _ in range(0, self.num_terms - len(self.knowns)): + index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0] + indices.append(index) + flip = index ^ 0b1 + self.p_temp[index] = 0 + self.p_temp[flip] = 0 + for i in range(0, len(self.p_temp)): + if i not in indices: + indices.append(i) + stop_id = index_hash(indices) + indices.pop() + if stop_id in self.stops: + self.p_temp[i] = 0.0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + return Candidate(indices) + + def seed_candidate_pool(self): + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in self.candidate_ids: + continue + self.candidate_pool.append(candidate) + self.candidate_ids.add(candidate_id) + + def add_stop(self): + stop_id = index_hash(self.knowns) + self.stops.add(stop_id) + + def get_distribution(self, candidate, half = 1): + count = 0 + for i in range(0, len(self.inputs)): + value = candidate.evaluate(self.inputs[i]) + if value == half: + self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i] + count += 1 + else: + self.output_xor[i] = -1 + return (count, self.mat_coherence()) + + def update(self): + self.epoch += 1 + + base_coherence = self.random_sample() + candidate = Candidate(self.knowns[:]) + + if len(candidate.indices) > 0: + index = candidate.indices.pop() + try: + count_0, superspace_coherence_0 = self.get_distribution(candidate, 0) + count_1, superspace_coherence_1 = self.get_distribution(candidate, 1) + # delta = (superspace_coherence - base_coherence) * count / self.sample_size + delta = superspace_coherence_0 - superspace_coherence_1 + self.superspace_uplift_samples.append(delta) + finally: + candidate.indices.append(index) + + for i in range(0, self.actual_N): + candidate.indices.append(i) + try: + if i in self.knowns: + continue + + count_0, subspace_coherence_0 = self.get_distribution(candidate, 0) + # count_1, subspace_coherence_1 = self.get_distribution(candidate, 1) + delta = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size + # delta = subspace_coherence_0 - subspace_coherence_1 + self.subspace_uplift_samples[i].append(delta) + + # if index_hash(candidate.indices) in self.stops: + # continue + + for j in range(0, len(self.inputs)): + self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j]) + + coherence = self.coherence() + delta = coherence - base_coherence + 
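+                # Uplift sample: change in coherence when this candidate term is XOR-ed into the current base model.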
self.uplift_samples[i].append(delta) + finally: + candidate.indices.pop() + + if self.epoch >= 100: + # for i in range(0, self.actual_N): + # parameters = stats.norm.fit(self.uplift_samples[i]) + # print(i, parameters) + # print(i, stats.kstest(self.uplift_samples[i], "norm", parameters)) + + # fig, axs = plt.subplots(4, 4) + # for i in range(0, 4): + # for j in range(0, 4): + # n, bins, patches = axs[i][j].hist(self.uplift_samples[i * 4 + j], 50, density=True, facecolor='g', alpha=0.75) + # plt.show() + + try: + index = -1 + best_mu = -1 + confidence = -1 + for i in range(0, self.actual_N): + if len(self.uplift_samples[i]) == 0: + continue + parameters = stats.norm.fit(self.uplift_samples[i]) + (mu, _) = parameters + # median = statistics.median(self.uplift_samples[i]) + if mu > 0: + result = stats.kstest(self.uplift_samples[i], stats.norm.cdf, parameters) + layer_id = index_hash(self.knowns + [i]) + if layer_id in self.layer_confidence: + layer_confidence = self.layer_confidence[layer_id] + if layer_confidence >= result.pvalue: + continue + if index < 0 or mu > best_mu: + best_mu = mu + index = i + confidence = result.pvalue + if index >= 0: + self.knowns.append(index) + self.layer_confidence[index_hash(self.knowns)] = confidence + # num_terms = len(self.knowns) + print(self.knowns, best_mu, confidence) + print(base_coherence) + self.add_layer() + # if num_terms > self.num_terms: + # self.stops = set() + # self.num_terms = num_terms + self.knowns = [] + return + + index = -1 + best_mu = -1 + superspace_median = statistics.median(self.superspace_uplift_samples) if len(self.superspace_uplift_samples) > 0 else -1 + for i in range(0, self.actual_N): + if len(self.subspace_uplift_samples[i]) == 0: + continue + # median = statistics.median(self.subspace_uplift_samples[i]) + parameters = stats.norm.fit(self.subspace_uplift_samples[i]) + (mu, _) = parameters + if mu > 0: + result = stats.kstest(self.subspace_uplift_samples[i], stats.norm.cdf, parameters) + # print(i, mu, result.pvalue) + if result.pvalue > 0.95: + if index < 0 or mu > best_mu: + # if median > best_median: + best_mu = mu + index = i + + if index >= 0: + self.knowns.append(index) + print(self.knowns, best_mu) + return + + if len(self.knowns) > 0: + # self.add_stop() + self.knowns = [] + finally: + self.epoch = 0 + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)] + self.superspace_uplift_samples = [] + return + + # print('=====' + str(base_coherence)) + # print(self.uplifts) + # print(self.uplift_means) + # print(self.uplift_medians) + # print(self.uplift_stddevs) + # print(self.uplift_ranges) + # print(self.uplift_convergences) + # print(self.subspace_uplifts) + + if index >= 0: + self.knowns.append(index) + print(base_coherence) + print(self.knowns, self.epoch) + # print(self.uplift_medians) + # print(self.uplifts) + # print(self.subspace_uplifts) + self.add_layer() + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.uplift_medians.fill(0) + self.uplift_convergences.fill(0) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.epoch = 0 + return + + if subspace_index >= 0: + self.knowns.append(subspace_index) + print(self.knowns, self.epoch) + # print(self.uplifts) + # print(self.subspace_uplifts) + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.uplift_medians.fill(0) + self.uplift_convergences.fill(0) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.epoch = 0 + return + + # print('======') + 
# print(self.epoch, base_coherence) + # print('======') + + # if len(self.candidate_pool) == 0: + # print(self.p) + + # for i in range(0, min(5, len(self.candidate_pool))): + # candidate = self.candidate_pool[i] + # print(candidate.id(), candidate.uplift) + + # if self.epoch < 15: + # return + + if self.candidate_pool[0].uplift > 0.3: + candidate = self.candidate_pool[0] + candidate_id = candidate.id() + self.candidate_ids.remove(candidate_id) + print(candidate_id) + self.knowns = candidate.indices + self.add_layer() + self.knowns = [] + self.reset_p() + self.epoch = 0 + self.candidate_pool = [] + self.candidate_ids = set() + elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200: + self.epoch = 0 + self.num_terms += 1 + self.candidate_pool = [] + self.candidate_ids = set() + self.knowns = [] + self.stops = set() + self.reset_p() + return + + # np.copyto(self.next_p, self.p) + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in visited: + continue + visited.add(candidate_id) + if self.actual_N in candidate.indices: + continue + has_candidate = True + for i in range(0, len(self.inputs)): + self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i]) + # coherence = self.ring_coherence() + coherence = self.coherence() + # if coherence <= base_coherence: + # continue + # for index in candidate.indices: + # self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0) + # self.p_temp[index] += 0 + for index in candidate.indices: + if coherence > self.max_coherences[index]: + self.max_coherences[index] = coherence + self.max_candidates[index] = candidate + # self.max_coherences[index] = max(self.max_coherences[index], coherence) + # np.copyto(self.p, self.next_p) + + # np.copyto(self.p_temp, self.p) + for i in range(0, self.actual_N): + candidate = self.max_candidates[i] + if candidate is None: + continue + for index in candidate.indices: + self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0) + # print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id()) + self.normalize_p() + # print(self.p) + + # np.subtract(self.p_temp, self.p, self.p_temp) + # np.abs(self.p_temp, self.p_temp) + # delta = np.sum(self.p_temp) / len(self.p_temp) + # print(delta, np.argmax(self.p)) + # np.copyto(self.p_temp, self.p) + # for i in range(0, len(self.p_temp)): + # self.p_temp[i] = round(self.p_temp[i] * 100) / 100 + # print(self.p_temp) + + index = np.argmax(self.p) + delta_over_null = self.p[index] - self.p[self.actual_N] + if self.epoch == 0: + self.average_delta_over_null = delta_over_null + else: + self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null + diff = self.num_terms - len(self.knowns) + + print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p)) + + # Always iterate for a minimum number of epochs + if self.epoch < 15: + return + if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300: + return + if self.average_delta_over_null < 0.001: + index = self.actual_N + else: + index = np.argmax(self.p) + + # index = np.argmax(self.p) + # if index == self.last_value: + # self.rounds += 1 + # else: + # self.rounds = 0 + # self.last_value = index + + # if self.rounds < 10 and self.epoch < 100: + # return + + # if self.epoch < 5 or (delta > 0.001 and self.epoch < 50): + # return + + # index = np.argmax(self.p) + + 
# print(self.p) + # print(self.threshold()) + # print(self.p) + # index = self.get_converged_index() + if not index is None or not has_candidate: + # print(index, delta, np.argmax(self.p)) + self.epoch = 0 + if index == self.actual_N or not has_candidate: + if len(self.knowns) > 0: + self.add_stop() + self.knowns.pop() + print('Backtrack: ' + str(self.knowns)) + self.reset_p() + return + self.num_terms += 1 + self.knowns = [] + self.stops = set() + self.reset_p() + print(self.num_terms) + return + self.knowns.append(index) + # bisect.insort(self.knowns, index) + if len(self.knowns) == self.num_terms: + print('Add layer: ' + str(self.knowns)) + self.add_layer() + else: + print('Found term: ' + str(self.knowns)) + self.reset_p() + print(base_coherence) + return + + def cache_layers(self): + expr = 'def f(x):\n\tresult=0\n' + for layer in self.layers: + expr += '\tresult^=' + layer.eval_str() + '\n' + expr += '\treturn result\n' + scope = {} + exec(expr, scope) + return scope['f'] + +def main(): + probabilities = Probabilities() + # probabilities.knowns = [14] + # probabilities.add_layer() + # probabilities.knowns = [8] + # probabilities.add_layer() + # probabilities.knowns = [4] + # probabilities.add_layer() + while probabilities.num_terms <= probabilities.N: + probabilities.update() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations17.py b/mutations17.py new file mode 100644 index 0000000..aa9a21b --- /dev/null +++ b/mutations17.py @@ -0,0 +1,669 @@ +import bisect +from cmath import isnan +from email.mime import base +import matplotlib.pyplot as plt +import hashlib +import math +import numpy as np +import random +import statistics + +from pkg_resources import get_distribution +from scipy import stats + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def xor(v): + total = np.sum(v) + value = total % 2 + return np.sum(v) % 2 + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def index_hash(indices): + return ','.join([str(index) for index in sorted(indices)]) + +def bin_div(a, b): + if a == 0 and b == 0: + return 2 + if a == 1 and b == 0: + return -1 + if a == 0 and b == 1: + return 0 + return 1 + +class Candidate(): + def __init__(self, indices): + self.indices = indices[:] + self.uplift = 0 + + def evaluate(self, x): + if len(x) in self.indices: + return 0 + value = 1 + for index in self.indices: + value *= x[index] + return value + + def id(self): + return index_hash(self.indices) + + def eval_str(self): + parts = [] + for index in self.indices: + parts.append('x[' + str(index) + ']') + return '*'.join(parts) + +class Probabilities(): + def __init__(self): + self.N = 8 + self.actual_N = self.N * 2 + self.num_terms = 1 + self.num_candidates = 100 + # self.sample_size = self.N ** 2 + self.sample_size = 64 + self.p = np.zeros((self.actual_N + 1,)) + self.p_temp = np.empty_like(self.p) + self.next_p = np.empty_like(self.p) + self.knowns = [] + self.stops = set() + self.reset_p() + self.epoch = 0 + + self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32) + self.raw_inputs = np.zeros((self.sample_size, self.N)).astype(np.int32) + self.masked_distances = 
np.zeros((self.sample_size, self.sample_size)) + self.distances = np.zeros((self.sample_size, self.sample_size)) + self.xor_square = np.zeros((self.sample_size, self.sample_size)) + self.base_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.outputs = np.zeros((self.sample_size)).astype(np.int32) + self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.output_xor = np.zeros((self.sample_size)).astype(np.int32) + self.mask = np.zeros((self.sample_size)) + self.numerators = np.zeros((self.sample_size)) + self.denominators = np.zeros((self.sample_size)) + self.coherences = np.zeros((self.sample_size)) + self.max_coherences = np.zeros((self.actual_N + 1)) + self.max_candidates = [None for _ in range(0, self.actual_N)] + self.uplifts = np.zeros((self.actual_N)) + self.uplift_means = np.zeros((self.actual_N)) + self.uplift_medians = np.zeros((self.actual_N)) + self.uplift_convergences = np.zeros((self.actual_N)) + # self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)] + self.superspace_uplift_samples = [] + self.subspace_uplifts = np.zeros((self.actual_N)) + self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)] + self.uplift_stddevs = np.zeros((self.actual_N)) + + self.samples = 1000 + # self.samples = 200 + self.base_coherence_samples = np.zeros((self.samples)) + self.coherence_samples = np.zeros((self.actual_N, self.samples)) + self.subspace_uplift_left_samples = np.zeros((self.actual_N, self.samples)) + self.subspace_uplift_right_samples = np.zeros((self.actual_N, self.samples)) + + self.layers = [] + self.layer_confidence = {} + self.base = None + + self.scratch = np.zeros((self.N,)) + + self.last_value = -1 + self.rounds = 0 + self.average_delta_over_null = 0 + self.visited = set() + + self.candidate_pool = [] + self.candidate_ids = set() + self.has_added_layer = False + + def randomize_inputs(self): + for i in range(0, self.sample_size): + for j in range(0, self.N): + val = random.randint(0, 1) + self.raw_inputs[i][j] = val + self.inputs[i][j * 2] = val + self.inputs[i][j * 2 + 1] = val ^ 1 + + def populate_distances(self): + for i in range(0, len(self.raw_inputs)): + x_a = self.raw_inputs[i] + for j in range(0, len(self.raw_inputs)): + if i == j: + continue + x_b = self.raw_inputs[j] + distance = hamming_distance(x_a, x_b, self.scratch) + self.distances[i][j] = 1.0 / (2 ** (distance - 1)) if distance > 0 else 0 + # self.distances[i][j] = 1.0 / (distance ** 2) if distance > 0 else 0 + + def compute_expected_outputs(self): + for i in range(0, len(self.raw_inputs)): + self.expected_outputs[i] = xor(self.raw_inputs[i]) + + def compute_base_outputs(self): + if self.base is None: + self.base_outputs.fill(0) + return + for i in range(0, len(self.inputs)): + self.base_outputs[i] = self.base(self.inputs[i]) + + def mat_coherence(self): + np.abs(self.output_xor, self.mask) + np.subtract(self.output_xor, self.mask, self.mask) + np.divide(self.mask, 2.0, self.mask) + np.add(1.0, self.mask, self.mask) + self.xor_square.fill(0) + np.copyto(self.masked_distances, self.distances) + masked_distances_t = self.masked_distances.transpose() + for i in range(0, len(self.xor_square)): + self.xor_square[i] = self.output_xor + np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i]) + np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i]) + np.sum(self.masked_distances, axis=0, out=self.denominators) + self.xor_square = self.xor_square.transpose() + np.logical_xor(self.xor_square, self.output_xor, self.xor_square) + 
np.multiply(self.xor_square, self.masked_distances, self.xor_square) + np.sum(self.xor_square, axis=0, out=self.numerators) + np.divide(self.numerators, self.denominators, self.coherences) + mean = np.nanmean(self.coherences) + if isnan(mean): + mean = 1.0 + return 1.0 - mean + + def coherence(self, outputs=None): + if outputs is None: + outputs = self.outputs + np.logical_xor(outputs, self.expected_outputs, self.output_xor) + return self.mat_coherence() + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + + raw_coherence = sum(coherences) / len(coherences) + check_coherence = self.mat_coherence() + + return raw_coherence + + def div_coherence(self): + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + if y_a < 0: + continue + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + if y_b < 0: + continue + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + # if y_a < 0 or y_b < 0: + # numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + if len(coherences) == 0: + return 1.0 + return sum(coherences) / len(coherences) + + def normalize_p(self): + check = self.knowns[:] + for i in range(0, len(self.p)): + if self.p[i] < 0: + self.p[i] = 0 + for i in range(0, len(self.p)): + if i in self.knowns: + flip = i ^ 0b1 + self.p[i] = 0.0 + self.p[flip] = 0.0 + else: + check.append(i) + stop_id = index_hash(check) + check.pop() + if stop_id in self.stops: + self.p[i] = 0.0 + total = np.sum(self.p) + if total > 0: + for i in range(0, len(self.p)): + self.p[i] = self.p[i] / total + + def reset_p(self): + self.p.fill(1.0) + self.normalize_p() + + def threshold(self): + # return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100) + return 1.0 - (self.epoch / 1000) + + def get_converged_index(self): + for i in range(0, len(self.p)): + if self.p[i] > self.threshold(): + return i + return None + + def add_layer(self): + self.has_added_layer = True + self.add_stop() + layer = Candidate(self.knowns) + self.layers.append(layer) + self.base = self.cache_layers() + self.knowns.pop() + self.reset_p() + + def random_sample(self): + self.randomize_inputs() + self.populate_distances() + self.compute_expected_outputs() + self.compute_base_outputs() + return self.coherence(self.base_outputs) + + def random_candidate(self): + indices = self.knowns[:] + np.copyto(self.p_temp, self.p) + self.p_temp[self.actual_N] = 0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + for _ in range(0, self.num_terms - len(self.knowns)): + index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0] + indices.append(index) + flip = index ^ 0b1 + self.p_temp[index] = 0 + self.p_temp[flip] = 0 + for i in range(0, len(self.p_temp)): + if i not in indices: + indices.append(i) + stop_id = index_hash(indices) + indices.pop() + if stop_id in self.stops: + self.p_temp[i] = 0.0 + total = np.sum(self.p_temp) + if total == 0: + return None + 
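+            # Renormalise the remaining probabilities after zeroing out indices that would recreate a stopped term.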
np.divide(self.p_temp, total, self.p_temp) + return Candidate(indices) + + def seed_candidate_pool(self): + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in self.candidate_ids: + continue + self.candidate_pool.append(candidate) + self.candidate_ids.add(candidate_id) + + def add_stop(self): + stop_id = index_hash(self.knowns) + self.stops.add(stop_id) + + def get_distribution(self, candidate, half = 1): + count = 0 + for i in range(0, len(self.inputs)): + value = candidate.evaluate(self.inputs[i]) + if value == half: + self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i] + count += 1 + else: + self.output_xor[i] = -1 + return (count, self.mat_coherence()) + + def update(self): + sample = self.epoch + self.epoch += 1 + + base_coherence = self.random_sample() + self.base_coherence_samples[sample] = base_coherence - 0.5 + candidate = Candidate(self.knowns[:]) + + for i in range(0, self.actual_N): + candidate.indices.append(i) + try: + count_0, subspace_coherence_0 = self.get_distribution(candidate, 0) + count_1, subspace_coherence_1 = self.get_distribution(candidate, 1) + # delta = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size + # delta = subspace_coherence_0 - subspace_coherence_1 + self.subspace_uplift_left_samples[i][sample] = subspace_coherence_0 - 0.5 + self.subspace_uplift_right_samples[i][sample] = subspace_coherence_1 - 0.5 + + # if index_hash(candidate.indices) in self.stops: + # continue + + for j in range(0, len(self.inputs)): + self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j]) + + coherence = self.coherence() + self.coherence_samples[i][sample] = coherence - 0.5 + finally: + candidate.indices.pop() + + if self.epoch >= self.samples: + # for i in range(0, self.actual_N): + # parameters = stats.norm.fit(self.uplift_samples[i]) + # print(i, parameters) + # print(i, stats.kstest(self.uplift_samples[i], "norm", parameters)) + + added = False + # parameters = stats.norm.fit(self.base_coherence_samples) + # (base_mu, _) = parameters + + try: + index = -1 + lowest_pvalue = -1 + is_subspace = False + for i in range(0, self.actual_N): + if i in self.knowns: + continue + result = stats.kstest(self.base_coherence_samples, self.coherence_samples[i], alternative='greater') + print(i, result) + # value = result.pvalue * (1 - result.statistic) + if index < 0 or result.pvalue < lowest_pvalue: + # if index < 0 or value < lowest_pvalue: + index = i + lowest_pvalue = result.pvalue + + for i in range(0, self.actual_N): + if i in self.knowns: + continue + result = stats.kstest(self.base_coherence_samples, self.subspace_uplift_left_samples[i], alternative='greater') + # result = stats.kstest(self.subspace_uplift_left_samples[i], self.subspace_uplift_right_samples[i], alternative='greater') + print(i, result) + # value = result.pvalue * (1 - result.statistic) + if index < 0 or result.pvalue < lowest_pvalue: + # if index < 0 or value < lowest_pvalue: + index = i + lowest_pvalue = result.pvalue + is_subspace = True + + # if result.pvalue > 0.95: + # index = i + # parameters = stats.norm.fit(self.subspace_uplift_samples[i]) + # (mu, _) = parameters + # if mu > base_mu: + # if index < 0 or mu > highest_mu: + # index = i + # highest_mu = mu + + if index >= 0: + if is_subspace: + # print('subspace') + self.knowns.append(index) + print(self.knowns, lowest_pvalue) + else: + # print('flat') + self.knowns.append(index) + # 
self.layer_confidence[index_hash(self.knowns)] = confidence + # num_terms = len(self.knowns) + print(self.knowns, lowest_pvalue) + print(base_coherence) + self.add_layer() + # if num_terms > self.num_terms: + # self.stops = set() + # self.num_terms = num_terms + self.knowns = [] + return + + # if len(self.knowns) > 0: + # # self.add_stop() + # self.knowns = [] + finally: + fig, axs = plt.subplots(4, 4) + for i in range(0, 4): + for j in range(0, 4): + axs[i][j].hist(self.base_coherence_samples, 50, density=True, facecolor='r', alpha=0.5) + n, bins, patches = axs[i][j].hist(self.coherence_samples[i * 4 + j], 50, density=True, facecolor='g', alpha=0.5) + n, bins, patches = axs[i][j].hist(self.subspace_uplift_left_samples[i * 4 + j], 50, density=True, facecolor='b', alpha=0.5) + # n, bins, patches = axs[i][j].hist(self.subspace_uplift_right_samples[i * 4 + j], 50, density=True, facecolor='b', alpha=0.5) + plt.show() + self.epoch = 0 + + return + + # print('=====' + str(base_coherence)) + # print(self.uplifts) + # print(self.uplift_means) + # print(self.uplift_medians) + # print(self.uplift_stddevs) + # print(self.uplift_ranges) + # print(self.uplift_convergences) + # print(self.subspace_uplifts) + + if index >= 0: + self.knowns.append(index) + print(base_coherence) + print(self.knowns, self.epoch) + # print(self.uplift_medians) + # print(self.uplifts) + # print(self.subspace_uplifts) + self.add_layer() + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.uplift_medians.fill(0) + self.uplift_convergences.fill(0) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.epoch = 0 + return + + if subspace_index >= 0: + self.knowns.append(subspace_index) + print(self.knowns, self.epoch) + # print(self.uplifts) + # print(self.subspace_uplifts) + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.uplift_medians.fill(0) + self.uplift_convergences.fill(0) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.epoch = 0 + return + + # print('======') + # print(self.epoch, base_coherence) + # print('======') + + # if len(self.candidate_pool) == 0: + # print(self.p) + + # for i in range(0, min(5, len(self.candidate_pool))): + # candidate = self.candidate_pool[i] + # print(candidate.id(), candidate.uplift) + + # if self.epoch < 15: + # return + + if self.candidate_pool[0].uplift > 0.3: + candidate = self.candidate_pool[0] + candidate_id = candidate.id() + self.candidate_ids.remove(candidate_id) + print(candidate_id) + self.knowns = candidate.indices + self.add_layer() + self.knowns = [] + self.reset_p() + self.epoch = 0 + self.candidate_pool = [] + self.candidate_ids = set() + elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200: + self.epoch = 0 + self.num_terms += 1 + self.candidate_pool = [] + self.candidate_ids = set() + self.knowns = [] + self.stops = set() + self.reset_p() + return + + # np.copyto(self.next_p, self.p) + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in visited: + continue + visited.add(candidate_id) + if self.actual_N in candidate.indices: + continue + has_candidate = True + for i in range(0, len(self.inputs)): + self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i]) + # coherence = self.ring_coherence() + coherence = self.coherence() + # if coherence <= base_coherence: + # continue + # for index in candidate.indices: + # self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0) + # 
self.p_temp[index] += 0 + for index in candidate.indices: + if coherence > self.max_coherences[index]: + self.max_coherences[index] = coherence + self.max_candidates[index] = candidate + # self.max_coherences[index] = max(self.max_coherences[index], coherence) + # np.copyto(self.p, self.next_p) + + # np.copyto(self.p_temp, self.p) + for i in range(0, self.actual_N): + candidate = self.max_candidates[i] + if candidate is None: + continue + for index in candidate.indices: + self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0) + # print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id()) + self.normalize_p() + # print(self.p) + + # np.subtract(self.p_temp, self.p, self.p_temp) + # np.abs(self.p_temp, self.p_temp) + # delta = np.sum(self.p_temp) / len(self.p_temp) + # print(delta, np.argmax(self.p)) + # np.copyto(self.p_temp, self.p) + # for i in range(0, len(self.p_temp)): + # self.p_temp[i] = round(self.p_temp[i] * 100) / 100 + # print(self.p_temp) + + index = np.argmax(self.p) + delta_over_null = self.p[index] - self.p[self.actual_N] + if self.epoch == 0: + self.average_delta_over_null = delta_over_null + else: + self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null + diff = self.num_terms - len(self.knowns) + + print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p)) + + # Always iterate for a minimum number of epochs + if self.epoch < 15: + return + if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300: + return + if self.average_delta_over_null < 0.001: + index = self.actual_N + else: + index = np.argmax(self.p) + + # index = np.argmax(self.p) + # if index == self.last_value: + # self.rounds += 1 + # else: + # self.rounds = 0 + # self.last_value = index + + # if self.rounds < 10 and self.epoch < 100: + # return + + # if self.epoch < 5 or (delta > 0.001 and self.epoch < 50): + # return + + # index = np.argmax(self.p) + + # print(self.p) + # print(self.threshold()) + # print(self.p) + # index = self.get_converged_index() + if not index is None or not has_candidate: + # print(index, delta, np.argmax(self.p)) + self.epoch = 0 + if index == self.actual_N or not has_candidate: + if len(self.knowns) > 0: + self.add_stop() + self.knowns.pop() + print('Backtrack: ' + str(self.knowns)) + self.reset_p() + return + self.num_terms += 1 + self.knowns = [] + self.stops = set() + self.reset_p() + print(self.num_terms) + return + self.knowns.append(index) + # bisect.insort(self.knowns, index) + if len(self.knowns) == self.num_terms: + print('Add layer: ' + str(self.knowns)) + self.add_layer() + else: + print('Found term: ' + str(self.knowns)) + self.reset_p() + print(base_coherence) + return + + def cache_layers(self): + expr = 'def f(x):\n\tresult=0\n' + for layer in self.layers: + expr += '\tresult^=' + layer.eval_str() + '\n' + expr += '\treturn result\n' + scope = {} + exec(expr, scope) + return scope['f'] + +def main(): + probabilities = Probabilities() + # probabilities.knowns = [14] + # probabilities.add_layer() + # probabilities.knowns = [8] + # probabilities.add_layer() + # probabilities.knowns = [4] + # probabilities.add_layer() + while probabilities.num_terms <= probabilities.N: + probabilities.update() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations18.py b/mutations18.py new file mode 100644 index 0000000..376767b --- /dev/null +++ b/mutations18.py @@ -0,0 +1,845 @@ +import bisect 
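+# mutations18.py switches the coherence statistic to a nearest-neighbour vote (nn_coherence),
+# scores each index with a one-sample t-test on its per-epoch coherence delta relative to the
+# base model, and flips the left_half flag (switching between the highest- and lowest-p-value
+# pick) when the same index keeps returning a nearly unchanged p-value.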
+from cmath import isnan +from email.mime import base +import matplotlib.pyplot as plt +import hashlib +import math +import numpy as np +import random +import statistics + +from pkg_resources import get_distribution +from scipy import optimize, stats +from astropy import modeling + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def xor(v): + return np.sum(v[1:]) % 2 + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def index_hash(indices): + return ','.join([str(index) for index in sorted(indices)]) + +def bin_div(a, b): + if a == 0 and b == 0: + return 2 + if a == 1 and b == 0: + return -1 + if a == 0 and b == 1: + return 0 + return 1 + +class Candidate(): + def __init__(self, indices): + self.indices = indices[:] + self.uplift = 0 + + def evaluate(self, x): + if len(x) in self.indices: + return 0 + value = 1 + for index in self.indices: + value *= x[index] + return value + + def id(self): + return index_hash(self.indices) + + def eval_str(self): + parts = [] + for index in self.indices: + parts.append('x[' + str(index) + ']') + return '*'.join(parts) + +class Probabilities(): + def __init__(self): + self.N = 16 + self.actual_N = self.N * 2 + self.num_terms = 1 + self.num_candidates = 100 + # self.sample_size = self.N ** 2 + self.sample_size = 64 + self.p = np.zeros((self.actual_N + 1,)) + self.p_temp = np.empty_like(self.p) + self.next_p = np.empty_like(self.p) + self.knowns = [] + self.stops = set() + self.reset_p() + self.epoch = 0 + + self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32) + self.raw_inputs = np.zeros((self.sample_size, self.N)).astype(np.int32) + self.masked_distances = np.zeros((self.sample_size, self.sample_size)) + self.distances = np.zeros((self.sample_size, self.sample_size)) + self.xor_square = np.zeros((self.sample_size, self.sample_size)) + self.nn = np.zeros((self.sample_size, self.sample_size)).astype(np.int32) + self.nn_distances = np.zeros((self.sample_size, 2)).astype(np.int32) + self.base_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.outputs = np.zeros((self.sample_size)).astype(np.int32) + self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.output_xor = np.zeros((self.sample_size)).astype(np.int32) + self.mask = np.zeros((self.sample_size)) + self.numerators = np.zeros((self.sample_size)) + self.denominators = np.zeros((self.sample_size)) + self.coherences = np.zeros((self.sample_size)) + self.max_coherences = np.zeros((self.actual_N + 1)) + self.max_candidates = [None for _ in range(0, self.actual_N)] + self.uplifts = np.zeros((self.actual_N)) + self.uplift_means = np.zeros((self.actual_N)) + self.uplift_medians = np.zeros((self.actual_N)) + self.uplift_convergences = np.zeros((self.actual_N)) + # self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)] + self.superspace_uplift_samples = [] + self.subspace_uplifts = np.zeros((self.actual_N)) + self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)] + self.uplift_stddevs = np.zeros((self.actual_N)) + + self.last_index = -1 + self.last_pvalue = -1 + self.left_half = True + + self.samples = 10 + self.num_bins = 1000 + # self.samples = 200 + 
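+        # Per-epoch sample buffers: base coherence, per-index coherence deltas, and
+        # subspace uplift samples with their subset-size weights.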
self.base_coherence_samples = np.zeros((self.samples)) + self.coherence_samples = np.zeros((self.actual_N, self.samples)) + self.subspace_uplift_samples = np.zeros((self.actual_N, self.samples)) + self.subspace_uplift_weights = np.zeros((self.actual_N, self.samples)) + + self.layers = [] + self.layer_confidence = {} + self.base = None + + self.scratch = np.zeros((self.N,)) + + self.last_value = -1 + self.rounds = 0 + self.average_delta_over_null = 0 + self.visited = set() + + self.candidate_pool = [] + self.candidate_ids = set() + self.has_added_layer = False + + def randomize_inputs(self): + for i in range(0, self.sample_size): + for j in range(0, self.N): + val = random.randint(0, 1) + self.raw_inputs[i][j] = val + self.inputs[i][j * 2] = val + self.inputs[i][j * 2 + 1] = val ^ 1 + + def populate_distances(self): + self.nn.fill(-1) + self.nn_distances.fill(-1) + for i in range(0, len(self.raw_inputs)): + x_a = self.raw_inputs[i] + for j in range(0, len(self.raw_inputs)): + if i == j: + continue + x_b = self.raw_inputs[j] + distance = hamming_distance(x_a, x_b, self.scratch) + if (self.nn_distances[i][0] < 0 or distance < self.nn_distances[i][0]) and distance > 0: + self.nn_distances[i][0] = distance + self.nn_distances[i][1] = 1 + self.nn[i][0] = j + elif distance == self.nn_distances[i][0]: + count = self.nn_distances[i][1] + self.nn_distances[i][1] = count + 1 + self.nn[i][count] = j + # self.distances[i][j] = 1.0 / (2 ** (distance - 1)) if distance > 0 else 0 + self.distances[i][j] = 1.0 / (distance ** 12) if distance > 0 else 0 + + def compute_expected_outputs(self): + for i in range(0, len(self.raw_inputs)): + self.expected_outputs[i] = xor(self.raw_inputs[i]) + + def compute_base_outputs(self): + if self.base is None: + self.base_outputs.fill(0) + return + for i in range(0, len(self.inputs)): + self.base_outputs[i] = self.base(self.inputs[i]) + + def mat_coherence(self): + np.abs(self.output_xor, self.mask) + np.subtract(self.output_xor, self.mask, self.mask) + np.divide(self.mask, 2.0, self.mask) + np.add(1.0, self.mask, self.mask) + self.xor_square.fill(0) + np.copyto(self.masked_distances, self.distances) + masked_distances_t = self.masked_distances.transpose() + for i in range(0, len(self.xor_square)): + self.xor_square[i] = self.output_xor + np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i]) + np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i]) + np.sum(self.masked_distances, axis=0, out=self.denominators) + self.xor_square = self.xor_square.transpose() + np.logical_xor(self.xor_square, self.output_xor, self.xor_square) + np.multiply(self.xor_square, self.masked_distances, self.xor_square) + np.sum(self.xor_square, axis=0, out=self.numerators) + np.divide(self.numerators, self.denominators, self.coherences) + mean = np.nanmean(self.coherences) + if isnan(mean): + mean = 1.0 + return 1.0 - mean + + def nn_coherence(self): + for i in range(0, len(self.output_xor)): + total = 0 + y_a = self.output_xor[i] + [distance, count] = self.nn_distances[i] + for index in range(0, count): + j = self.nn[i][index] + y_b = self.output_xor[j] + total += 1 if y_a == 1 and y_b == 1 or y_a == 0 and y_b == 0 else 0 + self.coherences[i] = total / count + return np.mean(self.coherences) + + def coherence(self, outputs=None): + if outputs is None: + outputs = self.outputs + np.logical_xor(outputs, self.expected_outputs, self.output_xor) + return self.nn_coherence() + # return self.mat_coherence() + coherences = [] + for i in range(0, len(self.output_xor)): + 
y_a = self.output_xor[i] + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + + raw_coherence = sum(coherences) / len(coherences) + check_coherence = self.mat_coherence() + + return raw_coherence + + def div_coherence(self): + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + if y_a < 0: + continue + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + if y_b < 0: + continue + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + # if y_a < 0 or y_b < 0: + # numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + if len(coherences) == 0: + return 1.0 + return sum(coherences) / len(coherences) + + def normalize_p(self): + check = self.knowns[:] + for i in range(0, len(self.p)): + if self.p[i] < 0: + self.p[i] = 0 + for i in range(0, len(self.p)): + if i in self.knowns: + flip = i ^ 0b1 + self.p[i] = 0.0 + self.p[flip] = 0.0 + else: + check.append(i) + stop_id = index_hash(check) + check.pop() + if stop_id in self.stops: + self.p[i] = 0.0 + total = np.sum(self.p) + if total > 0: + for i in range(0, len(self.p)): + self.p[i] = self.p[i] / total + + def reset_p(self): + self.p.fill(1.0) + self.normalize_p() + + def threshold(self): + # return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100) + return 1.0 - (self.epoch / 1000) + + def get_converged_index(self): + for i in range(0, len(self.p)): + if self.p[i] > self.threshold(): + return i + return None + + def add_layer(self): + self.has_added_layer = True + self.add_stop() + layer = Candidate(self.knowns) + self.layers.append(layer) + self.base = self.cache_layers() + self.knowns.pop() + self.reset_p() + + def random_sample(self): + self.randomize_inputs() + self.populate_distances() + self.compute_expected_outputs() + self.compute_base_outputs() + return self.coherence(self.base_outputs) + + def random_candidate(self): + indices = self.knowns[:] + np.copyto(self.p_temp, self.p) + self.p_temp[self.actual_N] = 0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + for _ in range(0, self.num_terms - len(self.knowns)): + index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0] + indices.append(index) + flip = index ^ 0b1 + self.p_temp[index] = 0 + self.p_temp[flip] = 0 + for i in range(0, len(self.p_temp)): + if i not in indices: + indices.append(i) + stop_id = index_hash(indices) + indices.pop() + if stop_id in self.stops: + self.p_temp[i] = 0.0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + return Candidate(indices) + + def seed_candidate_pool(self): + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in self.candidate_ids: + continue + self.candidate_pool.append(candidate) + self.candidate_ids.add(candidate_id) + + def add_stop(self): + stop_id = index_hash(self.knowns) + self.stops.add(stop_id) + + def get_distribution(self, candidate, half = 1): + 
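+        # Restrict the sample to inputs where the candidate term evaluates to `half`:
+        # entries outside the subset are marked -1 so they never count as agreements.
+        # Returns the subset size and the nearest-neighbour coherence of base ^ expected
+        # restricted to that subset.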
count = 0 + for i in range(0, len(self.inputs)): + value = candidate.evaluate(self.inputs[i]) + if value == half: + self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i] + count += 1 + else: + self.output_xor[i] = -1 + # return (count, self.mat_coherence()) + return (count, self.nn_coherence()) + + def err(self, fitted_model, bins, hist): + err = 0 + for i in range(0, self.num_bins): + x = bins[i + 1] + y = hist[i] + delta = fitted_model(x) - y + err += delta * delta + return err / self.num_bins + + def update(self): + sample = self.epoch + self.epoch += 1 + + base_coherence = self.random_sample() + self.base_coherence_samples[sample] = base_coherence + candidate = Candidate(self.knowns[:]) + + for i in range(0, self.actual_N): + candidate.indices.append(i) + try: + count_0, subspace_coherence_0 = self.get_distribution(candidate, 0) + # count_1, subspace_coherence_1 = self.get_distribution(candidate, 1) + # delta = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size + # delta = subspace_coherence_0 - subspace_coherence_1 + self.subspace_uplift_samples[i][sample] = subspace_coherence_0 - base_coherence + self.subspace_uplift_weights[i][sample] = count_0 / self.sample_size + # self.subspace_uplift_left_samples[i][sample] = subspace_coherence_0 + # self.subspace_uplift_right_samples[i][sample] = subspace_coherence_1 - base_coherence + + # if index_hash(candidate.indices) in self.stops: + # continue + + for j in range(0, len(self.inputs)): + self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j]) + + coherence = self.coherence() + self.coherence_samples[i][sample] = coherence - base_coherence + # self.coherence_samples[i][sample] = coherence + finally: + candidate.indices.pop() + + if self.epoch >= self.samples: + # for i in range(0, self.actual_N): + # parameters = stats.norm.fit(self.uplift_samples[i]) + # print(i, parameters) + # print(i, stats.kstest(self.uplift_samples[i], "norm", parameters)) + + added = False + # parameters = stats.norm.fit(self.base_coherence_samples) + # (base_mu, _) = parameters + + # (hist, bins) = np.histogram(self.base_coherence_samples, self.num_bins, density=True) + # fitter = modeling.fitting.LevMarLSQFitter() + # model = modeling.models.Gaussian1D() + # fitted_model = fitter(model, bins[1:], hist) + # print('Base', fitted_model.mean.value, self.err(fitted_model, bins, hist)) + + # x = np.linspace(0, 1.0, 10000) + # density = stats.gaussian_kde(self.base_coherence_samples)(x) + # mode = x[np.argsort(density)[-1]] + # print(mode) + + # for i in range(0, self.actual_N): + # count = 0 + # for j in range(0, self.samples): + # for k in range(0, self.samples): + # if self.coherence_samples[i][j] > self.base_coherence_samples[k]: + # count += 1 + # print(i, count) + + try: + index = -1 + lowest_index = -1 + lowest_pvalue = -1 + highest_index = -1 + highest_pvalue = -1 + best_pvalue = -1 + pvalue_sum = 0 + pvalue_denom = 0 + is_subspace = False + + for i in range(0, self.actual_N): + if i in self.knowns: + continue + try: + result = stats.ttest_1samp(self.coherence_samples[i], 0, alternative='greater') + print(i, result) + # (hist, bins) = np.histogram(self.coherence_samples[i], 20, range=(-0.01, 0.01)) + # total = 0 + # for j in range(0, 20): + # total += hist[j] * (bins[j] + bins[j + 1]) / 2 + # mode = total / sum(hist) + + # fitter = modeling.fitting.LevMarLSQFitter() + # model = modeling.models.Gaussian1D() + # fitted_model = fitter(model, bins[1:], hist) + # mode = fitted_model.mean.value + # print(i, total) + + 
# result = stats.kstest(self.base_coherence_samples, self.coherence_samples[i], alternative='greater') + # print(i, result) + # value = result.pvalue * (1 - result.statistic) + # parameters = stats.norm.fit(self.coherence_samples[i]) + # (mu, _) = parameters + # density = stats.gaussian_kde(self.coherence_samples[i])(x) + # mode = x[np.argsort(density)[-1]] + # print(i, mode) + # print(i, mu) + if not isnan(result.pvalue): + if i == self.last_index: + delta = abs(result.pvalue - self.last_pvalue) + if delta < 0.1: + print('Low delta!') + print(self.last_index, delta) + # self.last_index = -1 + self.left_half = not self.left_half + # self.layers.pop() + # self.base = self.cache_layers() + # return + + pvalue_sum += result.pvalue + pvalue_denom += 1 + if lowest_index < 0 or result.pvalue < lowest_pvalue: + lowest_index = i + lowest_pvalue = result.pvalue + if highest_index < 0 or result.pvalue > highest_pvalue: + highest_index = i + highest_pvalue = result.pvalue + except Exception as e: + print(e) + pass + average_pvalue = pvalue_sum / pvalue_denom + print(average_pvalue) + index = highest_index if self.left_half else lowest_index + best_pvalue = highest_pvalue if self.left_half else lowest_pvalue + + self.last_index = index + self.last_pvalue = best_pvalue + # if average_pvalue < 0.5: + # index = lowest_index + # best_pvalue = lowest_pvalue + # else: + # index = highest_index + # best_pvalue = highest_pvalue + # print(e) + + # for i in range(0, self.actual_N): + # if i in self.knowns: + # continue + # # result = stats.kstest(self.base_coherence_samples, self.subspace_uplift_left_samples[i], alternative='greater') + # # # result = stats.kstest(self.subspace_uplift_left_samples[i], self.subspace_uplift_right_samples[i], alternative='greater') + # # print(i, result) + # # value = result.pvalue * (1 - result.statistic) + # # parameters = stats.norm.fit(self.subspace_uplift_left_samples[i]) + # # (mu, _) = parameters + # try: + # result = stats.ttest_1samp(self.subspace_uplift_samples[i], 0, alternative='greater') + # print(i, result) + # # (hist, bins) = np.histogram(self.subspace_uplift_samples[i], 20, range=(-0.01, 0.01)) + # # bin_index = np.argsort(hist)[-1] + # # mode = (bins[bin_index] + bins[bin_index + 1]) / 2 + # # fitter = modeling.fitting.LevMarLSQFitter() + # # model = modeling.models.Gaussian1D() + # # fitted_model = fitter(model, bins[1:], hist) + # # mode = fitted_model.mean.value + # # print(i, mode) + # # density = stats.gaussian_kde(self.subspace_uplift_samples[i], weights=self.subspace_uplift_weights[i])(x) + # # density = stats.gaussian_kde(self.subspace_uplift_samples[i])(x) + # # mode = x[np.argsort(density)[-1]] + # # print(i, mode) + # # print(i, mu) + # if (index < 0 or result.pvalue < lowest_pvalue) and not isnan(result.pvalue): + # # if index < 0 or value < lowest_pvalue: + # index = i + # lowest_pvalue = result.pvalue + # is_subspace = True + + # # if result.pvalue > 0.95: + # # index = i + # # parameters = stats.norm.fit(self.subspace_uplift_samples[i]) + # # (mu, _) = parameters + # # if mu > base_mu: + # # if index < 0 or mu > highest_mu: + # # index = i + # # highest_mu = mu + # except Exception as e: + # print(e) + # pass + # # print(e) + + if index >= 0: + if is_subspace: + # print('subspace') + self.knowns.append(index) + print(self.knowns, best_pvalue) + else: + # print('flat') + self.knowns.append(index) + # self.layer_confidence[index_hash(self.knowns)] = confidence + # num_terms = len(self.knowns) + print(self.knowns, best_pvalue) + print(base_coherence) 
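+                    # Commit the chosen index: knowns becomes a new XOR layer and the base function is rebuilt.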
+ self.add_layer() + # if num_terms > self.num_terms: + # self.stops = set() + # self.num_terms = num_terms + self.knowns = [] + return + else: + self.knowns = [] + # else: + # self.knowns = [] + + # if len(self.knowns) > 0: + # # self.add_stop() + # self.knowns = [] + finally: + # fig, axs = plt.subplots(int(self.actual_N / 4), 4) + # x_eval = np.linspace(-1.0, 1.0, num=1000) + # for i in range(0, int(self.actual_N / 4)): + # for j in range(0, 4): + # # (hist, bins) = np.histogram(self.base_coherence_samples, self.num_bins, density=True) + # # fitter = modeling.fitting.LevMarLSQFitter() + # # model = modeling.models.Gaussian1D() + # # fitted_model = fitter(model, bins[1:], hist) + # # axs[i][j].scatter(bins[1:], hist, s=1, color='r', alpha=0.5) + # # axs[i][j].plot(x_eval, fitted_model(x_eval), color='r') + + # (hist, bins) = np.histogram(self.coherence_samples[i * 4 + j], self.num_bins, density=True) + # # fitter = modeling.fitting.LevMarLSQFitter() + # # model = modeling.models.Gaussian1D() + # # fitted_model = fitter(model, bins[1:], hist) + # axs[i][j].scatter(bins[1:], hist, s=1, color='g', alpha=0.5) + # # axs[i][j].plot(x_eval, fitted_model(x_eval), color='g') + + # (hist, bins) = np.histogram(self.subspace_uplift_samples[i * 4 + j], self.num_bins, density=True) + # # fitter = modeling.fitting.LevMarLSQFitter() + # # model = modeling.models.Gaussian1D() + # # fitted_model = fitter(model, bins[1:], hist) + # axs[i][j].scatter(bins[1:], hist, s=1, color='b', alpha=0.5) + # # axs[i][j].plot(x_eval, fitted_model(x_eval), color='b') + + # # kde0 = stats.gaussian_kde(self.base_coherence_samples) + # kde1 = stats.gaussian_kde(self.coherence_samples[i * 4 + j]) + # # kde2 = stats.gaussian_kde(self.subspace_uplift_samples[i * 4 + j], weights=self.subspace_uplift_weights[i]) + # kde2 = stats.gaussian_kde(self.subspace_uplift_samples[i * 4 + j]) + # # axs[i][j].plot(x_eval, kde0(x_eval), color='r') + # axs[i][j].plot(x_eval, kde1(x_eval), color='g') + # axs[i][j].plot(x_eval, kde2(x_eval), color='b') + # # n, bins, patches = axs[i][j].hist(self.base_coherence_samples, 50, density=True, facecolor='r', alpha=0.5) + # # n, bins, patches = axs[i][j].hist(self.coherence_samples[i * 4 + j], 50, density=True, facecolor='g', alpha=0.5) + # # n, bins, patches = axs[i][j].hist(self.subspace_uplift_samples[i * 4 + j], 50, density=True, facecolor='b', alpha=0.5) + # plt.show() + self.epoch = 0 + + return + + # print('=====' + str(base_coherence)) + # print(self.uplifts) + # print(self.uplift_means) + # print(self.uplift_medians) + # print(self.uplift_stddevs) + # print(self.uplift_ranges) + # print(self.uplift_convergences) + # print(self.subspace_uplifts) + + if index >= 0: + self.knowns.append(index) + print(base_coherence) + print(self.knowns, self.epoch) + # print(self.uplift_medians) + # print(self.uplifts) + # print(self.subspace_uplifts) + self.add_layer() + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.uplift_medians.fill(0) + self.uplift_convergences.fill(0) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.epoch = 0 + return + + if subspace_index >= 0: + self.knowns.append(subspace_index) + print(self.knowns, self.epoch) + # print(self.uplifts) + # print(self.subspace_uplifts) + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.uplift_medians.fill(0) + self.uplift_convergences.fill(0) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.epoch = 0 + return + + # print('======') + # print(self.epoch, base_coherence) + # print('======') + 
+ # if len(self.candidate_pool) == 0: + # print(self.p) + + # for i in range(0, min(5, len(self.candidate_pool))): + # candidate = self.candidate_pool[i] + # print(candidate.id(), candidate.uplift) + + # if self.epoch < 15: + # return + + if self.candidate_pool[0].uplift > 0.3: + candidate = self.candidate_pool[0] + candidate_id = candidate.id() + self.candidate_ids.remove(candidate_id) + print(candidate_id) + self.knowns = candidate.indices + self.add_layer() + self.knowns = [] + self.reset_p() + self.epoch = 0 + self.candidate_pool = [] + self.candidate_ids = set() + elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200: + self.epoch = 0 + self.num_terms += 1 + self.candidate_pool = [] + self.candidate_ids = set() + self.knowns = [] + self.stops = set() + self.reset_p() + return + + # np.copyto(self.next_p, self.p) + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in visited: + continue + visited.add(candidate_id) + if self.actual_N in candidate.indices: + continue + has_candidate = True + for i in range(0, len(self.inputs)): + self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i]) + # coherence = self.ring_coherence() + coherence = self.coherence() + # if coherence <= base_coherence: + # continue + # for index in candidate.indices: + # self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0) + # self.p_temp[index] += 0 + for index in candidate.indices: + if coherence > self.max_coherences[index]: + self.max_coherences[index] = coherence + self.max_candidates[index] = candidate + # self.max_coherences[index] = max(self.max_coherences[index], coherence) + # np.copyto(self.p, self.next_p) + + # np.copyto(self.p_temp, self.p) + for i in range(0, self.actual_N): + candidate = self.max_candidates[i] + if candidate is None: + continue + for index in candidate.indices: + self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0) + # print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id()) + self.normalize_p() + # print(self.p) + + # np.subtract(self.p_temp, self.p, self.p_temp) + # np.abs(self.p_temp, self.p_temp) + # delta = np.sum(self.p_temp) / len(self.p_temp) + # print(delta, np.argmax(self.p)) + # np.copyto(self.p_temp, self.p) + # for i in range(0, len(self.p_temp)): + # self.p_temp[i] = round(self.p_temp[i] * 100) / 100 + # print(self.p_temp) + + index = np.argmax(self.p) + delta_over_null = self.p[index] - self.p[self.actual_N] + if self.epoch == 0: + self.average_delta_over_null = delta_over_null + else: + self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null + diff = self.num_terms - len(self.knowns) + + print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p)) + + # Always iterate for a minimum number of epochs + if self.epoch < 15: + return + if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300: + return + if self.average_delta_over_null < 0.001: + index = self.actual_N + else: + index = np.argmax(self.p) + + # index = np.argmax(self.p) + # if index == self.last_value: + # self.rounds += 1 + # else: + # self.rounds = 0 + # self.last_value = index + + # if self.rounds < 10 and self.epoch < 100: + # return + + # if self.epoch < 5 or (delta > 0.001 and self.epoch < 50): + # return + + # index = np.argmax(self.p) + + # print(self.p) + # print(self.threshold()) + # 
print(self.p) + # index = self.get_converged_index() + if not index is None or not has_candidate: + # print(index, delta, np.argmax(self.p)) + self.epoch = 0 + if index == self.actual_N or not has_candidate: + if len(self.knowns) > 0: + self.add_stop() + self.knowns.pop() + print('Backtrack: ' + str(self.knowns)) + self.reset_p() + return + self.num_terms += 1 + self.knowns = [] + self.stops = set() + self.reset_p() + print(self.num_terms) + return + self.knowns.append(index) + # bisect.insort(self.knowns, index) + if len(self.knowns) == self.num_terms: + print('Add layer: ' + str(self.knowns)) + self.add_layer() + else: + print('Found term: ' + str(self.knowns)) + self.reset_p() + print(base_coherence) + return + + def cache_layers(self): + expr = 'def f(x):\n\tresult=0\n' + for layer in self.layers: + expr += '\tresult^=' + layer.eval_str() + '\n' + expr += '\treturn result\n' + scope = {} + exec(expr, scope) + return scope['f'] + +def main(): + probabilities = Probabilities() + # probabilities.knowns = [14] + # probabilities.add_layer() + # probabilities.knowns = [8] + # probabilities.add_layer() + # probabilities.knowns = [4] + # probabilities.add_layer() + while probabilities.num_terms <= probabilities.N: + probabilities.update() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations19.py b/mutations19.py new file mode 100644 index 0000000..7c9e2ec --- /dev/null +++ b/mutations19.py @@ -0,0 +1,1052 @@ +import bisect +from cmath import isnan +from email.mime import base +import matplotlib.pyplot as plt +import hashlib +import math +import numpy as np +import random +import statistics +from math import comb +from pprint import pprint + +from pkg_resources import get_distribution +from scipy import optimize, stats +from astropy import modeling + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def xor(v): + return np.sum(v[2:]) % 2 + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def index_hash(indices): + return ','.join([str(index) for index in sorted(indices)]) + +def bin_div(a, b): + if a == 0 and b == 0: + return 2 + if a == 1 and b == 0: + return -1 + if a == 0 and b == 1: + return 0 + return 1 + +class Candidate(): + def __init__(self, indices): + self.indices = indices[:] + self.uplift = 0 + + def evaluate(self, x): + if len(x) in self.indices: + return 0 + value = 1 + for index in self.indices: + value *= x[index] + return value + + def id(self): + return index_hash(self.indices) + + def eval_str(self): + parts = [] + for index in self.indices: + parts.append('x[' + str(index) + ']') + return '*'.join(parts) + +class Probabilities(): + def __init__(self): + self.N = 16 + self.actual_N = self.N * 2 + self.num_terms = 1 + self.num_candidates = 100 + # self.sample_size = self.N ** 2 + self.sample_size = 1024 + self.p = np.zeros((self.actual_N + 1,)) + self.p_temp = np.empty_like(self.p) + self.next_p = np.empty_like(self.p) + self.knowns = [] + self.stops = set() + self.reset_p() + self.epoch = 0 + + self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32) + self.raw_inputs = np.zeros((self.sample_size, self.N)).astype(np.int32) + self.masked_distances = 
np.zeros((self.sample_size, self.sample_size)) + self.distances = np.zeros((self.sample_size, self.sample_size)) + self.xor_square = np.zeros((self.sample_size, self.sample_size)) + self.nn = np.zeros((self.sample_size, self.sample_size)).astype(np.int32) + self.nn_distances = np.zeros((2, self.sample_size)).astype(np.int32) + self.base_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.outputs = np.zeros((self.sample_size)).astype(np.int32) + self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32) + self.base_output_xor = np.zeros((self.sample_size)).astype(np.int32) + self.output_xor = np.zeros((self.sample_size)).astype(np.int32) + self.mask = np.zeros((self.sample_size)) + self.numerators = np.zeros((self.sample_size)) + self.denominators = np.zeros((self.sample_size)) + self.coherences = np.zeros((self.sample_size)) + self.max_coherences = np.zeros((self.actual_N + 1)) + self.max_candidates = [None for _ in range(0, self.actual_N)] + self.uplifts = np.zeros((self.actual_N)) + self.uplift_means = np.zeros((self.actual_N)) + self.uplift_medians = np.zeros((self.actual_N)) + self.uplift_convergences = np.zeros((self.actual_N)) + # self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)] + self.superspace_uplift_samples = [] + self.subspace_uplifts = np.zeros((self.actual_N)) + self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)] + self.uplift_stddevs = np.zeros((self.actual_N)) + + self.base_coherences = np.zeros((self.sample_size)) + self.offset_coherences = np.zeros((self.sample_size)) + + self.last_index = -1 + self.last_pvalue = -1 + self.left_half = True + + self.samples = 10 + self.num_bins = 1000 + # self.samples = 200 + self.base_coherence_samples = np.zeros((self.samples)) + self.coherence_samples = np.zeros((self.actual_N, self.samples)) + self.subspace_uplift_samples = np.zeros((self.actual_N, self.samples)) + self.subspace_uplift_weights = np.zeros((self.actual_N, self.samples)) + + self.layers = [] + self.layer_confidence = {} + self.base = None + + self.scratch = np.zeros((self.N,)) + + self.last_value = -1 + self.rounds = 0 + self.average_delta_over_null = 0 + self.visited = set() + + self.candidate_pool = [] + self.candidate_ids = set() + self.has_added_layer = False + + def randomize_inputs(self): + for i in range(0, self.sample_size): + for j in range(0, self.N): + val = random.randint(0, 1) + self.raw_inputs[i][j] = val + self.inputs[i][j * 2] = val + self.inputs[i][j * 2 + 1] = val ^ 1 + + def populate_distances(self): + self.nn.fill(-1) + self.nn_distances.fill(-1) + for i in range(0, len(self.raw_inputs)): + x_a = self.raw_inputs[i] + for j in range(0, len(self.raw_inputs)): + if i == j: + continue + x_b = self.raw_inputs[j] + distance = hamming_distance(x_a, x_b, self.scratch) + if (self.nn_distances[0][i] < 0 or distance < self.nn_distances[0][i]) and distance > 0: + self.nn_distances[0][i] = distance + self.nn_distances[1][i] = 1 + self.nn[i][0] = j + elif distance == self.nn_distances[0][i]: + count = self.nn_distances[1][i] + self.nn_distances[1][i] = count + 1 + self.nn[i][count] = j + # self.distances[i][j] = 1.0 / (2 ** (distance - 1)) if distance > 0 else 0 + self.distances[i][j] = distance + # self.distances[i][j] = 1.0 / (distance ** 12) if distance > 0 else 0 + + def compute_expected_outputs(self): + for i in range(0, len(self.raw_inputs)): + self.expected_outputs[i] = xor(self.raw_inputs[i]) + + def compute_base_outputs(self): + if self.base is None: + self.base_outputs.fill(0) + return + for i in range(0, 
len(self.inputs)): + self.base_outputs[i] = self.base(self.inputs[i]) + + def mat_coherence(self): + np.abs(self.output_xor, self.mask) + np.subtract(self.output_xor, self.mask, self.mask) + np.divide(self.mask, 2.0, self.mask) + np.add(1.0, self.mask, self.mask) + + for i in range(0, len(self.output_xor)): + for j in range(0, len(self.output_xor)): + self.xor_square[i][j] = self.output_xor[i] ^ self.output_xor[j] ^ (1 if self.distances[i][j] % 2 == 0 else 0) + self.masked_distances[i][j] = 1.0 / (2 ** self.distances[i][j]) + + # self.xor_square.fill(0) + # np.copyto(self.masked_distances, self.distances) + # masked_distances_t = self.masked_distances.transpose() + # for i in range(0, len(self.xor_square)): + # self.xor_square[i] = self.output_xor + # np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i]) + # np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i]) + np.sum(self.masked_distances, axis=0, out=self.denominators) + # self.xor_square = self.xor_square.transpose() + # np.logical_xor(self.xor_square, self.output_xor, self.xor_square) + np.multiply(self.xor_square, self.masked_distances, self.xor_square) + np.sum(self.xor_square, axis=0, out=self.numerators) + np.divide(self.numerators, self.denominators, self.coherences) + mean = np.nanmean(self.coherences) + if isnan(mean): + mean = 1.0 + return 1.0 - mean + + def nn_coherence(self): + for i in range(0, len(self.output_xor)): + total = 0 + y_a = self.output_xor[i] + distance = self.nn_distances[0][i] + count = self.nn_distances[1][i] + for index in range(0, count): + j = self.nn[i][index] + y_b = self.output_xor[j] + total += 1 if y_a == 1 and y_b == 1 or y_a == 0 and y_b == 0 else 0 + self.coherences[i] = total + # if distance % 2 == 0: + # self.coherences[i] = 1.0 - self.coherences[i] + return np.mean(self.coherences) + + def coherence(self, outputs=None): + if outputs is None: + outputs = self.outputs + np.logical_xor(outputs, self.expected_outputs, self.output_xor) + return self.nn_coherence() + # return self.mat_coherence() + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + + raw_coherence = sum(coherences) / len(coherences) + check_coherence = self.mat_coherence() + + return raw_coherence + + def div_coherence(self): + coherences = [] + for i in range(0, len(self.output_xor)): + y_a = self.output_xor[i] + if y_a < 0: + continue + numerator = 0 + denominator = 0 + for j in range(0, len(self.output_xor)): + if i == j: + continue + y_b = self.output_xor[j] + if y_b < 0: + continue + weight = self.distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + # if y_a < 0 or y_b < 0: + # numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + if len(coherences) == 0: + return 1.0 + return sum(coherences) / len(coherences) + + def normalize_p(self): + check = self.knowns[:] + for i in range(0, len(self.p)): + if self.p[i] < 0: + self.p[i] = 0 + for i in range(0, len(self.p)): + if i in self.knowns: + flip = i ^ 0b1 + self.p[i] = 0.0 + self.p[flip] = 0.0 + else: + check.append(i) + stop_id = 
index_hash(check) + check.pop() + if stop_id in self.stops: + self.p[i] = 0.0 + total = np.sum(self.p) + if total > 0: + for i in range(0, len(self.p)): + self.p[i] = self.p[i] / total + + def reset_p(self): + self.p.fill(1.0) + self.normalize_p() + + def threshold(self): + # return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100) + return 1.0 - (self.epoch / 1000) + + def get_converged_index(self): + for i in range(0, len(self.p)): + if self.p[i] > self.threshold(): + return i + return None + + def add_layer(self): + self.has_added_layer = True + self.add_stop() + layer = Candidate(self.knowns) + self.layers.append(layer) + self.base = self.cache_layers() + self.knowns.pop() + self.reset_p() + + def random_sample(self): + self.randomize_inputs() + self.populate_distances() + self.compute_expected_outputs() + self.compute_base_outputs() + return self.coherence(self.base_outputs) + + def random_candidate(self): + indices = self.knowns[:] + np.copyto(self.p_temp, self.p) + self.p_temp[self.actual_N] = 0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + for _ in range(0, self.num_terms - len(self.knowns)): + index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0] + indices.append(index) + flip = index ^ 0b1 + self.p_temp[index] = 0 + self.p_temp[flip] = 0 + for i in range(0, len(self.p_temp)): + if i not in indices: + indices.append(i) + stop_id = index_hash(indices) + indices.pop() + if stop_id in self.stops: + self.p_temp[i] = 0.0 + total = np.sum(self.p_temp) + if total == 0: + return None + np.divide(self.p_temp, total, self.p_temp) + return Candidate(indices) + + def seed_candidate_pool(self): + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in self.candidate_ids: + continue + self.candidate_pool.append(candidate) + self.candidate_ids.add(candidate_id) + + def add_stop(self): + stop_id = index_hash(self.knowns) + self.stops.add(stop_id) + + def get_distribution(self, candidate, half = 1): + count = 0 + for i in range(0, len(self.inputs)): + value = candidate.evaluate(self.inputs[i]) + if value == half: + self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i] + count += 1 + else: + self.output_xor[i] = -1 + # return (count, self.mat_coherence()) + return (count, self.nn_coherence()) + + def err(self, fitted_model, bins, hist): + err = 0 + for i in range(0, self.num_bins): + x = bins[i + 1] + y = hist[i] + delta = fitted_model(x) - y + err += delta * delta + return err / self.num_bins + + def update(self): + sample = self.epoch + self.epoch += 1 + + base_coherence = self.random_sample() + np.copyto(self.base_coherences, self.coherences) + np.copyto(self.base_output_xor, self.output_xor) + + # self.base_coherence_samples[sample] = base_coherence + candidate = Candidate(self.knowns[:]) + + index = -1 + lowest_pvalue = -1 + highest_mode = 0 + + fig, axs = plt.subplots(int(self.actual_N / 4), 4) + x_eval = np.linspace(0, 1.0, num=10000) + + for i in range(0, self.actual_N): + candidate.indices.append(i) + try: + + # count_0, subspace_coherence_0 = self.get_distribution(candidate, 0) + # # count_1, subspace_coherence_1 = self.get_distribution(candidate, 1) + # # delta = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size + # # delta = subspace_coherence_0 - subspace_coherence_1 + # self.subspace_uplift_samples[i][sample] = subspace_coherence_0 - base_coherence + # 
self.subspace_uplift_weights[i][sample] = count_0 / self.sample_size + # # self.subspace_uplift_left_samples[i][sample] = subspace_coherence_0 + # # self.subspace_uplift_right_samples[i][sample] = subspace_coherence_1 - base_coherence + + # if index_hash(candidate.indices) in self.stops: + # continue + + for j in range(0, len(self.inputs)): + self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j]) + + coherence = self.coherence() + np.subtract(self.coherences, self.base_coherences, self.offset_coherences) + # coherence = sum(self.offset_coherences * self.nn_distances[1] / self.nn_distances[0]) + + # result = stats.ttest_rel(self.base_coherences, self.coherences, alternative='less') + # # print(i, result) + + # pvalue = result.pvalue + + # if pvalue < 0.05 and (pvalue < lowest_pvalue or lowest_pvalue < 0): + # index = i + # lowest_pvalue = pvalue + + # result = stats.ttest_1samp(self.offset_coherences, 0, alternative='greater', weights=self.nn_distances[0]) + # print(i, result) + + # (hist, bins) = np.histogram(self.offset_coherences, 10) + # fitter = modeling.fitting.LevMarLSQFitter() + # model = modeling.models.Gaussian1D() + # fitted_model = fitter(model, bins[1:], hist, weights=np.divide(1.0, self.nn_distances[0])) + # axs[int(i/4)][int(i%4)].scatter(bins[1:], hist, s=1, color='r', alpha=0.5) + # axs[int(i/4)][int(i%4)].plot(x_eval, fitted_model(x_eval), color='r') + est_num = 0 + est_denom = 0 + # print(self.offset_coherences) + for j in range(0, len(self.offset_coherences)): + # weight = 1.0 / 2 ** self.nn_distances[0][j] + if self.offset_coherences[j] == 0: + continue + weight = 1.0 + est_num += weight * self.offset_coherences[j] + est_denom += weight + # print(i, est_num / est_denom) + # mode = est_num / est_denom + + # density = stats.gaussian_kde(self.offset_coherences, weights=1.0 / (2 ** (self.nn_distances[0] - 1)))(x_eval) + filtered_points = [x for x in self.offset_coherences if x != 0 and x != 1 and x != -1] + left_half = [x for x in self.offset_coherences if x < 0 and x != -1] + right_half = [x for x in self.offset_coherences if x > 0 and x != 1] + + left_distances = [self.nn_distances[0][j] for j in range(0, self.sample_size) if self.offset_coherences[j] < 0] + # left_score = sum([1 if d % 2 == 0 else 0 for d in left_distances]) / len(left_distances) + right_distances = [self.nn_distances[0][j] for j in range(0, self.sample_size) if self.offset_coherences[j] > 0] + + left_counts = {} + right_counts = {} + counts = {} + for j in range(1, self.N): + count = sum([1 if d == j else 0 for d in left_distances]) + counts[j] = 0 + if count > 0: + left_counts[j] = count + counts[j] += count + count = sum([1 if d == j else 0 for d in right_distances]) + if count > 0: + right_counts[j] = count + counts[j] += count + + # left_sum = sum([1 if d % 2 == 0 else 0 for d in left_distances]) + right_sum = sum([1 if d % 2 == 0 else 0 for d in right_distances]) + + # print(left_sum, right_sum) + + # left_value = (left_sum / len(left_distances)) * (len(left_distances) / (len(left_distances) + len(right_distances))) if len(left_distances) > 0 else 0 + # right_value = (right_sum / len(right_distances)) * (len(right_distances) / (len(left_distances) + len(right_distances))) if len(right_distances) > 0 else 0 + + score = 1.0 - (right_sum / len(right_distances)) if len(right_distances) > 3 else 0 + + # left_mean = np.mean(left_half) + # right_mean = np.mean(right_half) + # print(i, left_mean, right_mean) + # left_density = stats.gaussian_kde(left_half)(x_eval) + # right_density = 
stats.gaussian_kde(right_half)(x_eval) + # axs[int(i/4)][int(i%4)].plot(x_eval, left_density, color='g') + # axs[int(i/4)][int(i%4)].plot(x_eval, right_density, color='b') + + # weights = [1.0 / (2 ** self.nn_distances[0][j]) for j in range(0, len(self.offset_coherences)) if self.offset_coherences[j] != 0 and self.offset_coherences[j] != 1 and self.offset_coherences[j] != -1] + # weights_a = [self.nn_distances[0][j] for j in range(0, len(self.offset_coherences)) if self.offset_coherences[j] != 0 and self.offset_coherences[j] != 1 and self.offset_coherences[j] != -1] + base = [((1.0 - self.base_coherences[j]) if self.nn_distances[0][j] % 2 == 0 else self.base_coherences[j]) for j in range(0, self.sample_size)] + # print(i, sum(points)) + modified = [((1.0 - self.coherences[j]) if self.nn_distances[0][j] % 2 == 0 else self.coherences[j]) for j in range(0, self.sample_size)] + # print(i, sum(points)) + # score = (sum([(-self.offset_coherences[j] if self.nn_distances[0][j] % 2 == 0 else self.offset_coherences[j]) / (self.nn_distances[0][j] ** 2) for j in range(0, self.sample_size) if self.offset_coherences[j] != 0])) + score = sum([((self.base_coherences[j] - self.coherences[j]) if self.nn_distances[0][j] % 2 == 0 else (self.coherences[j] - self.base_coherences[j])) * (1.0 / comb(int(self.N / 2) + 1, self.nn_distances[0][j])) for j in range(0, self.sample_size)]) + + # 3 5 7 10 12 14 + total = 0 + unique_inputs = set() + for j in range(0, self.sample_size): + input_id = str(self.raw_inputs[j]) + if input_id in unique_inputs: + continue + unique_inputs.add(input_id) + buckets = {} + for k in range(0, self.sample_size): + distance = int(self.distances[j][k]) + if distance == 0: + continue + if distance not in buckets: + buckets[distance] = [0,0,0,0] + base_value = self.base_output_xor[j] ^ self.base_output_xor[k] + value = self.output_xor[j] ^ self.output_xor[k] + + if distance % 2 == 0: + if value == 0 and base_value == 0: + total += 1 + if value == 1 and base_value == 0: + total -= 1 + # 1,3 + if value == 0 and base_value == 1: + total -= 1 + if value == 1 and base_value == 1: + total -= 1 + else: + if value == 1 and base_value == 1: + total += 1 + if value == 0 and base_value == 1: + total -= 1 + # 0,2 + if value == 0 and base_value == 0: + total -= 1 + if value == 1 and base_value == 0: + total -= 1 + + if value == 0 and base_value == 0: + buckets[distance][0] += 1 + elif value == 0 and base_value == 1: + buckets[distance][1] += 1 + elif value == 1 and base_value == 0: + buckets[distance][2] += 1 + elif value == 1 and base_value == 1: + buckets[distance][3] += 1 + # buckets[distance] += value - base_value + # total += ((base_value - value) if distance % 2 == 0 else (value - base_value)) + if j == 0: + print(j, buckets) + # pprint(buckets) + alt_score = total + + # score += alt_score + # score += (sum([self.offset_coherences[j] * self.nn_distances[1][j] / (2 ** self.nn_distances[0][j]) for j in range(0, self.sample_size)])) + + # alt_score = (sum([self.offset_coherences[j] for j in range(0, self.sample_size)])) / self.sample_size + # score += alt_score + + # points = [-1.0 * self.offset_coherences[j] * self.nn_distances[1][j] if self.nn_distances[0][j] % 2 == 0 and self.offset_coherences[j] > 0 else self.offset_coherences[j] * self.nn_distances[1][j] for j in range(0, self.sample_size) if self.offset_coherences[j] != 0 and self.offset_coherences[j] != 1 and self.offset_coherences[j] != -1] + try: + density = stats.gaussian_kde(self.base_coherences)(x_eval) + density_a = 
stats.gaussian_kde(self.coherences)(x_eval) + # density_a = stats.gaussian_kde(filtered_points, weights = weights_a)(x_eval) + axs[int(i/4)][int(i%4)].plot(x_eval, density, color='g') + axs[int(i/4)][int(i%4)].plot(x_eval, density_a, color='b') + except: + pass + # axs[int(i/4)][int(i%4)].scatter(filtered_points, np.zeros_like(filtered_points)) + # left_mode = x_eval[np.argsort(left_density)[-1]] + # right_mode = x_eval[np.argsort(right_density)[-1]] + # print(i, left_mode, right_mode) + # score = sum(points) / len(points) + # print(i, score) + + # score = coherence + print(i, score, alt_score, left_counts, right_counts) + + if score > highest_mode: + highest_mode = score + index = i + + # self.coherence_samples[i][sample] = coherence - base_coherence + # self.coherence_samples[i][sample] = coherence + finally: + candidate.indices.pop() + + if index >= 0: + self.knowns.append(index) + print(self.knowns, highest_mode) + self.add_layer() + self.knowns = [] + print(base_coherence) + + plt.show() + return + + + # if self.epoch >= self.samples: + # # for i in range(0, self.actual_N): + # # parameters = stats.norm.fit(self.uplift_samples[i]) + # # print(i, parameters) + # # print(i, stats.kstest(self.uplift_samples[i], "norm", parameters)) + + # added = False + # # parameters = stats.norm.fit(self.base_coherence_samples) + # # (base_mu, _) = parameters + + # # (hist, bins) = np.histogram(self.base_coherence_samples, self.num_bins, density=True) + # # fitter = modeling.fitting.LevMarLSQFitter() + # # model = modeling.models.Gaussian1D() + # # fitted_model = fitter(model, bins[1:], hist) + # # print('Base', fitted_model.mean.value, self.err(fitted_model, bins, hist)) + + # # x = np.linspace(0, 1.0, 10000) + # # density = stats.gaussian_kde(self.base_coherence_samples)(x) + # # mode = x[np.argsort(density)[-1]] + # # print(mode) + + # # for i in range(0, self.actual_N): + # # count = 0 + # # for j in range(0, self.samples): + # # for k in range(0, self.samples): + # # if self.coherence_samples[i][j] > self.base_coherence_samples[k]: + # # count += 1 + # # print(i, count) + + # try: + # index = -1 + # lowest_index = -1 + # lowest_pvalue = -1 + # highest_index = -1 + # highest_pvalue = -1 + # best_pvalue = -1 + # pvalue_sum = 0 + # pvalue_denom = 0 + # is_subspace = False + + # for i in range(0, self.actual_N): + # if i in self.knowns: + # continue + # try: + + + # result = stats.ttest_1samp(self.coherence_samples[i], 0, alternative='greater') + # print(i, result) + # # (hist, bins) = np.histogram(self.coherence_samples[i], 20, range=(-0.01, 0.01)) + # # total = 0 + # # for j in range(0, 20): + # # total += hist[j] * (bins[j] + bins[j + 1]) / 2 + # # mode = total / sum(hist) + + # # fitter = modeling.fitting.LevMarLSQFitter() + # # model = modeling.models.Gaussian1D() + # # fitted_model = fitter(model, bins[1:], hist) + # # mode = fitted_model.mean.value + # # print(i, total) + + # # result = stats.kstest(self.base_coherence_samples, self.coherence_samples[i], alternative='greater') + # # print(i, result) + # # value = result.pvalue * (1 - result.statistic) + # # parameters = stats.norm.fit(self.coherence_samples[i]) + # # (mu, _) = parameters + # # density = stats.gaussian_kde(self.coherence_samples[i])(x) + # # mode = x[np.argsort(density)[-1]] + # # print(i, mode) + # # print(i, mu) + # if not isnan(result.pvalue): + # if i == self.last_index: + # delta = abs(result.pvalue - self.last_pvalue) + # if delta < 0.1: + # print('Low delta!') + # print(self.last_index, delta) + # # self.last_index = -1 + 
# self.left_half = not self.left_half + # # self.layers.pop() + # # self.base = self.cache_layers() + # # return + + # pvalue_sum += result.pvalue + # pvalue_denom += 1 + # if lowest_index < 0 or result.pvalue < lowest_pvalue: + # lowest_index = i + # lowest_pvalue = result.pvalue + # if highest_index < 0 or result.pvalue > highest_pvalue: + # highest_index = i + # highest_pvalue = result.pvalue + # except Exception as e: + # print(e) + # pass + # average_pvalue = pvalue_sum / pvalue_denom + # print(average_pvalue) + # index = highest_index if self.left_half else lowest_index + # best_pvalue = highest_pvalue if self.left_half else lowest_pvalue + + # self.last_index = index + # self.last_pvalue = best_pvalue + # # if average_pvalue < 0.5: + # # index = lowest_index + # # best_pvalue = lowest_pvalue + # # else: + # # index = highest_index + # # best_pvalue = highest_pvalue + # # print(e) + + # # for i in range(0, self.actual_N): + # # if i in self.knowns: + # # continue + # # # result = stats.kstest(self.base_coherence_samples, self.subspace_uplift_left_samples[i], alternative='greater') + # # # # result = stats.kstest(self.subspace_uplift_left_samples[i], self.subspace_uplift_right_samples[i], alternative='greater') + # # # print(i, result) + # # # value = result.pvalue * (1 - result.statistic) + # # # parameters = stats.norm.fit(self.subspace_uplift_left_samples[i]) + # # # (mu, _) = parameters + # # try: + # # result = stats.ttest_1samp(self.subspace_uplift_samples[i], 0, alternative='greater') + # # print(i, result) + # # # (hist, bins) = np.histogram(self.subspace_uplift_samples[i], 20, range=(-0.01, 0.01)) + # # # bin_index = np.argsort(hist)[-1] + # # # mode = (bins[bin_index] + bins[bin_index + 1]) / 2 + # # # fitter = modeling.fitting.LevMarLSQFitter() + # # # model = modeling.models.Gaussian1D() + # # # fitted_model = fitter(model, bins[1:], hist) + # # # mode = fitted_model.mean.value + # # # print(i, mode) + # # # density = stats.gaussian_kde(self.subspace_uplift_samples[i], weights=self.subspace_uplift_weights[i])(x) + # # # density = stats.gaussian_kde(self.subspace_uplift_samples[i])(x) + # # # mode = x[np.argsort(density)[-1]] + # # # print(i, mode) + # # # print(i, mu) + # # if (index < 0 or result.pvalue < lowest_pvalue) and not isnan(result.pvalue): + # # # if index < 0 or value < lowest_pvalue: + # # index = i + # # lowest_pvalue = result.pvalue + # # is_subspace = True + + # # # if result.pvalue > 0.95: + # # # index = i + # # # parameters = stats.norm.fit(self.subspace_uplift_samples[i]) + # # # (mu, _) = parameters + # # # if mu > base_mu: + # # # if index < 0 or mu > highest_mu: + # # # index = i + # # # highest_mu = mu + # # except Exception as e: + # # print(e) + # # pass + # # # print(e) + + # if index >= 0: + # if is_subspace: + # # print('subspace') + # self.knowns.append(index) + # print(self.knowns, best_pvalue) + # else: + # # print('flat') + # self.knowns.append(index) + # # self.layer_confidence[index_hash(self.knowns)] = confidence + # # num_terms = len(self.knowns) + # print(self.knowns, best_pvalue) + # print(base_coherence) + # self.add_layer() + # # if num_terms > self.num_terms: + # # self.stops = set() + # # self.num_terms = num_terms + # self.knowns = [] + # return + # else: + # self.knowns = [] + # # else: + # # self.knowns = [] + + # # if len(self.knowns) > 0: + # # # self.add_stop() + # # self.knowns = [] + # finally: + # fig, axs = plt.subplots(int(self.actual_N / 4), 4) + # x_eval = np.linspace(-1.0, 1.0, num=1000) + # for i in range(0, 
int(self.actual_N / 4)): + # for j in range(0, 4): + # # (hist, bins) = np.histogram(self.base_coherence_samples, self.num_bins, density=True) + # # # fitter = modeling.fitting.LevMarLSQFitter() + # # # model = modeling.models.Gaussian1D() + # # # fitted_model = fitter(model, bins[1:], hist) + # # # axs[i][j].scatter(bins[1:], hist, s=1, color='r', alpha=0.5) + # # # axs[i][j].plot(x_eval, fitted_model(x_eval), color='r') + + # # (hist, bins) = np.histogram(self.coherence_samples[i * 4 + j], self.num_bins, density=True) + # # # fitter = modeling.fitting.LevMarLSQFitter() + # # # model = modeling.models.Gaussian1D() + # # # fitted_model = fitter(model, bins[1:], hist) + # # axs[i][j].scatter(bins[1:], hist, s=1, color='g', alpha=0.5) + # # # axs[i][j].plot(x_eval, fitted_model(x_eval), color='g') + + # # (hist, bins) = np.histogram(self.subspace_uplift_samples[i * 4 + j], self.num_bins, density=True) + # # # fitter = modeling.fitting.LevMarLSQFitter() + # # # model = modeling.models.Gaussian1D() + # # # fitted_model = fitter(model, bins[1:], hist) + # # axs[i][j].scatter(bins[1:], hist, s=1, color='b', alpha=0.5) + # # # axs[i][j].plot(x_eval, fitted_model(x_eval), color='b') + + # # # kde0 = stats.gaussian_kde(self.base_coherence_samples) + # # kde1 = stats.gaussian_kde(self.coherence_samples[i * 4 + j]) + # # # kde2 = stats.gaussian_kde(self.subspace_uplift_samples[i * 4 + j], weights=self.subspace_uplift_weights[i]) + # # kde2 = stats.gaussian_kde(self.subspace_uplift_samples[i * 4 + j]) + # # # axs[i][j].plot(x_eval, kde0(x_eval), color='r') + # # axs[i][j].plot(x_eval, kde1(x_eval), color='g') + # # axs[i][j].plot(x_eval, kde2(x_eval), color='b') + # # # n, bins, patches = axs[i][j].hist(self.base_coherence_samples, 50, density=True, facecolor='r', alpha=0.5) + # # # n, bins, patches = axs[i][j].hist(self.coherence_samples[i * 4 + j], 50, density=True, facecolor='g', alpha=0.5) + # # # n, bins, patches = axs[i][j].hist(self.subspace_uplift_samples[i * 4 + j], 50, density=True, facecolor='b', alpha=0.5) + # # plt.show() + # # self.epoch = 0 + + # return + + # print('=====' + str(base_coherence)) + # print(self.uplifts) + # print(self.uplift_means) + # print(self.uplift_medians) + # print(self.uplift_stddevs) + # print(self.uplift_ranges) + # print(self.uplift_convergences) + # print(self.subspace_uplifts) + + if index >= 0: + self.knowns.append(index) + print(base_coherence) + print(self.knowns, self.epoch) + # print(self.uplift_medians) + # print(self.uplifts) + # print(self.subspace_uplifts) + self.add_layer() + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.uplift_medians.fill(0) + self.uplift_convergences.fill(0) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.epoch = 0 + return + + if subspace_index >= 0: + self.knowns.append(subspace_index) + print(self.knowns, self.epoch) + # print(self.uplifts) + # print(self.subspace_uplifts) + self.uplifts.fill(0) + self.subspace_uplifts.fill(0) + self.uplift_medians.fill(0) + self.uplift_convergences.fill(0) + self.uplift_samples = [[] for _ in range(0, self.actual_N)] + self.epoch = 0 + return + + # print('======') + # print(self.epoch, base_coherence) + # print('======') + + # if len(self.candidate_pool) == 0: + # print(self.p) + + # for i in range(0, min(5, len(self.candidate_pool))): + # candidate = self.candidate_pool[i] + # print(candidate.id(), candidate.uplift) + + # if self.epoch < 15: + # return + + if self.candidate_pool[0].uplift > 0.3: + candidate = self.candidate_pool[0] + candidate_id = 
candidate.id() + self.candidate_ids.remove(candidate_id) + print(candidate_id) + self.knowns = candidate.indices + self.add_layer() + self.knowns = [] + self.reset_p() + self.epoch = 0 + self.candidate_pool = [] + self.candidate_ids = set() + elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200: + self.epoch = 0 + self.num_terms += 1 + self.candidate_pool = [] + self.candidate_ids = set() + self.knowns = [] + self.stops = set() + self.reset_p() + return + + # np.copyto(self.next_p, self.p) + for _ in range(0, self.num_candidates): + candidate = self.random_candidate() + if candidate is None: + continue + candidate_id = candidate.id() + if candidate_id in visited: + continue + visited.add(candidate_id) + if self.actual_N in candidate.indices: + continue + has_candidate = True + for i in range(0, len(self.inputs)): + self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i]) + # coherence = self.ring_coherence() + coherence = self.coherence() + # if coherence <= base_coherence: + # continue + # for index in candidate.indices: + # self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0) + # self.p_temp[index] += 0 + for index in candidate.indices: + if coherence > self.max_coherences[index]: + self.max_coherences[index] = coherence + self.max_candidates[index] = candidate + # self.max_coherences[index] = max(self.max_coherences[index], coherence) + # np.copyto(self.p, self.next_p) + + # np.copyto(self.p_temp, self.p) + for i in range(0, self.actual_N): + candidate = self.max_candidates[i] + if candidate is None: + continue + for index in candidate.indices: + self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0) + # print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id()) + self.normalize_p() + # print(self.p) + + # np.subtract(self.p_temp, self.p, self.p_temp) + # np.abs(self.p_temp, self.p_temp) + # delta = np.sum(self.p_temp) / len(self.p_temp) + # print(delta, np.argmax(self.p)) + # np.copyto(self.p_temp, self.p) + # for i in range(0, len(self.p_temp)): + # self.p_temp[i] = round(self.p_temp[i] * 100) / 100 + # print(self.p_temp) + + index = np.argmax(self.p) + delta_over_null = self.p[index] - self.p[self.actual_N] + if self.epoch == 0: + self.average_delta_over_null = delta_over_null + else: + self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null + diff = self.num_terms - len(self.knowns) + + print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p)) + + # Always iterate for a minimum number of epochs + if self.epoch < 15: + return + if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300: + return + if self.average_delta_over_null < 0.001: + index = self.actual_N + else: + index = np.argmax(self.p) + + # index = np.argmax(self.p) + # if index == self.last_value: + # self.rounds += 1 + # else: + # self.rounds = 0 + # self.last_value = index + + # if self.rounds < 10 and self.epoch < 100: + # return + + # if self.epoch < 5 or (delta > 0.001 and self.epoch < 50): + # return + + # index = np.argmax(self.p) + + # print(self.p) + # print(self.threshold()) + # print(self.p) + # index = self.get_converged_index() + if not index is None or not has_candidate: + # print(index, delta, np.argmax(self.p)) + self.epoch = 0 + if index == self.actual_N or not has_candidate: + if len(self.knowns) > 0: + self.add_stop() + self.knowns.pop() + print('Backtrack: ' + str(self.knowns)) + self.reset_p() + return + 
self.num_terms += 1 + self.knowns = [] + self.stops = set() + self.reset_p() + print(self.num_terms) + return + self.knowns.append(index) + # bisect.insort(self.knowns, index) + if len(self.knowns) == self.num_terms: + print('Add layer: ' + str(self.knowns)) + self.add_layer() + else: + print('Found term: ' + str(self.knowns)) + self.reset_p() + print(base_coherence) + return + + def cache_layers(self): + expr = 'def f(x):\n\tresult=0\n' + for layer in self.layers: + expr += '\tresult^=' + layer.eval_str() + '\n' + expr += '\treturn result\n' + scope = {} + exec(expr, scope) + return scope['f'] + +def main(): + probabilities = Probabilities() + # probabilities.knowns = [14] + # probabilities.add_layer() + # probabilities.knowns = [8] + # probabilities.add_layer() + # probabilities.knowns = [4] + # probabilities.add_layer() + while probabilities.num_terms <= probabilities.N: + probabilities.update() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations2.py b/mutations2.py new file mode 100644 index 0000000..80c59d4 --- /dev/null +++ b/mutations2.py @@ -0,0 +1,570 @@ +import hashlib +import math +from matplotlib import offsetbox +import numpy as np +import random +from struct import pack, pack_into, unpack_from +import secrets + +from numpy import hamming + +N = 32 +M = 2 + +def bit_at_index(buffer, index): + offset = (index >> 3) % len(buffer) + return buffer[offset] & (1 << (index & 0b111)) != 0 + +def count_one_bits(n): + return bin(n).count("1") + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def encode_f(f, buffer, offset=0): + (inverted, flips, child) = f + pack_into('I', buffer, offset, inverted) + offset += 4 + for index in flips: + pack_into('I', buffer, offset, 0) + offset += 4 + pack_into('I', buffer, offset, index) + offset += 4 + if child is None: + pack_into('I', buffer, offset, 1) + offset += 4 + return offset + (inverted, left, right) = child + pack_into('I', buffer, offset, 2 if not inverted else 3) + offset += 4 + offset = encode_f(left, buffer, offset) + offset = encode_f(right, buffer, offset) + return offset + +def generate_random_branch(p_mutation): + global N + + p_add_indices = p_mutation * random.random() + p_add_children = p_mutation * random.random() + + inverted = random.randint(0, 1) + indices = set() + children = [] + + # randomly add indices + while random.random() < p_add_indices and len(indices) < N: + available_indices = [i for i in range(0, N) if i not in indices] + if len(available_indices) == 1: + indices.add(available_indices[0]) + continue + indices.add(available_indices[random.randint(0, len(available_indices) - 1)]) + # randomly add children + while random.random() < p_add_children: + child_inverted = random.randint(0, 1) + left = generate_random_branch(p_add_children) + right = generate_random_branch(p_add_children) + children.append((child_inverted, left, right)) + return (inverted, indices, children) + +def mutate_f(f, p_mutation): + global N + (inverted, indices, children) = f + mutated_indices = set(indices) + mutated_children = children[:] + + p_invert = p_mutation * random.random() + p_drop_indices = p_mutation * random.random() + p_add_indices = p_mutation * random.random() + p_drop_children = p_mutation * random.random() + p_mutate_child = p_mutation * random.random() + p_clone_child = p_mutation * random.random() + p_invert_child = p_mutation * random.random() + p_add_children = p_mutation * random.random() + + # randomly invert + if random.random() < 
p_invert: + inverted ^= 1 + # randomly drop indices + while random.random() < p_drop_indices and len(mutated_indices) > 0: + mutated_indices.pop() + # randomly add indices + while random.random() < p_add_indices and len(mutated_indices) < N: + available_indices = [i for i in range(0, N) if i not in mutated_indices] + if len(available_indices) == 1: + mutated_indices.add(available_indices[0]) + continue + mutated_indices.add(available_indices[random.randint(0, len(available_indices) - 1)]) + # randomly drop children + while random.random() < p_drop_children and len(mutated_children) > 0: + if len(mutated_children) == 1: + del mutated_children[0] + break + del mutated_children[random.randint(0, len(mutated_children) - 1)] + # randomly clone children + while random.random() < p_clone_child and len(mutated_children) > 0: + index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1) + (child_inverted, left, right) = mutated_children[index] + if random.random() < p_invert_child: + child_inverted ^= 1 + clone = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation)) + mutated_children.append(clone) + # randomly mutate children + while random.random() < p_mutate_child and len(mutated_children) > 0: + index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1) + (child_inverted, left, right) = mutated_children[index] + if random.random() < p_invert_child: + child_inverted ^= 1 + mutated_children[index] = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation)) + # randomly add children + while random.random() < p_add_children: + child_inverted = random.randint(0, 1) + left = generate_random_branch(p_mutation) + right = generate_random_branch(p_mutation) + mutated_children.append((child_inverted, left, right)) + return (inverted, mutated_indices, mutated_children) + +def decode_f(buffer, mutate = False, offset = 0, skip_invert = False): + global N + inverted = 0 + if not skip_invert: + [inverted] = unpack_from('I', buffer, offset) + offset += 4 + # random invert + if mutate and random.random() < 0.01: + inverted ^= 1 + inverted &= 0b1 + flips = set() + # random add flip + while mutate and random.random() < 0.5 and len(flips) < N: + available_indices = [i for i in range(0, N) if i not in flips] + if len(available_indices) == 1: + flips.add(available_indices[0]) + continue + flips.add(available_indices[random.randint(0, len(available_indices) - 1)]) + while offset < len(buffer): + # random create branch + if mutate and random.random() < 0.01: + gate_inverted = random.randint(0, 1) + left = generate_random_branch() + (offset, right) = decode_f(buffer, mutate, offset, True) + return (offset, (inverted, flips, (gate_inverted, left, right))) + [opcode] = unpack_from('I', buffer, offset) + offset += 4 + opcode &= 0b11 + if opcode == 0: + [index] = unpack_from('I', buffer, offset) + offset += 4 + # random skip flip + if mutate and random.random() < 0.01: + continue + if index in flips: + flips.remove(index) + else: + flips.add(index) + elif opcode == 1: + return (offset, (inverted, flips, None)) + else: + (offset, left) = decode_f(buffer, mutate, offset) + (offset, right) = decode_f(buffer, mutate, offset) + gate_inverted = 0 if opcode == 2 else 1 + # random invert + if mutate and random.random() < 0.01: + gate_inverted ^= 1 + # random skip branch + if mutate and random.random() < 0.01: + return (offset, (inverted, flips, None)) + return (offset, (inverted, flips, (gate_inverted, left, right))) + return (offset, 
(inverted, [], None)) + +def generate_program(model, output_var='output'): + global N, M + (constant, indices, child) = model + + statement = 'multiply(' + np.array2string(indices, separator=',') + ', x, temp)\n\t' + statement += output_var + '=' + str(constant) + '+sum(temp)\n\t' + + if not child is None: + left_output = output_var + '0' + right_output = output_var + '1' + (left, right) = child + statement += generate_program(left, left_output) + statement += generate_program(right, right_output) + statement += output_var + '+=' + left_output + '*' + right_output + '\n\t' + statement += output_var + '%=' + str(M) + '\n\t' + return statement + +def compile(model): + program = 'def f(x, temp):\n\t' + generate_program(model) + 'return output' + scope = {'multiply': np.multiply, 'sum': np.sum} + exec(program, scope) + return scope['f'] + +def evaluate(model, x, value = 0): + (inverted, indices, children) = model + for i in indices: + if bit_at_index(x, i) != 0: + value ^= 1 + for child in children: + (child_inverted, left, right) = child + left = evaluate(left, x) + right = evaluate(right, x) + if left & right != child_inverted: + value ^= 1 + if inverted: + value ^= 1 + return value + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(N / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(x) + +def sha(v): + global M + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] % M + +def xor(x): + num_one_bits = 0 + for n in x: + num_one_bits += count_one_bits(n) + return num_one_bits % 2 + +def random_sample(m, n): + inputs = np.zeros((m, n)) + for i in range(0, m): + for j in range(0, n): + inputs[i][j] = random.randint(0, 1) + return inputs + +def update_sample(sample, index): + global N + for j in range(0, N): + sample[index][j] = random.randint(0, 1) + +def coherence(inputs, outputs, scratch): + coherences = [] + for i in range(0, len(inputs)): + x_a = inputs[i] + y_a = outputs[i] + numerator = 0 + denominator = 0 + for j in range(0, len(inputs)): + if i == j: + continue + x_b = inputs[j] + y_b = outputs[j] + distance = hamming_distance(x_a, x_b, scratch) + weight = 1.0 / (2 ** distance) + denominator += weight + if y_a == y_b: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return sum(coherences) / len(coherences) + +def build_coherence_models(inputs, scratch): + coherence_models = [] + for i in range(0, len(inputs)): + x_a = inputs[i] + distances = [hamming_distance(x_a, inputs[j], scratch) for j in range(0, len(inputs))] + indices = sorted(range(len(distances)), key=lambda i: distances[i]) + lowest = -1 + denominator = 0 + components = [] + for index in range(0, len(indices)): + j = indices[index] + if distances[j] == 0: + continue + if lowest < 0: + lowest = distances[j] + distance = distances[j] - lowest + if distance >= 8: + break + weight = 2 ** -distance + denominator += weight + components.append((weight, j)) + coherence_models.append((denominator, components)) + return coherence_models + +def fast_coherence(coherence_models, outputs): + coherences = [] + for i in range(0, len(coherence_models)): + (denominator, components) = coherence_models[i] + numerator = 0 + for component in components: + (weight, j) = component + if outputs[i] == outputs[j]: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return 
sum(coherences) / len(coherences) + +def score(f, sample, distances): + return coherence([(x, f(x) ^ y) for (x, y) in sample], distances) + +def compute_distances(inputs, distances, scratch): + for i in range(0, len(inputs)): + a = inputs[i] + for j in range(i, len(inputs)): + if i == j: + distances[i][j] = 0 + continue + b = inputs[j] + distance = 2 ** -hamming_distance(a, b, scratch) + distances[i][j] = distance + distances[j][i] = distance + +def update_distances(inputs, distances, i, scratch): + a = inputs[i] + for j in range(0, len(inputs)): + if i == j: + distances[i][j] = 0 + continue + b = inputs[j] + distance = 2 ** -hamming_distance(a, b, scratch) + distances[i][j] = distance + distances[j][i] = distance + +def evaluate_sample(model, sample, output): + stack = [model] + (_, _, _, root_scratch, _) = model + while len(stack) > 0: + layer = stack.pop() + (inverted, xors, child, scratch, touched) = layer + if child is None: + np.matmul(sample, xors, scratch) + np.mod(scratch, 2, scratch) + if inverted == 1: + np.logical_xor(1, scratch, scratch) + touched[0] = 1 + else: + (child_inverted, left, right) = child + (_, _, _, left_scratch, left_touched) = left + (_, _, _, right_scratch, right_touched) = right + if left_touched[0] and right_touched[0]: + np.multiply(left_scratch, right_scratch, output) + np.matmul(sample, xors, scratch) + np.mod(scratch, 2, scratch) + if inverted: + np.logical_xor(scratch, 1, scratch) + if child_inverted: + np.logical_xor(output, 1, output) + np.logical_xor(scratch, output, scratch) + touched[0] = 1 + else: + stack.insert(0, layer) + stack.insert(0, left) + stack.insert(0, right) + np.copyto(output, root_scratch) + reset_model(model) + +def reset_model(model): + stack = [model] + while len(stack) > 0: + layer = stack.pop() + (_, _, child, _, touched) = layer + touched[0] = 0 + if not child is None: + (_, left, right) = child + stack.append(left) + stack.append(right) + +def clone_model(model, p_mutation): + global N, M + + p_constant = p_mutation * random.random() + p_flip = p_mutation * random.random() + p_add_child = p_mutation * random.random() + p_drop_child = p_mutation * random.random() + + (constant, xors, child) = model + if random.random() < p_constant: + constant += random.randint(0, M - 1) + constant %= M + clone_xors = np.zeros((N,)) + np.copyto(clone_xors, xors) + for i in range(0, N): + if random.random() < p_flip: + offset = 1 if M == 2 else random.randint(1, M - 1) + clone_xors[i] += offset + clone_xors[i] %= M + if child is None: + if random.random() < p_add_child: + left = random_child(p_mutation) + right = random_child(p_mutation) + return (constant, clone_xors, (left, right)) + return (constant, clone_xors, None) + if random.random() < p_drop_child: + return (constant, clone_xors, None) + (left, right) = child + clone_left = clone_model(left, p_mutation) + clone_right = clone_model(right, p_mutation) + return (constant, clone_xors, (clone_left, clone_right)) + +def random_child(p_mutation): + global N, M + constant = random.randint(0, M - 1) + xors = np.zeros((N,)) + + p_flip = p_mutation * random.random() + p_child = p_mutation * random.random() + + index = random.randint(0, N - 1) + xors[index] = 1 if M == 2 else random.randint(1, M - 1) + for i in range(0, N): + if i != index and random.random() < p_flip: + xors[i] = 1 if M == 2 else random.randint(1, M - 1) + # if random.random() < p_child: + # left = random_child(p_mutation * random.random()) + # right = random_child(p_mutation * random.random()) + # return (constant, xors, (left, 
right)) + return (constant, xors, None) + +def null_candidate(): + global N + return (0, np.zeros((N,)), None) + +def size(model): + (_, xors, child) = model + xor_size = np.sum(xors) + if not child is None: + (left, right) = child + return xor_size + size(left) * size(right) + return xor_size + +def main(): + global N, M + epochs = 10000 + num_survivors = 100 + num_offspring = 10 + num_candidates = num_survivors + num_survivors * num_offspring + sample_size = 128 + eval_size = 100 + p_mutation = 0.5 + g = sha + current_generation = [null_candidate() for _ in range(0, num_candidates)] + + distances = np.zeros((sample_size, sample_size)) + output_equality = np.zeros((sample_size, sample_size)) + inputs = random_sample(sample_size, N) + scratch = np.zeros(N,) + # compute_distances(inputs, distances, scratch) + expected_outputs = np.zeros((sample_size,)) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + outputs = np.zeros((sample_size,)) + output_xor = np.zeros((sample_size,)) + ones = np.ones((sample_size,)) + numerators = np.zeros((sample_size,)) + denominators = np.zeros((sample_size,)) + coherences = np.zeros((sample_size,)) + np.matmul(ones, distances, denominators) + scores = np.zeros((num_candidates,)) + max_score = 0 + last_score = 0 + streak = 0 + + coherence_models = build_coherence_models(inputs, scratch) + + for epoch in range(0, epochs): + for i in range(0, num_candidates): + candidate = current_generation[i] + f = compile(candidate) + for j in range(0, sample_size): + outputs[j] = f(inputs[j], scratch) + np.subtract(outputs, expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + # for p in range(0, sample_size): + # for q in range(0, sample_size): + # m = int(output_xor[p]) + # n = int(output_xor[q]) + # distance = abs(m - n) + # if distance > M / 2: + # distance = M - distance + # distance /= (M / 2) + # distance **= 2 + # output_equality[p][q] = distance + # # output_equality[p][q] = 1 if m == n else 0 + # np.multiply(output_equality, distances, output_equality) + # np.matmul(ones, output_equality, numerators) + # np.divide(numerators, denominators, coherences) + # score = np.average(coherences) + score = fast_coherence(coherence_models, output_xor) + # if random.random() < 0.1: + # check = coherence(inputs, output_xor, scratch) + # if check - score > 1e-3: + # print('not equal') + scores[i] = score + + top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:] + survivors = [current_generation[index] for index in top_n] + + # f = lambda x: evaluate(current_generation[0], x) + # correct = 0 + # for i in range(0, eval_size): + # x = random_input() + # if f(x) == g(x): + # correct += 1 + + top_score = scores[top_n[-1]] + print(epoch, top_score, size(survivors[-1])) + if top_score <= max_score: + p_mutation += 0.01 + else: + p_mutation = 0.5 + max_score = top_score + + for i in range(0, num_survivors): + current_generation[i] = survivors[i] + + for i in range(0, num_survivors): + candidate = survivors[i] + for j in range(0, num_offspring): + index = num_survivors + j * num_survivors + i + current_generation[index] = clone_model(candidate, random.random()) + + # inputs = random_sample(sample_size, N) + # coherence_models = build_coherence_models(inputs, scratch) + # for i in range(0, sample_size): + # expected_outputs[i] = g(inputs[i]) + + # while random.random() < 0.5: + if last_score == top_score: + streak += 1 + else: + streak = 0 + if streak >= 4: + inputs = random_sample(sample_size, N) + coherence_models = 
build_coherence_models(inputs, scratch) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + # inputs = random_sample(sample_size, N) + # coherence_models = build_coherence_models(inputs, scratch) + # # compute_distances(inputs, distances, scratch) + # # np.matmul(ones, distances, denominators) + # for i in range(0, sample_size): + # expected_outputs[i] = g(inputs[i]) + # streak = 0 + # expected_outputs = np.zeros((sample_size,)) + # for i in range(0, sample_size): + # expected_outputs[i] = g(inputs[i]) + # index = random.randint(0, sample_size - 1) + # update_sample(inputs, index) + # expected_outputs[index] = g(inputs[index]) + # update_distances(inputs, distances, index, scratch) + # np.matmul(ones, distances, denominators) + last_score = top_score + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations20.py b/mutations20.py new file mode 100644 index 0000000..f172476 --- /dev/null +++ b/mutations20.py @@ -0,0 +1,316 @@ +import numpy as np +import random +import hashlib +import math + +def get_state_id(state): + return ','.join([str(x) for x in sorted(state)]) + +class Point(): + def __init__(self, x, y): + self.x = x + self.y = y + + def id(self): + return ''.join([str(x) for x in self.x]) + +class Influence(): + def __init__(self, a, b): + self.a = a + self.b = b + self.original_dof = set() + self.dof = set() + for i in range(0, len(a.x)): + if a.x[i] != b.x[i]: + self.original_dof.add(i) + self.dof.add(i) + self.coherent = self.a.y == self.b.y + + def coherent(self): + return self.a.y == self.b.y + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def hamming_distance(a, b, flips): + distance = 0 + for i in range(0, len(a.x)): + if i in flips: + continue + distance += 1 if a.x[i] != b.x[i] else 0 + return distance + +def random_x(N): + x = np.zeros((N)) + for i in range(0, N): + x[i] = random.randint(0, 1) + return x + +def xor(x): + return np.sum(x[16:]) % 2 + +def create_dof_map(influences): + dof_map = {} + for influence in influences: + for i in influence.dof: + if not i in dof_map: + dof_map[i] = [] + dof_map[i].append(influence) + return dof_map + +def flip(influences, i): + for influence in influences: + if i in influence.dof: + influence.a.y = int(influence.a.y) ^ 1 + +def remove_dof(dof_map, i, flip = False): + for influence in dof_map[i]: + influence.dof.remove(i) + if flip: + influence.a.y = int(influence.a.y) ^ 1 + # if len(influence.dof) == 0 and not influence.coherent(): + # raise Exception('Invalid') + del dof_map[i] + +def solve(dof_map, all_influences, all_samples): + eliminated = True + while eliminated: + eliminated = False + for influence in all_influences: + if len(influence.dof) == 1: + i = next(iter(influence.dof)) + if influence.coherent: + remove_dof(dof_map, i) + eliminated = True + else: + print('Forced', i) + remove_dof(dof_map, i, True) + eliminated = True + + lowest_dof = None + for influence in all_influences: + if not influence.coherent and len(influence.dof) > 1: + if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof): + lowest_dof = influence + + flip = None + highest_score = -1 + + for i in lowest_dof.dof: + per_point_scores = {} + i_influences = dof_map[i] + left = 0 + 
right = 0 + for influence in i_influences: + if not influence.a in per_point_scores: + per_point_scores[influence.a] = [0, 0] + if not influence.b in per_point_scores: + per_point_scores[influence.b] = [0, 0] + if influence.coherent: + per_point_scores[influence.a][0] += 1 + per_point_scores[influence.b][0] += 1 + left += 1 + else: + per_point_scores[influence.a][1] += 1 + per_point_scores[influence.b][1] += 1 + right += 1 + print(i, left / (left + right)) + num = 0 + denom = 0 + for _, score in per_point_scores.items(): + if score[0] == score[1]: + continue + print(i, score) + num += score[1] / (score[0] + score[1]) + denom += 1 + score = num / denom if denom > 0 else 0 + print(score) + + return None + + + # score_sum = 0 + # for j, j_influences in dof_map.items(): + # if j in lowest_dof.dof: + # continue + # double_score = 0 + # double_totals = [0, 0, 0, 0, 0, 0] + # for influence in i_influences: + # if influence in j_influences: + # weight = 1.0 / ((len(influence.dof) - 2) ** 2) + # if influence.coherent: + # double_score += weight + # double_totals[0] += 1 + # else: + # double_score -= weight + # double_totals[3] += 1 + # else: + # weight = 1.0 / ((len(influence.dof) - 1) ** 2) + # if influence.coherent: + # double_score -= weight + # double_totals[4] += 1 + # else: + # double_score += weight + # double_totals[1] += 1 + # for influence in j_influences: + # if influence in i_influences: + # continue + # weight = 1.0 / ((len(influence.dof) - 1) ** 2) + # if influence.coherent: + # double_score -= weight + # double_totals[5] += 1 + # else: + # double_score += weight + # double_totals[2] += 1 + + # score = double_score + # score_sum += score + # # print((i, j), score, single_totals, double_totals) + + # if flip is None or score_sum > highest_score: + # highest_score = score_sum + # flip = [i] + # print(i, score_sum) + + # if flip is None: + # return None + # print('Chose', flip, 'from', lowest_dof.dof, highest_score) + # for i in flip: + # remove_dof(dof_map, i, True) + # return flip + +def main(): + N = 32 + sample_size = 32 + p_dist = np.ones(N) + p_dist.fill(0.5) + epoch = 0 + + while True: + sample_ids = set() + samples = [] + + for i in range(0, sample_size): + x = random_x(N) + y = int(sha(x)) + p = Point(x, y) + p_id = p.id() + if p_id in sample_ids: + continue + sample_ids.add(p_id) + samples.append(p) + + influences = [] + for i in range(0, len(samples)): + a = samples[i] + for j in range(i + 1, len(samples)): + b = samples[j] + influences.append(Influence(a, b)) + + visited = set() + state = [] + + iterations = 0 + while sum([0 if influence.coherent else 1 for influence in influences]) > 0: + # if iterations > 5000: + # state = [] + # break + iterations += 1 + # print(state) + lowest_dof = None + num_influences = -1 + for influence in influences: + if influence.coherent: + continue + + if lowest_dof is not None and len(influence.dof) >= num_influences: + continue + + has_unvisited_state = False + for i in influence.dof: + state_id = get_state_id(state + [i]) + if state_id not in visited: + has_unvisited_state = True + break + + if not has_unvisited_state: + continue + + if lowest_dof is None or len(influence.dof) < num_influences: + lowest_dof = influence + num_influences = len(influence.dof) + + added = False + if lowest_dof is not None: + valid_choices = [] + for i in lowest_dof.dof: + state_id = get_state_id(state + [i]) + if state_id in visited: + continue + valid_choices.append(i) + + if len(valid_choices) > 0: + i = valid_choices[0] + if len(valid_choices) > 1: + p_partial 
= np.zeros(len(valid_choices)) + index = 0 + for j in valid_choices: + p_partial[index] = p_dist[j] + np.divide(p_partial, np.sum(p_partial), p_partial) + i = np.random.choice(valid_choices, p=p_partial) + + state_id = get_state_id(state + [i]) + visited.add(state_id) + state.append(i) + added = True + + revert = False + if added: + i = state[-1] + for influence in influences: + if i in influence.dof: + if len(influence.dof) == 1 and influence.coherent: + revert = True + influence.coherent = not influence.coherent + influence.dof.remove(i) + + if revert or not added: + if len(state) == 0: + break + i = state.pop(random.randrange(len(state))) + for influence in influences: + if i in influence.original_dof and not i in influence.dof: + influence.coherent = not influence.coherent + influence.dof.add(i) + + if len(state) > 0: + epoch += 1 + p_dist -= 0.0001 * (sample_size ** 2) + for i in state: + p_dist[i] += 0.0002 * (sample_size ** 2) + # sample_size += 1 + print(p_dist) + else: + # sample_size -= 1 + pass + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations21.py b/mutations21.py new file mode 100644 index 0000000..1221d83 --- /dev/null +++ b/mutations21.py @@ -0,0 +1,368 @@ +from cmath import isnan +import numpy as np +import random +import hashlib +import math + +def get_state_id(state): + return ','.join([str(x) for x in sorted(state)]) + +class Point(): + def __init__(self, x, y): + self.x = x + self.y = y + + def id(self): + return ','.join([str(int(x)) for x in self.x]) + +class Influence(): + def __init__(self, a, b): + self.a = a + self.b = b + self.original_dof = set() + self.dof = set() + for i in range(0, len(a.x)): + if a.x[i] != b.x[i]: + self.original_dof.add(i) + self.dof.add(i) + + def coherent(self): + return self.a.y == self.b.y + + def id(self): + return ','.join(sorted([self.a.id(), self.b.id()])) + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def decode(x, N): + index = 0 + output = np.zeros((N)) + while x > 0 and index < N: + output[index] = x & 0b1 + x >>= 1 + index += 1 + return output + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def hamming_distance(a, b): + return np.sum(np.logical_xor(a.x, b.x)) + +def random_x(N): + x = np.zeros((N)) + for i in range(0, N): + x[i] = random.randint(0, 1) + return x + +def xor(x): + # return sum(x[:4]) % 2 + return sum(x) % 2 + +def create_dof_map(influences): + dof_map = {} + for influence in influences: + for i in influence.dof: + if not i in dof_map: + dof_map[i] = [] + dof_map[i].append(influence) + return dof_map + +def flip(influences, i): + for influence in influences: + if i in influence.dof: + influence.a.y = int(influence.a.y) ^ 1 + +def remove_dof(dof_map, i, flip = False): + for influence in dof_map[i]: + influence.dof.remove(i) + if flip: + influence.a.y = int(influence.a.y) ^ 1 + # if len(influence.dof) == 0 and not influence.coherent(): + # raise Exception('Invalid') + del dof_map[i] + +def solve(dof_map, all_influences, all_samples): + eliminated = True + while eliminated: + eliminated = False + for influence in all_influences: + if len(influence.dof) == 1: + i = next(iter(influence.dof)) + if influence.coherent: + remove_dof(dof_map, i) + eliminated = True + else: + print('Forced', i) + 
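+                    # an incoherent influence with a single remaining degree of freedom forces
+                    # that bit: remove it from every influence that still uses it and toggle
+                    # those influences' a-endpoint labels (flip=True).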
remove_dof(dof_map, i, True) + eliminated = True + + lowest_dof = None + for influence in all_influences: + if not influence.coherent and len(influence.dof) > 1: + if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof): + lowest_dof = influence + + flip = None + highest_score = -1 + + for i in lowest_dof.dof: + per_point_scores = {} + i_influences = dof_map[i] + left = 0 + right = 0 + for influence in i_influences: + if not influence.a in per_point_scores: + per_point_scores[influence.a] = [0, 0] + if not influence.b in per_point_scores: + per_point_scores[influence.b] = [0, 0] + if influence.coherent: + per_point_scores[influence.a][0] += 1 + per_point_scores[influence.b][0] += 1 + left += 1 + else: + per_point_scores[influence.a][1] += 1 + per_point_scores[influence.b][1] += 1 + right += 1 + print(i, left / (left + right)) + num = 0 + denom = 0 + for _, score in per_point_scores.items(): + if score[0] == score[1]: + continue + print(i, score) + num += score[1] / (score[0] + score[1]) + denom += 1 + score = num / denom if denom > 0 else 0 + print(score) + + return None + + +# 1st row (n+1 choose k+1) * (1-(k mod 2)) +# psuedopascal to compute the follow-on rows +# assuming solvability, we want to maximize the probability that our current state and our state with +# a particular single flip are one order apart in the correct direction + + + +# 2, 0 +# 2, 2, 0 +# 2, 4, 2, 0 +# 2, 6, 6, 2, 0 +# 2, 8,12, 8, 2, 0 +# 2,10,20,20,10, 2, 0 + +# 3,-9,19,-33,51,-73,99 +# 3,-6,10,-14,18,-22,26 +# 3,-3, 4, -4, 4, -4, 4 +# 3, 0, 1, 0, 0, 0, 0 +# 3, 3, 1, 1, 0, 0, 0 +# 3, 6, 4, 2, 1, 0, 0 +# 3, 9,10, 6, 3, 1, 0 + +# 4, 0, 4, 0 +# 4, 4, 4, 4, 0 +# 4, 8, 8, 8, 4, 0 +# 4,12,16,16,12, 4, 0 + +# 5, 0,10, 0, 1 +# 5, 5,10,10, 1, 1 +# 5, +# 5, + + + +# 3 +# +# @1 [1, 2, 1] +# @2 [2, 2, 0] +# @3 [3, 0, 1] + +# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...) 
+# +# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2), +# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1) +# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1) +# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 - +# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 - + +# @0 [0.0, 0.0, 0.0, 0.0, 0.0] +# @1 [0.2, 0.4, 0.6, 0.8, 1.0] +# @2 [0.4, 0.6, 0.6, 0.4, 0.0] +# @3 [0.6, 0.6, 0.4, 0.4, 1.0] +# @4 [0.8, 0.4, 0.4, 0.8, 0.0] +# @5 [1.0, 0.0, 1.0, 0.0, 1.0] + +# 6 +# +# @1 [1, 5, 10, 10, 5, 1] +# @2 [2, 8, 12, 8, 2, 0] +# @3 [3, 9, 10, 6, 3, 1] +# @4 [4, 8, 8, 8, 4, 0] +# @5 [5, 5, 10, 10, 1, 1] +# @6 [6, 0, 20, 0, 6, 0] + +# last row, 1 if odd, 0 if even +# second to last, subtract 2 on odds, add 2 on evens + +def compute_distributions(N): + dist = np.zeros((N, N)) + for j in range(0, N): + dist[0][j] = math.comb(N - 1, j) + dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2)) + for i in range(1, N): + for j in range(0, i + 1): + dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2)) + for k in range(i + 1, N): + for j in reversed(range(0, k)): + dist[i][j+1] = dist[i][j] + dist[i][j+1] + for i in range(0, N): + for j in range(0, N): + denom = math.comb(N, j+1) + dist[i][j] /= denom + return dist + + +def main(): + N = 32 + sample_size = 2048 + sample_ids = set() + samples = [] + + dist = compute_distributions(N) + print(dist) + + for i in range(0, sample_size): + x = random_x(N) + y = int(xor(x)) + p = Point(x, y) + p_id = p.id() + if p_id in sample_ids: + continue + sample_ids.add(p_id) + samples.append(p) + + # for i in range(0, 2**N): + # x = decode(i, N) + # y = int(xor(x)) + # samples.append(Point(x,y)) + + base = np.zeros(N) + current = np.zeros(N) + + for _ in range(0, N): + lowest_err = -1 + use_flip = -1 + for flip in range(-1, N): + coherent_distances = {} + incoherent_distances = {} + all_coherent = True + for i in range(0, len(samples)): + a = samples[i] + for j in range(i + 1, len(samples)): + # if i == j: + # continue + b = samples[j] + distance = hamming_distance(a, b) + if distance not in coherent_distances: + coherent_distances[distance] = 0 + if distance not in incoherent_distances: + incoherent_distances[distance] = 0 + is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y) + if is_coherent: + coherent_distances[distance] += 1 + else: + incoherent_distances[distance] += 1 + all_coherent = False + if all_coherent: + print('Flip and halt', flip) + return + # print(coherent_distances, incoherent_distances) + + for k in range(0, N): + known_incoherence_at_k = dist[k] + err = 0 + # denom = 0 + for i in range(0, N): + if i not in coherent_distances: + continue + est_incoherence = incoherent_distances[i] / (coherent_distances[i] + incoherent_distances[i]) + confidence = 1.0 + # print(k, i, est_incoherence) + err += confidence * abs(est_incoherence - known_incoherence_at_k[i - 1])# / ((est_incoherence + known_incoherence_at_k[i - 1]) / 2) + # denom += 1 + # print(flip, k, err) + # err /= denom + if flip < 0: + base[k] = err + else: + current[k] = err + if flip >= 0: + # np.divide(current, np.max(current), current) + # print(flip, current) + index = -1 + base_sum = 0 + current_sum = 0 + base_total = 0 + current_total = 0 + for k in range(0, N): + if base[k] > 0: + base_sum += k / base[k] + base_total += 1.0 / base[k] + else: + base_sum += k * 1e6 + base_total += 
1e6 + if current[k] > 0: + current_sum += k / current[k] + current_total += 1.0 / current[k] + else: + current_sum += k * 1e6 + current_total += 1e6 + # print(base_sum, base_total, current_sum, current_total) + # print(current_sum / current_total, base_sum / base_total) + rel_to_base = (current_sum / current_total) - (base_sum / base_total) + + # print(base_sum, base_total) + # print(base_sum / base_total, current_sum / current_total) + + # for k in range(0, N - 2): + # # err = base[k + 1] * current[k] * 1.0 / (base[k + 1] * current[k + 2]) + # err = base[k + 1] * current[k] + # if rel_to_base < 0 or err < rel_to_base: + # rel_to_base = err + # index = k + + if use_flip < 0 or rel_to_base < lowest_err: + lowest_err = rel_to_base + use_flip = flip + print(flip, rel_to_base) + else: + pass + # np.divide(base, np.max(base), base) + # print(flip, base) + + if lowest_err > 0: + return + print('Flip', use_flip, lowest_err) + for p in samples: + if p.x[use_flip]: + p.y ^= 1 + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations22.py b/mutations22.py new file mode 100644 index 0000000..a80dd18 --- /dev/null +++ b/mutations22.py @@ -0,0 +1,405 @@ +from cmath import isnan +import numpy as np +import random +import hashlib +import math + +def get_state_id(state): + return ','.join([str(x) for x in sorted(state)]) + +class Point(): + def __init__(self, x, y): + self.x = x + self.y = y + + def id(self): + return ','.join([str(int(x)) for x in self.x]) + +class Influence(): + def __init__(self, a, b): + self.a = a + self.b = b + self.original_dof = set() + self.dof = set() + for i in range(0, len(a.x)): + if a.x[i] != b.x[i]: + self.original_dof.add(i) + self.dof.add(i) + + def coherent(self): + return self.a.y == self.b.y + + def id(self): + return ','.join(sorted([self.a.id(), self.b.id()])) + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def decode(x, N): + index = 0 + output = np.zeros((N)) + while x > 0 and index < N: + output[index] = x & 0b1 + x >>= 1 + index += 1 + return output + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def hamming_distance(a, b): + return np.sum(np.logical_xor(a.x, b.x)) + +def random_x(N): + x = np.zeros((N)) + for i in range(0, N): + x[i] = random.randint(0, 1) + return x + +def xor(x): + # return sum(x[:4]) % 2 + return sum(x) % 2 + +def create_dof_map(influences): + dof_map = {} + for influence in influences: + for i in influence.dof: + if not i in dof_map: + dof_map[i] = [] + dof_map[i].append(influence) + return dof_map + +def flip(influences, i): + for influence in influences: + if i in influence.dof: + influence.a.y = int(influence.a.y) ^ 1 + +def remove_dof(dof_map, i, flip = False): + for influence in dof_map[i]: + influence.dof.remove(i) + if flip: + influence.a.y = int(influence.a.y) ^ 1 + # if len(influence.dof) == 0 and not influence.coherent(): + # raise Exception('Invalid') + del dof_map[i] + +def solve(dof_map, all_influences, all_samples): + eliminated = True + while eliminated: + eliminated = False + for influence in all_influences: + if len(influence.dof) == 1: + i = next(iter(influence.dof)) + if influence.coherent: + remove_dof(dof_map, i) + eliminated = True + else: + print('Forced', i) + remove_dof(dof_map, i, True) + eliminated = True + + 
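+    # no further forced single-DOF eliminations are available: branch on the incoherent
+    # influence with the fewest remaining degrees of freedom and score its candidate flips below.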
lowest_dof = None + for influence in all_influences: + if not influence.coherent and len(influence.dof) > 1: + if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof): + lowest_dof = influence + + flip = None + highest_score = -1 + + for i in lowest_dof.dof: + per_point_scores = {} + i_influences = dof_map[i] + left = 0 + right = 0 + for influence in i_influences: + if not influence.a in per_point_scores: + per_point_scores[influence.a] = [0, 0] + if not influence.b in per_point_scores: + per_point_scores[influence.b] = [0, 0] + if influence.coherent: + per_point_scores[influence.a][0] += 1 + per_point_scores[influence.b][0] += 1 + left += 1 + else: + per_point_scores[influence.a][1] += 1 + per_point_scores[influence.b][1] += 1 + right += 1 + print(i, left / (left + right)) + num = 0 + denom = 0 + for _, score in per_point_scores.items(): + if score[0] == score[1]: + continue + print(i, score) + num += score[1] / (score[0] + score[1]) + denom += 1 + score = num / denom if denom > 0 else 0 + print(score) + + return None + + +# 1st row (n+1 choose k+1) * (1-(k mod 2)) +# psuedopascal to compute the follow-on rows +# assuming solvability, we want to maximize the probability that our current state and our state with +# a particular single flip are one order apart in the correct direction + + + +# 2, 0 +# 2, 2, 0 +# 2, 4, 2, 0 +# 2, 6, 6, 2, 0 +# 2, 8,12, 8, 2, 0 +# 2,10,20,20,10, 2, 0 + +# 3,-9,19,-33,51,-73,99 +# 3,-6,10,-14,18,-22,26 +# 3,-3, 4, -4, 4, -4, 4 +# 3, 0, 1, 0, 0, 0, 0 +# 3, 3, 1, 1, 0, 0, 0 +# 3, 6, 4, 2, 1, 0, 0 +# 3, 9,10, 6, 3, 1, 0 + +# 4, 0, 4, 0 +# 4, 4, 4, 4, 0 +# 4, 8, 8, 8, 4, 0 +# 4,12,16,16,12, 4, 0 + +# 5, 0,10, 0, 1 +# 5, 5,10,10, 1, 1 +# 5, +# 5, + + + +# 3 +# +# @1 [1, 2, 1] +# @2 [2, 2, 0] +# @3 [3, 0, 1] + +# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...) 
+# +# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2), +# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1) +# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1) +# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 - +# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 - + +# @0 [0.0, 0.0, 0.0, 0.0, 0.0] +# @1 [0.2, 0.4, 0.6, 0.8, 1.0] +# @2 [0.4, 0.6, 0.6, 0.4, 0.0] +# @3 [0.6, 0.6, 0.4, 0.4, 1.0] +# @4 [0.8, 0.4, 0.4, 0.8, 0.0] +# @5 [1.0, 0.0, 1.0, 0.0, 1.0] + +# 6 +# +# @1 [1, 5, 10, 10, 5, 1] +# @2 [2, 8, 12, 8, 2, 0] +# @3 [3, 9, 10, 6, 3, 1] +# @4 [4, 8, 8, 8, 4, 0] +# @5 [5, 5, 10, 10, 1, 1] +# @6 [6, 0, 20, 0, 6, 0] + +# last row, 1 if odd, 0 if even +# second to last, subtract 2 on odds, add 2 on evens + +def compute_distributions(N): + dist = np.zeros((N, N)) + for j in range(0, N): + dist[0][j] = math.comb(N - 1, j) + dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2)) + for i in range(1, N): + for j in range(0, i + 1): + dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2)) + for k in range(i + 1, N): + for j in reversed(range(0, k)): + dist[i][j+1] = dist[i][j] + dist[i][j+1] + print(dist) + for i in range(0, N): + for j in range(0, N): + denom = math.comb(N, j+1) + dist[i][j] /= denom + return dist + +def raised_cosine(x, u, s): + if x < (u - s): + return 0 + if x > (u + s): + return 0 + return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s)) + +def average_index(x): + total = 0 + for k in range(0, len(x)): + total += k * x[k] + return total / np.sum(x) + +# 8, 32, 2^5 +# 10, 64, 2^6 +# 12, 128, 2^7 +# 14, 256, 2^8 +# 16, 512, 2^9 +# 18, 1024, 2^10 +# 20, 2048, 2^11 +# 22, 4096, 2^12 +def main(): + N = 16 + sample_size = 128 + sample_ids = set() + samples = [] + + dist = compute_distributions(N) + print(dist) + + for i in range(0, sample_size): + x = random_x(N) + y = int(xor(x)) + p = Point(x, y) + p_id = p.id() + if p_id in sample_ids: + continue + sample_ids.add(p_id) + samples.append(p) + total_sample_count = len(samples) + + # for i in range(0, 2**N): + # x = decode(i, N) + # y = int(xor(x)) + # samples.append(Point(x,y)) + + base = np.zeros(N) + current = np.zeros(N) + cumulative_probability = np.ones(N) + + for _ in range(0, N): + lowest_err = -1 + use_flip = -1 + for flip in range(-1, N): + coherent_distances = np.zeros(N+1) + incoherent_distances = np.zeros(N+1) + all_coherent = True + for i in range(0, len(samples)): + a = samples[i] + for j in range(0, len(samples)): + b = samples[j] + distance = hamming_distance(a, b) + is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y) + if is_coherent: + coherent_distances[distance] += 1 + else: + incoherent_distances[distance] += 1 + all_coherent = False + if all_coherent: + print('Flip and halt', flip) + return + # print(coherent_distances, incoherent_distances) + + # print(coherent_distances, incoherent_distances) + est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances)) + # print(est_incoherence) + + for k in range(0, N): + known_incoherence_at_k = dist[k] + err = 0 + # denom = 0 + probability = 1.0 + for i in range(1, N + 1): + if isnan(est_incoherence[i]): + continue + sample_size = coherent_distances[i] + incoherent_distances[i] + full_size = math.comb(N, i) * (2 ** N) + num_unknowns = full_size - sample_size + min_true_value = incoherent_distances[i] / 
full_size + max_true_value = (incoherent_distances[i] + num_unknowns) / full_size + s = max(abs(est_incoherence[i] - min_true_value), abs(est_incoherence[i] - max_true_value)) + u = est_incoherence[i] + known_incoherence = known_incoherence_at_k[i - 1] + err = raised_cosine(known_incoherence, u, s) + probability *= err + + # print(k, i, min_true_value, max_true_value) + + # confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative + # err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence + # denom += 1 + # print(flip, k, err) + # err /= denom + if flip < 0: + base[k] = probability + else: + current[k] = probability + + if flip >= 0: + if np.sum(current) == 0: + continue + np.divide(current, np.sum(current), current) + # print(current) + # temp = np.roll(cumulative_probability, -1) + # temp[-1] = 1.0 + # np.multiply(current, temp, current) + # np.divide(current, np.sum(current), current) + p_forward = 0 + p_backward = 0 + for i in range(1, N): + p_forward += cumulative_probability[i] * current[i - 1] + for i in range(0, N - 1): + p_backward += cumulative_probability[i] * current[i + 1] + + # base_index = average_index(cumulative_probability) + # new_index = average_index(current) + # if isnan(new_index): + # continue + # np.divide(current, np.sum(current), current) + # np.subtract(1, current, current) + print(flip,p_forward,p_backward,current) + delta = p_forward - p_backward + if use_flip < 0 or delta > lowest_err: + use_flip = flip + lowest_err = delta + + # for k in range(0, N - 1): + # value = current[k] * cumulative_probability[k + 1] + # if use_flip < 0 or value > lowest_err: + # use_flip = flip + # lowest_err = value + # print(flip, highest_value) + else: + np.divide(base, np.sum(base), base) + # np.subtract(1, base, base) + # print(cumulative_probability) + cumulative_probability = np.roll(cumulative_probability, -1) + cumulative_probability[-1] = 1.0 + # print(cumulative_probability) + # print(base) + np.multiply(base, cumulative_probability, cumulative_probability) + np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability) + print(cumulative_probability) + + if use_flip < 0: + return + + print('Flip', use_flip, lowest_err) + for p in samples: + if p.x[use_flip]: + p.y ^= 1 + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations23.py b/mutations23.py new file mode 100644 index 0000000..756c993 --- /dev/null +++ b/mutations23.py @@ -0,0 +1,761 @@ +from cmath import isnan +import numpy as np +import random +import hashlib +import math + +def get_state_id(state): + return ','.join([str(x) for x in sorted(state)]) + +class Point(): + def __init__(self, x, y): + self.x = x + self.y = y + + def id(self): + return ','.join([str(int(x)) for x in self.x]) + +class Influence(): + def __init__(self, a, b): + self.a = a + self.b = b + self.original_dof = set() + self.dof = set() + for i in range(0, len(a.x)): + if a.x[i] != b.x[i]: + self.original_dof.add(i) + self.dof.add(i) + + def coherent(self): + return self.a.y == self.b.y + + def id(self): + return ','.join(sorted([self.a.id(), self.b.id()])) + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def decode(x, N): + index = 0 + output = np.zeros((N)) + while x > 0 and index < N: + 
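+        # little-endian decode: peel the low bit of x into output[index] each iteration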
output[index] = x & 0b1 + x >>= 1 + index += 1 + return output + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def hamming_distance(a, b): + return np.sum(np.logical_xor(a.x, b.x)) + +def random_x(N): + x = np.zeros((N)) + for i in range(0, N): + x[i] = random.randint(0, 1) + return x + +def xor(x): + # return sum(x) % 2 + half = int(len(x) / 2) + return sum(x[:half]) % 2 + +def create_dof_map(influences): + dof_map = {} + for influence in influences: + for i in influence.dof: + if not i in dof_map: + dof_map[i] = [] + dof_map[i].append(influence) + return dof_map + +def flip(influences, i): + for influence in influences: + if i in influence.dof: + influence.a.y = int(influence.a.y) ^ 1 + +def remove_dof(dof_map, i, flip = False): + for influence in dof_map[i]: + influence.dof.remove(i) + if flip: + influence.a.y = int(influence.a.y) ^ 1 + # if len(influence.dof) == 0 and not influence.coherent(): + # raise Exception('Invalid') + del dof_map[i] + +def solve(dof_map, all_influences, all_samples): + eliminated = True + while eliminated: + eliminated = False + for influence in all_influences: + if len(influence.dof) == 1: + i = next(iter(influence.dof)) + if influence.coherent: + remove_dof(dof_map, i) + eliminated = True + else: + print('Forced', i) + remove_dof(dof_map, i, True) + eliminated = True + + lowest_dof = None + for influence in all_influences: + if not influence.coherent and len(influence.dof) > 1: + if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof): + lowest_dof = influence + + flip = None + highest_score = -1 + + for i in lowest_dof.dof: + per_point_scores = {} + i_influences = dof_map[i] + left = 0 + right = 0 + for influence in i_influences: + if not influence.a in per_point_scores: + per_point_scores[influence.a] = [0, 0] + if not influence.b in per_point_scores: + per_point_scores[influence.b] = [0, 0] + if influence.coherent: + per_point_scores[influence.a][0] += 1 + per_point_scores[influence.b][0] += 1 + left += 1 + else: + per_point_scores[influence.a][1] += 1 + per_point_scores[influence.b][1] += 1 + right += 1 + print(i, left / (left + right)) + num = 0 + denom = 0 + for _, score in per_point_scores.items(): + if score[0] == score[1]: + continue + print(i, score) + num += score[1] / (score[0] + score[1]) + denom += 1 + score = num / denom if denom > 0 else 0 + print(score) + + return None + + +# 1st row (n+1 choose k+1) * (1-(k mod 2)) +# psuedopascal to compute the follow-on rows +# assuming solvability, we want to maximize the probability that our current state and our state with +# a particular single flip are one order apart in the correct direction + + + +# 2, 0 +# 2, 2, 0 +# 2, 4, 2, 0 +# 2, 6, 6, 2, 0 +# 2, 8,12, 8, 2, 0 +# 2,10,20,20,10, 2, 0 + +# 3,-9,19,-33,51,-73,99 +# 3,-6,10,-14,18,-22,26 +# 3,-3, 4, -4, 4, -4, 4 +# 3, 0, 1, 0, 0, 0, 0 +# 3, 3, 1, 1, 0, 0, 0 +# 3, 6, 4, 2, 1, 0, 0 +# 3, 9,10, 6, 3, 1, 0 + +# 4, 0, 4, 0 +# 4, 4, 4, 4, 0 +# 4, 8, 8, 8, 4, 0 +# 4,12,16,16,12, 4, 0 + +# 5, 0,10, 0, 1 +# 5, 5,10,10, 1, 1 +# 5, +# 5, + + + +# 3 +# +# @1 [1, 2, 1] +# @2 [2, 2, 0] +# @3 [3, 0, 1] + +# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...) 
+# +# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2), +# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1) +# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1) +# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 - +# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 - + +# @0 [0.0, 0.0, 0.0, 0.0, 0.0] +# @1 [0.2, 0.4, 0.6, 0.8, 1.0] +# @2 [0.4, 0.6, 0.6, 0.4, 0.0] +# @3 [0.6, 0.6, 0.4, 0.4, 1.0] +# @4 [0.8, 0.4, 0.4, 0.8, 0.0] +# @5 [1.0, 0.0, 1.0, 0.0, 1.0] + +# 6 +# +# @1 [1, 5, 10, 10, 5, 1] +# @2 [2, 8, 12, 8, 2, 0] +# @3 [3, 9, 10, 6, 3, 1] +# @4 [4, 8, 8, 8, 4, 0] +# @5 [5, 5, 10, 10, 1, 1] +# @6 [6, 0, 20, 0, 6, 0] + +# last row, 1 if odd, 0 if even +# second to last, subtract 2 on odds, add 2 on evens + +def compute_pseudopascal(N): + dist = np.zeros((N, N)) + for j in range(0, N): + dist[0][j] = math.comb(N - 1, j) + dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2)) + for i in range(1, N): + for j in range(0, i + 1): + dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2)) + for k in range(i + 1, N): + for j in reversed(range(0, k)): + dist[i][j+1] = dist[i][j] + dist[i][j+1] + return dist + +def compute_distributions(N): + dist = compute_pseudopascal(N) + print(dist) + for i in range(0, N): + for j in range(0, N): + denom = math.comb(N, j+1) + dist[i][j] /= denom + return dist + +def confusion_probabilities(N, samples): + sample_sizes = np.zeros(N) + for i in range(0, len(samples)): + a = samples[i] + for j in range(0, len(samples)): + b = samples[j] + if i == j: + continue + distance = hamming_distance(a, b) + sample_sizes[distance - 1] += 1 + + confusion = np.zeros((N, N)) + dist = compute_pseudopascal(N) + np.multiply(dist, 2 ** N, dist) + # These are the probabilities that we might mix up any two orders given a particular sample size + for i in range(0, N): + for j in range(0, N): + probability = 1.0 + for k in range(0, N): + full_size = math.comb(N, k+1) * (2 ** N) + sample_size = sample_sizes[k] + num_unknowns = full_size - sample_size + i_incoherent = dist[i][k] + # Worst case, we sample only the coherent points, + i_min = max(i_incoherent - num_unknowns, 0) / full_size + i_max = min(sample_size, i_incoherent) / full_size + u = i_min + i_max / 2 + s = (i_max - i_min) / 2 + probability *= raised_cosine(dist[j][k] / full_size, u, s) + confusion[i][j] = probability + return confusion + +def raised_cosine(x, u, s): + if x < (u - s): + return 0 + if x > (u + s): + return 0 + return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s)) + +# Probability of getting k red balls after drawing n from a bag with m total balls and j red balls in it +# (n choose k) * p^k * (1-p)^(n-k) + +# p/m chance of getting a red ball +# (1 - p/m) chance of not getting a red ball + +# One way (p/m) * ((p-1)/(m-1)) * ((p-2)/(m-2)) +# (1 - (p/m)) + +def p_bernoulli(n, k, m, j): + probabilities = np.zeros((n + 1, n + 1)) + probabilities.fill(-1) + # if n == k: + # return 1.0 + # if k > p: + # return 0.0 + stack = [(0,0)] + while len(stack) > 0: + (a, b) = stack.pop() + if a + b == n: + probabilities[a][b] = 1 if a == k else 0 + elif a > j: + probabilities[a][b] = 0 + elif b > (m - j): + probabilities[a][b] = 0 + else: + p_left = probabilities[a + 1][b] + p_right = probabilities[a][b + 1] + if p_left >= 0 and p_right >= 0: + p = (j - a) / (m - a - b) + probabilities[a][b] = p_left * p + p_right * (1 - p) + else: + stack.append((a, b)) + if p_left < 0: + 
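+                    # the (a + 1, b) subproblem (one more red ball drawn) is not solved yet;
+                    # push it so its value is available the next time this state is popped.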
stack.append((a + 1, b)) + if p_right < 0: + stack.append((a, b + 1)) + return probabilities[0][0] + + # P = 1.0 + # p_k = 0 + # p_nk = 0 + # for i in range(1, k + 1): + # P *= (n + 1 - i) / i + # while P > 1.0 and p_k < k: + # P *= p + # p_k += 1 + # while P > 1.0 and p_nk < (n - k): + # P *= (1 - p) + # p_nk += 1 + # while p_k < k: + # P *= p + # p_k += 1 + # while (p_nk < (n - k)): + # P *= (1 - p) + # p_nk += 1 + # return P + +def average_index(x): + total = 0 + for k in range(0, len(x)): + total += k * x[k] + return total / np.sum(x) + +def compute_cumulative_probability(N, bases, p_n): + # p_n = np.zeros(N) + # p_n.fill(0.5) + states = [[]] + flips = set() + for i in range(1, len(bases)): + # (base, _) = bases[i] + (_, flip) = bases[i] + # p_forward = 0 + # p_backward = 0 + # for k in range(0, N - 1): + # p_forward += base[k + 1] * next_p[k] + # p_backward += base[k] * next_p[k + 1] + if flip in flips: + # p_n[flip] -= p_forward + # p_n[flip] += p_backward + flips.remove(flip) + else: + # p_n[flip] += p_forward + # p_n[flip] -= p_backward + flips.add(flip) + states.append(flips.copy()) + # np.clip(p_n, 0, 1, p_n) + # print('Contribution probabilities', p_n) + + min_p_n = np.min(p_n) + max_p_n = np.max(p_n) + + + p_k = np.zeros(N) + for k in range(0, N): + stack = [(k, len(bases) - 1)] + probabilities = np.zeros((N, len(bases))) + probabilities.fill(-1) + while len(stack) > 0: + (i, base_index) = stack.pop() + (base, flip) = bases[base_index] + if base_index == 0: + probabilities[i, 0] = base[i] + else: + left = i - 1 + right = i + 1 + state = states[base_index - 1] + p_flip = max(min(p_n[flip] + 0.5, 1.0), 0) + if flip in state: + p_flip = 1 - p_flip + p_left = probabilities[left, base_index - 1] if left >= 0 else 0 + p_right = probabilities[right, base_index - 1] if right < N else 0 + if p_left >= 0 and p_right >= 0: + probabilities[i, base_index] = base[i] * p_left * (1 - p_flip) + base[i] * p_right * p_flip + else: + stack.append((i, base_index)) + if p_left < 0: + stack.append((left, base_index - 1)) + if p_right < 0: + stack.append((right, base_index - 1)) + p_k[k] = probabilities[k][-1] + np.divide(p_k, np.sum(p_k), p_k) + return p_k + +# 8, 32, 2^5 +# 10, 64, 2^6 +# 12, 128, 2^7 +# 14, 256, 2^8 +# 16, 512, 2^9 +# 18, 1024, 2^10 +# 20, 2048, 2^11 +# 22, 4096, 2^12 +def main(): + N = 8 + sample_size = 16 + sample_ids = set() + samples = [] + + dist = compute_pseudopascal(N) + print(dist) + + for i in range(0, sample_size): + x = random_x(N) + y = int(xor(x)) + p = Point(x, y) + p_id = p.id() + if p_id in sample_ids: + continue + sample_ids.add(p_id) + samples.append(p) + # confusion = confusion_probabilities(N, samples) + # print(confusion) + # return + + # for i in range(0, 2**N): + # x = decode(i, N) + # y = int(xor(x)) + # samples.append(Point(x,y)) + + base = np.zeros(N) + current = np.zeros(N) + cumulative_probability = np.ones(N) + flip_likelihood = np.zeros(N) + cumulative_deltas = np.zeros(N) + direction = -1 + flips = set() + bases = [] + last_flip = -1 + + for _ in range(0, 2 ** N): + lowest_err = -1 + use_flip = -1 + for flip in range(-1, N): + coherent_distances = np.zeros((len(samples), N+1)) + incoherent_distances = np.zeros((len(samples), N+1)) + all_coherent = True + for i in range(0, len(samples)): + a = samples[i] + for j in range(0, len(samples)): + b = samples[j] + distance = hamming_distance(a, b) + is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y) + if is_coherent: + 
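+                        # the pair's labels still agree once bit 'flip' is hypothetically xor-ed
+                        # into the output (flip < 0 means no flip); tally it by Hamming distance.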
coherent_distances[i][distance] += 1 + else: + incoherent_distances[i][distance] += 1 + all_coherent = False + if all_coherent: + print('Flip and halt', flip) + return + # print(coherent_distances, incoherent_distances) + + # print(coherent_distances, incoherent_distances) + # est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances)) + # print(est_incoherence) + + probability = np.ones(N) + np.divide(probability, np.sum(probability), probability) + components = [] + for i in range(0, len(samples)): + for j in range(1, N + 1): + p_k = np.zeros(N) + # confusion = np.zeros((N, N)) + n = coherent_distances[i][j] + incoherent_distances[i][j] + if n == 0: + continue + a = incoherent_distances[i][j] + t = math.comb(N, j) + # for k in range(0, N): + # p = dist[k][j - 1] + # a_ideal = round(p * n / t) + # # base_prob = p_bernoulli(int(n), a_ideal, t, int(p)) + # for q in range(0, N): + # u = dist[q][j - 1] + # p_ratio = p / t + # u_ratio = u / t + # confusion[k][q] = p_bernoulli(int(n), a_ideal, t, int(u)) + # np.divide(confusion, np.max(confusion, axis=0), confusion) + + for k in range(0, N): + p = dist[k][j - 1] + a_ideal = round(p * n / t) + # How likely are we to correctly identify an ideal sample? + # for q in range(0, N): + p_ideal = p_bernoulli(int(n), a_ideal, t, int(p)) + # P = math.comb(int(n), int(a)) * math.pow(p, int(a)) * math.pow(1 - p, int(n - a)) + p_k[k] = p_bernoulli(int(n), int(a), t, int(p))# * (n / t) + # p_bernoulli(int(n), int(a), math.comb(N, j), int(p)) + # probability *= P + components.append(p_k) + np.divide(p_k, np.sum(p_k), p_k) + np.multiply(probability, p_k, probability) + np.divide(probability, np.sum(probability), probability) + + # p_cross_k is the probability that we correctly identified at k + # plus the probabilities that we missidentify at q and it is actually k + + # probability of drawing from sample k = p_bernoulli + + # p_cross_k = np.zeros(N) + # for k in range(0, N): + # for q in range(0, N): + # p_cross_k[k] += p_k[q] * confusion[k][q] + # if k == q: + # continue + # p_cross_k[k] += (1 - p_k[k]) * p_k[q] * confusion[k][q] + # p_cross_k[k] -= (1 - p_k[q]) * p_k[k] * confusion[q][k] + + # if q == k: + # continue + # p_cross_k[k] += (1 - p_k[k]) * p_k[q] * confusion[k][q] + # p_cross_k[k] -= (1 - p_k[k]) + # p_cross_k[k] -= p_k[k] * (1 - confusion[k][k]) * confusion[q][k] + + + # for k in range(0, N): + # P = p_k[k] + # for m in range(0, N): + # if m == k: + # continue + # if p_k[m] == 0: + # continue + # P /= p_k[m] + # p_cross_k[k] = P + # min_value = np.min(p_cross_k) + # np.subtract(p_cross_k, min_value, p_cross_k) + # np.add(probability, p_cross_k, probability) + # total = np.sum(p_k) + # if total > 0: + # np.divide(p_k, total, p_k) + # np.multiply(p_k, probability, probability) + # np.divide(probability, np.sum(probability), probability) + # print(probability) + + + np.divide(probability, np.sum(probability), probability) + if flip < 0: + np.copyto(base, probability) + else: + np.copyto(current, probability) + + + # print(k, i, min_true_value, max_true_value) + + # confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative + # err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence + # denom += 1 + # print(flip, k, err) + # err /= denom + # if flip < 0: + # base[k] = probability + # else: + # current[k] = probability + + if flip >= 0: + if np.sum(current) == 0: + continue + np.divide(current, np.sum(current), current) + + 
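+                # compare the inferred order distribution without a flip (base) against the one
+                # with this candidate flip (current): the mean index estimates the order k, while
+                # the variance and the fit against a normal curve measure how concentrated each
+                # estimate is. Per the note above, a useful flip should move the mean down by
+                # about one order, which is what delta = abs(1 - (base_mean_index - current_mean_index)) rewards.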
base_mean_index = average_index(base) + base_variance = 0 + for i in range(0, N): + base_variance += base[i] * (base_mean_index - i) ** 2 + base_err = 0 + norm = np.zeros(N) + for i in range(0, N): + norm[i] = 1 / (base_variance * math.sqrt(2 * math.pi)) * math.exp(-1 / 2 * ((i - base_mean_index) / base_variance) ** 2) + np.divide(norm, np.sum(norm), norm) + for i in range(0, N): + base_err += (base[i] - norm[i]) ** 2 + + current_mean_index = average_index(current) + current_variance = 0 + for i in range(0, N): + current_variance += current[i] * (current_mean_index - i) ** 2 + current_err = 0 + for i in range(0, N): + norm[i] = 1 / (current_variance * math.sqrt(2 * math.pi)) * math.exp(-1 / 2 * ((i - current_mean_index) / current_variance) ** 2) + np.divide(norm, np.sum(norm), norm) + for i in range(0, N): + current_err += (current[i] - norm[i]) ** 2 + + delta = abs(1 - (base_mean_index - current_mean_index)) + print(flip, current) + print('Mean', current_mean_index, base_mean_index) + print('Variance', current_variance, base_variance) + print('Err', current_err, base_err) + score = current_variance + + # base_score = 0 + # for i in range(0, N): + # base_score += (base[round(base_mean_index)] - base[i]) ** 2 + + # score = 0 + # for i in range(0, N): + # score += (current[round(current_mean_index)] - current[i]) ** 2 + # print('Score', score, base_score) + + # print(current) + # temp = np.roll(cumulative_probability, -1) + # temp[-1] = 1.0 + # np.multiply(current, temp, current) + # np.divide(current, np.sum(current), current) + # p_forward = 0 + # p_backward = 0 + # for i in range(1, N): + # p_forward += base[i] * current[i - 1] + # for i in range(0, N - 1): + # p_backward += base[i] * current[i + 1] + # scale = 0.01 + # if flip in flips: + # flip_likelihood[flip] += scale * p_backward + # flip_likelihood[flip] -= scale * p_forward + # else: + # flip_likelihood[flip] -= scale * p_backward + # flip_likelihood[flip] += scale * p_forward + # delta = p_forward - p_backward + # print(flip, current, p_forward, p_backward) + # base_index = average_index(base) + # current_index = average_index(current) + # err = abs(1 - (base_index - current_index)) + # print(base_index, current_index, err) + + # base_index = average_index(cumulative_probability) + # new_index = average_index(current) + # if isnan(new_index): + # continue + # np.divide(current, np.sum(current), current) + # np.subtract(1, current, current) + # print(flip,p_forward,p_backward,current) + if use_flip < 0 or delta < lowest_err: + use_flip = flip + lowest_err = score + + # cumulative_deltas[flip] += 0 + + # for k in range(0, N - 1): + # value = current[k] * cumulative_probability[k + 1] + # if use_flip < 0 or value > lowest_err: + # use_flip = flip + # lowest_err = value + # print(flip, highest_value) + else: + # p_next = np.zeros(N) + # for i in range(0, N): + # P = 0.0 + # for j in range(0, N): + # if i == j: + # continue + # P += base[i] * (1 - base[j]) + # p_next[i] = P + # base = p_next + + # base[0] = 0 + np.divide(base, np.sum(base), base) + bases.append((base.copy(), last_flip)) + # bases.insert(0, base.copy()) + # cumulative_probability = compute_cumulative_probability(N, bases) + # p_forward = 0 + # p_backward = 0 + # for i in range(1, N): + # p_forward += cumulative_probability[i] * base[i - 1] + # for i in range(0, N - 1): + # p_backward += cumulative_probability[i] * base[i + 1] + print('Base', base) + # # # np.subtract(1, base, base) + # # # print(cumulative_probability) + # shift_left = np.roll(cumulative_probability, 
-1) + # shift_left[-1] = 0.0 + # # # # print('Shift Left', p_forward, shift_left) + # shift_right = np.roll(cumulative_probability, 1) + # shift_right[0] = 0.0 + # # # # print('Shift Right', p_backward, shift_right) + # p_next = np.add(np.multiply(shift_left, 0.5), np.multiply(shift_right, 0.5)) + # p_next[0] = 0 + # np.divide(p_next, np.sum(p_next), p_next) + # # # # print('Next', p_next) + # # # # # print(cumulative_probability) + # # # # # print(base) + # np.multiply(base, p_next, cumulative_probability) + # cumulative_probability[0] = 0 + # # # # # np.multiply(cumulative_probability, shift_right, cumulative_probability) + # np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability) + cumulative_probability = compute_cumulative_probability(N, bases, flip_likelihood) + print('Cumulative', cumulative_probability) + print('Likelihood', flip_likelihood) + + # cumulative_probability[0] = 0 + # use_flip = -1 + # if direction < 0: + # use_flip = np.argmax(cumulative_deltas) + # if cumulative_deltas[use_flip] < 0: + # use_flip = np.argmin(cumulative_deltas) + # direction = 1 + # # cumulative_deltas.fill(0) + # else: + # use_flip = np.argmin(cumulative_deltas) + # if cumulative_deltas[use_flip] > 0: + # use_flip = np.argmax(cumulative_deltas) + # direction = -1 + # # cumulative_deltas.fill(0) + # if direction < 0: + # cumulative_probability[0] = 0 + # else: + # cumulative_probability[-1] = 0 + # np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability) + # print(cumulative_deltas) + + # use_flip = -1 + # highest_p = 0 + # for i in range(0, N): + # p = flip_likelihood[i] + # if i in flips: + # p = -p + # if use_flip < 0 or p > highest_p: + # use_flip = i + # highest_p = p + # if not use_flip in flips and highest_p < 0 or use_flip in flips and highest_p > 0: + # flip_likelihood[use_flip] *= -1.0 + + if use_flip < 0: + return + last_flip = use_flip + if use_flip in flips: + flips.remove(use_flip) + else: + flips.add(use_flip) + print('Flip', use_flip, lowest_err) + print(flips) + cumulative_deltas[use_flip] = -cumulative_deltas[use_flip] + for p in samples: + if p.x[use_flip]: + p.y ^= 1 + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations24.py b/mutations24.py new file mode 100644 index 0000000..82fe95f --- /dev/null +++ b/mutations24.py @@ -0,0 +1,656 @@ +from cmath import isnan +import numpy as np +import random +import hashlib +import math + +def get_state_id(state): + return ','.join([str(x) for x in sorted(state)]) + +class Point(): + def __init__(self, x, y): + self.x = x + self.y = y + + def id(self): + return ','.join([str(int(x)) for x in self.x]) + +class Influence(): + def __init__(self, a, b): + self.a = a + self.b = b + self.original_dof = set() + self.dof = set() + for i in range(0, len(a.x)): + if a.x[i] != b.x[i]: + self.original_dof.add(i) + self.dof.add(i) + + def coherent(self): + return self.a.y == self.b.y + + def id(self): + return ','.join(sorted([self.a.id(), self.b.id()])) + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def decode(x, N): + index = 0 + output = np.zeros((N)) + while x > 0 and index < N: + output[index] = x & 0b1 + x >>= 1 + index += 1 + return output + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return 
result[0] & 0b1 + +def hamming_distance(a, b): + return np.sum(np.logical_xor(a.x, b.x)) + +def random_x(N): + x = np.zeros((N)) + for i in range(0, N): + x[i] = random.randint(0, 1) + return x + +def xor(x): + # return sum(x) % 2 + half = int(len(x) / 2) + return sum(x[:half]) % 2 + +def create_dof_map(influences): + dof_map = {} + for influence in influences: + for i in influence.dof: + if not i in dof_map: + dof_map[i] = [] + dof_map[i].append(influence) + return dof_map + +def flip(influences, i): + for influence in influences: + if i in influence.dof: + influence.a.y = int(influence.a.y) ^ 1 + +def remove_dof(dof_map, i, flip = False): + for influence in dof_map[i]: + influence.dof.remove(i) + if flip: + influence.a.y = int(influence.a.y) ^ 1 + # if len(influence.dof) == 0 and not influence.coherent(): + # raise Exception('Invalid') + del dof_map[i] + +def solve(dof_map, all_influences, all_samples): + eliminated = True + while eliminated: + eliminated = False + for influence in all_influences: + if len(influence.dof) == 1: + i = next(iter(influence.dof)) + if influence.coherent: + remove_dof(dof_map, i) + eliminated = True + else: + print('Forced', i) + remove_dof(dof_map, i, True) + eliminated = True + + lowest_dof = None + for influence in all_influences: + if not influence.coherent and len(influence.dof) > 1: + if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof): + lowest_dof = influence + + flip = None + highest_score = -1 + + for i in lowest_dof.dof: + per_point_scores = {} + i_influences = dof_map[i] + left = 0 + right = 0 + for influence in i_influences: + if not influence.a in per_point_scores: + per_point_scores[influence.a] = [0, 0] + if not influence.b in per_point_scores: + per_point_scores[influence.b] = [0, 0] + if influence.coherent: + per_point_scores[influence.a][0] += 1 + per_point_scores[influence.b][0] += 1 + left += 1 + else: + per_point_scores[influence.a][1] += 1 + per_point_scores[influence.b][1] += 1 + right += 1 + print(i, left / (left + right)) + num = 0 + denom = 0 + for _, score in per_point_scores.items(): + if score[0] == score[1]: + continue + print(i, score) + num += score[1] / (score[0] + score[1]) + denom += 1 + score = num / denom if denom > 0 else 0 + print(score) + + return None + + +# 1st row (n+1 choose k+1) * (1-(k mod 2)) +# psuedopascal to compute the follow-on rows +# assuming solvability, we want to maximize the probability that our current state and our state with +# a particular single flip are one order apart in the correct direction + + + +# 2, 0 +# 2, 2, 0 +# 2, 4, 2, 0 +# 2, 6, 6, 2, 0 +# 2, 8,12, 8, 2, 0 +# 2,10,20,20,10, 2, 0 + +# 3,-9,19,-33,51,-73,99 +# 3,-6,10,-14,18,-22,26 +# 3,-3, 4, -4, 4, -4, 4 +# 3, 0, 1, 0, 0, 0, 0 +# 3, 3, 1, 1, 0, 0, 0 +# 3, 6, 4, 2, 1, 0, 0 +# 3, 9,10, 6, 3, 1, 0 + +# 4, 0, 4, 0 +# 4, 4, 4, 4, 0 +# 4, 8, 8, 8, 4, 0 +# 4,12,16,16,12, 4, 0 + +# 5, 0,10, 0, 1 +# 5, 5,10,10, 1, 1 +# 5, +# 5, + + + +# 3 +# +# @1 [1, 2, 1] +# @2 [2, 2, 0] +# @3 [3, 0, 1] + +# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...) 
+# +# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2), +# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1) +# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1) +# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 - +# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 - + +# @0 [0.0, 0.0, 0.0, 0.0, 0.0] +# @1 [0.2, 0.4, 0.6, 0.8, 1.0] +# @2 [0.4, 0.6, 0.6, 0.4, 0.0] +# @3 [0.6, 0.6, 0.4, 0.4, 1.0] +# @4 [0.8, 0.4, 0.4, 0.8, 0.0] +# @5 [1.0, 0.0, 1.0, 0.0, 1.0] + +# 6 +# +# @1 [1, 5, 10, 10, 5, 1] +# @2 [2, 8, 12, 8, 2, 0] +# @3 [3, 9, 10, 6, 3, 1] +# @4 [4, 8, 8, 8, 4, 0] +# @5 [5, 5, 10, 10, 1, 1] +# @6 [6, 0, 20, 0, 6, 0] + +# last row, 1 if odd, 0 if even +# second to last, subtract 2 on odds, add 2 on evens + +def compute_pseudopascal(N): + dist = np.zeros((N, N)) + for j in range(0, N): + dist[0][j] = math.comb(N - 1, j) + dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2)) + for i in range(1, N): + for j in range(0, i + 1): + dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2)) + for k in range(i + 1, N): + for j in reversed(range(0, k)): + dist[i][j+1] = dist[i][j] + dist[i][j+1] + return dist + +def compute_distributions(N): + dist = compute_pseudopascal(N) + print(dist) + for i in range(0, N): + for j in range(0, N): + denom = math.comb(N, j+1) + dist[i][j] /= denom + return dist + +def confusion_probabilities(N, samples): + sample_sizes = np.zeros(N) + for i in range(0, len(samples)): + a = samples[i] + for j in range(0, len(samples)): + b = samples[j] + if i == j: + continue + distance = hamming_distance(a, b) + sample_sizes[distance - 1] += 1 + + confusion = np.zeros((N, N)) + dist = compute_pseudopascal(N) + np.multiply(dist, 2 ** N, dist) + # These are the probabilities that we might mix up any two orders given a particular sample size + for i in range(0, N): + for j in range(0, N): + probability = 1.0 + for k in range(0, N): + full_size = math.comb(N, k+1) * (2 ** N) + sample_size = sample_sizes[k] + num_unknowns = full_size - sample_size + i_incoherent = dist[i][k] + # Worst case, we sample only the coherent points, + i_min = max(i_incoherent - num_unknowns, 0) / full_size + i_max = min(sample_size, i_incoherent) / full_size + u = i_min + i_max / 2 + s = (i_max - i_min) / 2 + probability *= raised_cosine(dist[j][k] / full_size, u, s) + confusion[i][j] = probability + return confusion + +def raised_cosine(x, u, s): + if x < (u - s): + return 0 + if x > (u + s): + return 0 + return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s)) + +# Probability of getting k red balls after drawing n from a bag with m total balls and j red balls in it +# (n choose k) * p^k * (1-p)^(n-k) + +# p/m chance of getting a red ball +# (1 - p/m) chance of not getting a red ball + +# One way (p/m) * ((p-1)/(m-1)) * ((p-2)/(m-2)) +# (1 - (p/m)) + +def p_bernoulli(n, k, m, j): + # probabilities = np.zeros((n + 1, n + 1)) + # probabilities.fill(-1) + # # if n == k: + # # return 1.0 + # # if k > p: + # # return 0.0 + # stack = [(0,0)] + # while len(stack) > 0: + # (a, b) = stack.pop() + # if a + b == n: + # probabilities[a][b] = 1 if a == k else 0 + # elif a > j: + # probabilities[a][b] = 0 + # elif b > (m - j): + # probabilities[a][b] = 0 + # else: + # p_left = probabilities[a + 1][b] + # p_right = probabilities[a][b + 1] + # if p_left >= 0 and p_right >= 0: + # p = (j - a) / (m - a - b) + # probabilities[a][b] = p_left * p + p_right * (1 - p) + # 
else: + # stack.append((a, b)) + # if p_left < 0: + # stack.append((a + 1, b)) + # if p_right < 0: + # stack.append((a, b + 1)) + # return probabilities[0][0] + + p = j / m + P = 1.0 + p_k = 0 + p_nk = 0 + for i in range(1, k + 1): + P *= (n + 1 - i) / i + while P > 1.0 and p_k < k: + P *= p + p_k += 1 + while P > 1.0 and p_nk < (n - k): + P *= (1 - p) + p_nk += 1 + while p_k < k: + P *= p + p_k += 1 + while (p_nk < (n - k)): + P *= (1 - p) + p_nk += 1 + return P + +def average_index(x): + total = 0 + for k in range(0, len(x)): + total += k * x[k] + return total / np.sum(x) + +def compute_cumulative_probability(N, bases, p_n): + # p_n = np.zeros(N) + # p_n.fill(0.5) + states = [[]] + flips = set() + for i in range(1, len(bases)): + # (base, _) = bases[i] + (_, flip) = bases[i] + # p_forward = 0 + # p_backward = 0 + # for k in range(0, N - 1): + # p_forward += base[k + 1] * next_p[k] + # p_backward += base[k] * next_p[k + 1] + if flip in flips: + # p_n[flip] -= p_forward + # p_n[flip] += p_backward + flips.remove(flip) + else: + # p_n[flip] += p_forward + # p_n[flip] -= p_backward + flips.add(flip) + states.append(flips.copy()) + # np.clip(p_n, 0, 1, p_n) + # print('Contribution probabilities', p_n) + + min_p_n = np.min(p_n) + max_p_n = np.max(p_n) + + + p_k = np.zeros(N) + for k in range(0, N): + stack = [(k, len(bases) - 1)] + probabilities = np.zeros((N, len(bases))) + probabilities.fill(-1) + while len(stack) > 0: + (i, base_index) = stack.pop() + (base, flip) = bases[base_index] + if base_index == 0: + probabilities[i, 0] = base[i] + else: + left = i - 1 + right = i + 1 + state = states[base_index - 1] + p_flip = max(min(p_n[flip] + 0.5, 1.0), 0) + if flip in state: + p_flip = 1 - p_flip + p_left = probabilities[left, base_index - 1] if left >= 0 else 0 + p_right = probabilities[right, base_index - 1] if right < N else 0 + if p_left >= 0 and p_right >= 0: + probabilities[i, base_index] = base[i] * p_left * (1 - p_flip) + base[i] * p_right * p_flip + else: + stack.append((i, base_index)) + if p_left < 0: + stack.append((left, base_index - 1)) + if p_right < 0: + stack.append((right, base_index - 1)) + p_k[k] = probabilities[k][-1] + np.divide(p_k, np.sum(p_k), p_k) + return p_k + +# 8, 32, 2^5 +# 10, 64, 2^6 +# 12, 128, 2^7 +# 14, 256, 2^8 +# 16, 512, 2^9 +# 18, 1024, 2^10 +# 20, 2048, 2^11 +# 22, 4096, 2^12 +def main(): + N = 16 + sample_size = 128 + sample_ids = set() + samples = [] + + dist = compute_pseudopascal(N) + print(dist) + + for i in range(0, sample_size): + x = random_x(N) + y = int(xor(x)) + p = Point(x, y) + p_id = p.id() + if p_id in sample_ids: + continue + sample_ids.add(p_id) + samples.append(p) + # confusion = confusion_probabilities(N, samples) + # print(confusion) + # return + + # for i in range(0, 2**N): + # x = decode(i, N) + # y = int(xor(x)) + # samples.append(Point(x,y)) + + base = np.zeros(N) + current = np.zeros(N) + cumulative_probability = np.ones(N) + flip_likelihood = np.zeros(N) + cumulative_deltas = np.zeros(N) + direction = -1 + flips = set() + bases = [] + last_flip = -1 + + for _ in range(0, 2 ** N): + lowest_err = -1 + use_flip = -1 + for flip in range(-1, N): + coherent_distances = np.zeros(N+1) + incoherent_distances = np.zeros(N+1) + all_coherent = True + for i in range(0, len(samples)): + a = samples[i] + for j in range(0, len(samples)): + b = samples[j] + distance = hamming_distance(a, b) + is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y) + if is_coherent: + 
coherent_distances[distance] += 1 + else: + incoherent_distances[distance] += 1 + all_coherent = False + if all_coherent: + print('Flip and halt', flip) + return + # print(coherent_distances, incoherent_distances) + + # print(coherent_distances, incoherent_distances) + # est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances)) + # print(est_incoherence) + + probability = np.ones(N) + # np.divide(probability, np.sum(probability), probability) + for j in range(1, N + 1): + n = coherent_distances[j] + incoherent_distances[j] + if n == 0: + continue + for k in range(0, N): + a = incoherent_distances[j] + t = math.comb(N, j) * (2 ** N) + p = dist[k][j - 1] * (2 ** N) + prob = p_bernoulli(int(n), int(a), t, p) + probability[k] *= prob + np.divide(probability, np.sum(probability), probability) + + if flip < 0: + np.copyto(base, probability) + else: + np.copyto(current, probability) + + + # print(k, i, min_true_value, max_true_value) + + # confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative + # err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence + # denom += 1 + # print(flip, k, err) + # err /= denom + # if flip < 0: + # base[k] = probability + # else: + # current[k] = probability + + if flip >= 0: + if np.sum(current) == 0: + continue + np.divide(current, np.sum(current), current) + # print(current) + # temp = np.roll(cumulative_probability, -1) + # temp[-1] = 1.0 + # np.multiply(current, temp, current) + # np.divide(current, np.sum(current), current) + p_forward = 0 + p_backward = 0 + for i in range(1, N): + p_forward += base[i] * current[i - 1] + for i in range(0, N - 1): + p_backward += base[i] * current[i + 1] + scale = 0.01 + if flip in flips: + flip_likelihood[flip] += scale * p_backward + flip_likelihood[flip] -= scale * p_forward + else: + flip_likelihood[flip] -= scale * p_backward + flip_likelihood[flip] += scale * p_forward + delta = p_forward - p_backward + print(flip, current, p_forward, p_backward) + base_index = average_index(base) + current_index = average_index(current) + err = abs(1 - (base_index - current_index)) + print(base_index, current_index, err) + + # base_index = average_index(cumulative_probability) + # new_index = average_index(current) + # if isnan(new_index): + # continue + # np.divide(current, np.sum(current), current) + # np.subtract(1, current, current) + # print(flip,p_forward,p_backward,current) + if delta > 0 and (use_flip < 0 or delta > lowest_err): + use_flip = flip + lowest_err = delta + + # cumulative_deltas[flip] += 0 + + # for k in range(0, N - 1): + # value = current[k] * cumulative_probability[k + 1] + # if use_flip < 0 or value > lowest_err: + # use_flip = flip + # lowest_err = value + # print(flip, highest_value) + else: + # p_next = np.zeros(N) + # for i in range(0, N): + # P = 0.0 + # for j in range(0, N): + # if i == j: + # continue + # P += base[i] * (1 - base[j]) + # p_next[i] = P + # base = p_next + + # base[0] = 0 + np.divide(base, np.sum(base), base) + bases.append((base.copy(), last_flip)) + # bases.insert(0, base.copy()) + # cumulative_probability = compute_cumulative_probability(N, bases) + # p_forward = 0 + # p_backward = 0 + # for i in range(1, N): + # p_forward += cumulative_probability[i] * base[i - 1] + # for i in range(0, N - 1): + # p_backward += cumulative_probability[i] * base[i + 1] + print('Base', base) + # # # np.subtract(1, base, base) + # # # print(cumulative_probability) + # shift_left 
= np.roll(cumulative_probability, -1) + # shift_left[-1] = 0.0 + # # # # print('Shift Left', p_forward, shift_left) + # shift_right = np.roll(cumulative_probability, 1) + # shift_right[0] = 0.0 + # # # # print('Shift Right', p_backward, shift_right) + # p_next = np.add(np.multiply(shift_left, 0.5), np.multiply(shift_right, 0.5)) + # p_next[0] = 0 + # np.divide(p_next, np.sum(p_next), p_next) + # # # # print('Next', p_next) + # # # # # print(cumulative_probability) + # # # # # print(base) + # np.multiply(base, p_next, cumulative_probability) + # cumulative_probability[0] = 0 + # # # # # np.multiply(cumulative_probability, shift_right, cumulative_probability) + # np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability) + cumulative_probability = compute_cumulative_probability(N, bases, flip_likelihood) + print('Cumulative', cumulative_probability) + print('Likelihood', flip_likelihood) + + # cumulative_probability[0] = 0 + # use_flip = -1 + # if direction < 0: + # use_flip = np.argmax(cumulative_deltas) + # if cumulative_deltas[use_flip] < 0: + # use_flip = np.argmin(cumulative_deltas) + # direction = 1 + # # cumulative_deltas.fill(0) + # else: + # use_flip = np.argmin(cumulative_deltas) + # if cumulative_deltas[use_flip] > 0: + # use_flip = np.argmax(cumulative_deltas) + # direction = -1 + # # cumulative_deltas.fill(0) + # if direction < 0: + # cumulative_probability[0] = 0 + # else: + # cumulative_probability[-1] = 0 + # np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability) + # print(cumulative_deltas) + + # use_flip = -1 + # highest_p = 0 + # for i in range(0, N): + # p = flip_likelihood[i] + # if i in flips: + # p = -p + # if use_flip < 0 or p > highest_p: + # use_flip = i + # highest_p = p + # if not use_flip in flips and highest_p < 0 or use_flip in flips and highest_p > 0: + # flip_likelihood[use_flip] *= -1.0 + + if use_flip < 0: + return + last_flip = use_flip + if use_flip in flips: + flips.remove(use_flip) + else: + flips.add(use_flip) + print('Flip', use_flip, lowest_err) + print(flips) + cumulative_deltas[use_flip] = -cumulative_deltas[use_flip] + for p in samples: + if p.x[use_flip]: + p.y ^= 1 + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations25.py b/mutations25.py new file mode 100644 index 0000000..4c52283 --- /dev/null +++ b/mutations25.py @@ -0,0 +1,791 @@ +from cmath import isnan +import numpy as np +import random +import hashlib +import math + +def get_state_id(state): + return ','.join([str(x) for x in sorted(state)]) + +class Point(): + def __init__(self, x, y): + self.x = x + self.y = y + + def id(self): + return ','.join([str(int(x)) for x in self.x]) + +class Influence(): + def __init__(self, a, b): + self.a = a + self.b = b + self.original_dof = set() + self.dof = set() + for i in range(0, len(a.x)): + if a.x[i] != b.x[i]: + self.original_dof.add(i) + self.dof.add(i) + + def coherent(self): + return self.a.y == self.b.y + + def id(self): + return ','.join(sorted([self.a.id(), self.b.id()])) + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def decode(x, N): + index = 0 + output = np.zeros((N)) + while x > 0 and index < N: + output[index] = x & 0b1 + x >>= 1 + index += 1 + return output + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + 
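+ # keep only the low bit of the first digest byte, yielding a deterministic pseudo-random 0/1 value for v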
result = m.digest() + return result[0] & 0b1 + +def hamming_distance(a, b): + return np.sum(np.logical_xor(a.x, b.x)) + +def random_x(N): + x = np.zeros((N)) + for i in range(0, N): + x[i] = random.randint(0, 1) + return x + +def xor(x): + # return sum(x) % 2 + half = int(len(x) * 3 / 4) + return sum(x[:half]) % 2 + +def create_dof_map(influences): + dof_map = {} + for influence in influences: + for i in influence.dof: + if not i in dof_map: + dof_map[i] = [] + dof_map[i].append(influence) + return dof_map + +def flip(influences, i): + for influence in influences: + if i in influence.dof: + influence.a.y = int(influence.a.y) ^ 1 + +def remove_dof(dof_map, i, flip = False): + for influence in dof_map[i]: + influence.dof.remove(i) + if flip: + influence.a.y = int(influence.a.y) ^ 1 + # if len(influence.dof) == 0 and not influence.coherent(): + # raise Exception('Invalid') + del dof_map[i] + +def solve(dof_map, all_influences, all_samples): + eliminated = True + while eliminated: + eliminated = False + for influence in all_influences: + if len(influence.dof) == 1: + i = next(iter(influence.dof)) + if influence.coherent: + remove_dof(dof_map, i) + eliminated = True + else: + print('Forced', i) + remove_dof(dof_map, i, True) + eliminated = True + + lowest_dof = None + for influence in all_influences: + if not influence.coherent and len(influence.dof) > 1: + if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof): + lowest_dof = influence + + flip = None + highest_score = -1 + + for i in lowest_dof.dof: + per_point_scores = {} + i_influences = dof_map[i] + left = 0 + right = 0 + for influence in i_influences: + if not influence.a in per_point_scores: + per_point_scores[influence.a] = [0, 0] + if not influence.b in per_point_scores: + per_point_scores[influence.b] = [0, 0] + if influence.coherent: + per_point_scores[influence.a][0] += 1 + per_point_scores[influence.b][0] += 1 + left += 1 + else: + per_point_scores[influence.a][1] += 1 + per_point_scores[influence.b][1] += 1 + right += 1 + print(i, left / (left + right)) + num = 0 + denom = 0 + for _, score in per_point_scores.items(): + if score[0] == score[1]: + continue + print(i, score) + num += score[1] / (score[0] + score[1]) + denom += 1 + score = num / denom if denom > 0 else 0 + print(score) + + return None + + +# 1st row (n+1 choose k+1) * (1-(k mod 2)) +# psuedopascal to compute the follow-on rows +# assuming solvability, we want to maximize the probability that our current state and our state with +# a particular single flip are one order apart in the correct direction + + + +# 2, 0 +# 2, 2, 0 +# 2, 4, 2, 0 +# 2, 6, 6, 2, 0 +# 2, 8,12, 8, 2, 0 +# 2,10,20,20,10, 2, 0 + +# 3,-9,19,-33,51,-73,99 +# 3,-6,10,-14,18,-22,26 +# 3,-3, 4, -4, 4, -4, 4 +# 3, 0, 1, 0, 0, 0, 0 +# 3, 3, 1, 1, 0, 0, 0 +# 3, 6, 4, 2, 1, 0, 0 +# 3, 9,10, 6, 3, 1, 0 + +# 4, 0, 4, 0 +# 4, 4, 4, 4, 0 +# 4, 8, 8, 8, 4, 0 +# 4,12,16,16,12, 4, 0 + +# 5, 0,10, 0, 1 +# 5, 5,10,10, 1, 1 +# 5, +# 5, + + + +# 3 +# +# @1 [1, 2, 1] +# @2 [2, 2, 0] +# @3 [3, 0, 1] + +# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...) 
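+ # (assumed reading of the table below) row @k gives, per Hamming distance d = 1..5, how many of the (5 choose d) distance-d flips change the label of an order-k parity; the bracketed second vector is the complementary coherent count, and 16/15 are the incoherent/coherent totals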
+# +# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2), +# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1) +# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1) +# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 - +# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 - + +# @0 [0.0, 0.0, 0.0, 0.0, 0.0] +# @1 [0.2, 0.4, 0.6, 0.8, 1.0] +# @2 [0.4, 0.6, 0.6, 0.4, 0.0] +# @3 [0.6, 0.6, 0.4, 0.4, 1.0] +# @4 [0.8, 0.4, 0.4, 0.8, 0.0] +# @5 [1.0, 0.0, 1.0, 0.0, 1.0] + +# 6 +# +# @1 [1, 5, 10, 10, 5, 1] +# @2 [2, 8, 12, 8, 2, 0] +# @3 [3, 9, 10, 6, 3, 1] +# @4 [4, 8, 8, 8, 4, 0] +# @5 [5, 5, 10, 10, 1, 1] +# @6 [6, 0, 20, 0, 6, 0] + +# last row, 1 if odd, 0 if even +# second to last, subtract 2 on odds, add 2 on evens + +def compute_pseudopascal(N): + dist = np.zeros((N, N)) + for j in range(0, N): + dist[0][j] = math.comb(N - 1, j) + dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2)) + for i in range(1, N): + for j in range(0, i + 1): + dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2)) + for k in range(i + 1, N): + for j in reversed(range(0, k)): + dist[i][j+1] = dist[i][j] + dist[i][j+1] + return dist + +def compute_distributions(N): + dist = compute_pseudopascal(N) + print(dist) + for i in range(0, N): + for j in range(0, N): + denom = math.comb(N, j+1) + dist[i][j] /= denom + return dist + +def compute_pyramids(N): + num_orders = max(int(N / 2), 1) + pyramids = np.zeros((num_orders, N, N)).astype(np.int32) + # 1st order can be filled in as multiplication and forms the base case + for i in range(0, N): + for j in range(0, i + 1): + pyramids[0][i][j] = (i - j + 1) * (j + 1) + for order in range(1, num_orders): + offset = order * 2 + + # fill in the LHS and diagonal + for i in range(0, N - offset): + value = math.comb(2 * (order + 1) + i - 1, i) + pyramids[order][i + offset][0] = value + # mirror + pyramids[order][i + offset][i + offset] = value + + # accumulate along the diagonals + for i in range(1, N): + value = pyramids[order][i][0] + acc = value + for j in range(1, N - i): + value += acc + pyramids[order][i + j][j] = value + acc += pyramids[order - 1][i + j - 1][j - 1] + + return pyramids + +def get_total_band_count(distance, band_distance, N): + if band_distance % 2 == 1: + return 0 + order = int(band_distance / 2) - 1 + if order < 0: + return 0 + if distance < order + 1: + return 0 + if distance > N - order - 1: + return 0 + order_root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2 + scale = math.comb(N - (order + 1) * 2, distance - order - 1) + value = math.comb(2 * (order + 1) + N - 2 * (order + 1), N - 2 * (order + 1)) + return order_root * scale * value + +def get_incoherent_band_count(pyramids, distance, band_distance, k, N): + if k == 0 or k == N or band_distance % 2 == 1: + return 0 + order = int(band_distance / 2) - 1 + if order < 0: + return 0 + if distance < order + 1: + return 0 + if distance > N - order - 1: + return 0 + order_root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2 + scale = math.comb(N - (order + 1) * 2, distance - order - 1) + value = pyramids[order][N - 2][k - 1] + return order_root * scale * value + +def confusion_probabilities(N, samples): + sample_sizes = np.zeros(N) + for i in range(0, len(samples)): + a = samples[i] + for j in range(0, len(samples)): + b = samples[j] + if i == j: + continue + distance = hamming_distance(a, b) + sample_sizes[distance - 1] += 1 + + 
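+ # sample_sizes[d-1] now counts the ordered sample pairs observed at Hamming distance d
+ # confusion[i][j] below roughly estimates how plausibly the incoherence profile of the order in row j of dist could pass for the order in row i, given those pair counts, scored with a raised-cosine window over row i's feasible incoherent fraction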
confusion = np.zeros((N, N)) + dist = compute_pseudopascal(N) + np.multiply(dist, 2 ** N, dist) + # These are the probabilities that we might mix up any two orders given a particular sample size + for i in range(0, N): + for j in range(0, N): + probability = 1.0 + for k in range(0, N): + full_size = math.comb(N, k+1) * (2 ** N) + sample_size = sample_sizes[k] + num_unknowns = full_size - sample_size + i_incoherent = dist[i][k] + # Worst case, we sample only the coherent points, + i_min = max(i_incoherent - num_unknowns, 0) / full_size + i_max = min(sample_size, i_incoherent) / full_size + u = i_min + i_max / 2 + s = (i_max - i_min) / 2 + probability *= raised_cosine(dist[j][k] / full_size, u, s) + confusion[i][j] = probability + return confusion + +def raised_cosine(x, u, s): + if x < (u - s): + return 0 + if x > (u + s): + return 0 + return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s)) + +# Probability of getting k red balls after drawing n from a bag with m total balls and j red balls in it +# (n choose k) * p^k * (1-p)^(n-k) + +# p/m chance of getting a red ball +# (1 - p/m) chance of not getting a red ball + +# One way (p/m) * ((p-1)/(m-1)) * ((p-2)/(m-2)) +# (1 - (p/m)) + +cache = {} +hits = 0 +misses = 0 +def p_bernoulli(n, k, m, j): + global hits, misses + key = (n, k, m, j) + if key in cache: + hits += 1 + return cache[key] + misses += 1 + probabilities = np.zeros((n + 1, n + 1)) + probabilities.fill(-1) + stack = [(0,0)] + while len(stack) > 0: + (a, b) = stack.pop() + if a + b == n: + probabilities[a][b] = 1 if a == k else 0 + elif a > j: + probabilities[a][b] = 0 + elif b > (m - j): + probabilities[a][b] = 0 + else: + p_left = probabilities[a + 1][b] + p_right = probabilities[a][b + 1] + if p_left >= 0 and p_right >= 0: + p = (j - a) / (m - a - b) + probabilities[a][b] = p_left * p + p_right * (1 - p) + else: + stack.append((a, b)) + if p_left < 0: + stack.append((a + 1, b)) + if p_right < 0: + stack.append((a, b + 1)) + cache[key] = probabilities[0][0] + # if len(cache) % 100 == 0: + # print('Cache size: ', len(cache), math.floor(10000 * hits / (hits + misses)) / 100, '%') + return probabilities[0][0] + + p = j / m + if n == k: + return 1.0 + if k > p: + return 0.0 + P = 1.0 + p_k = 0 + p_nk = 0 + for i in range(1, k + 1): + P *= (n + 1 - i) / i + while P > 1.0 and p_k < k: + P *= p + p_k += 1 + while P > 1.0 and p_nk < (n - k): + P *= (1 - p) + p_nk += 1 + while p_k < k: + P *= p + p_k += 1 + while (p_nk < (n - k)): + P *= (1 - p) + p_nk += 1 + return P + +def average_index(x): + total = 0 + for k in range(0, len(x)): + total += k * x[k] + return total / np.sum(x) + +def compute_cumulative_probability(N, bases, p_n): + # p_n = np.zeros(N) + # p_n.fill(0.5) + states = [[]] + flips = set() + for i in range(1, len(bases)): + # (base, _) = bases[i] + (_, flip) = bases[i] + # p_forward = 0 + # p_backward = 0 + # for k in range(0, N - 1): + # p_forward += base[k + 1] * next_p[k] + # p_backward += base[k] * next_p[k + 1] + if flip in flips: + # p_n[flip] -= p_forward + # p_n[flip] += p_backward + flips.remove(flip) + else: + # p_n[flip] += p_forward + # p_n[flip] -= p_backward + flips.add(flip) + states.append(flips.copy()) + # np.clip(p_n, 0, 1, p_n) + # print('Contribution probabilities', p_n) + + min_p_n = np.min(p_n) + max_p_n = np.max(p_n) + + + p_k = np.zeros(N) + for k in range(0, N): + stack = [(k, len(bases) - 1)] + probabilities = np.zeros((N, len(bases))) + probabilities.fill(-1) + while len(stack) > 0: + (i, base_index) = stack.pop() + (base, flip) = 
bases[base_index] + if base_index == 0: + probabilities[i, 0] = base[i] + else: + left = i - 1 + right = i + 1 + state = states[base_index - 1] + p_flip = max(min(p_n[flip] + 0.5, 1.0), 0) + if flip in state: + p_flip = 1 - p_flip + p_left = probabilities[left, base_index - 1] if left >= 0 else 0 + p_right = probabilities[right, base_index - 1] if right < N else 0 + if p_left >= 0 and p_right >= 0: + probabilities[i, base_index] = base[i] * p_left * (1 - p_flip) + base[i] * p_right * p_flip + else: + stack.append((i, base_index)) + if p_left < 0: + stack.append((left, base_index - 1)) + if p_right < 0: + stack.append((right, base_index - 1)) + p_k[k] = probabilities[k][-1] + np.divide(p_k, np.sum(p_k), p_k) + return p_k + +# 8, 32, 2^5 +# 10, 64, 2^6 +# 12, 128, 2^7 +# 14, 256, 2^8 +# 16, 512, 2^9 +# 18, 1024, 2^10 +# 20, 2048, 2^11 +# 22, 4096, 2^12 +def main(): + N = 10 + sample_size = 32 + sample_ids = set() + samples = [] + + dist = compute_pseudopascal(N) + pyramids = compute_pyramids(N + 1) + + for i in range(0, sample_size): + x = random_x(N) + y = int(xor(x)) + p = Point(x, y) + p_id = p.id() + if p_id in sample_ids: + continue + sample_ids.add(p_id) + samples.append(p) + # confusion = confusion_probabilities(N, samples) + # print(confusion) + # return + + # for i in range(0, 2**N): + # x = decode(i, N) + # y = int(xor(x)) + # samples.append(Point(x,y)) + + base = np.zeros(N) + current = np.zeros(N) + cumulative_probability = np.ones(N) + flip_likelihood = np.zeros(N) + cumulative_deltas = np.zeros(N) + direction = -1 + flips = set() + bases = [] + last_flip = -1 + max_base_index = -1 + scores = np.zeros(N) + indices = [] + + for _ in range(0, 2 ** N): + lowest_err = -1 + use_flip = -1 + for flip in range(-1, N): + coherent_distances = np.zeros(N+1) + incoherent_distances = np.zeros(N+1) + probability = np.ones(N) + all_coherent = True + for i in range(0, len(samples)): + a = samples[i] + bands = [[] for _ in range(0, N + 1)] + for j in range(0, len(samples)): + if i == j: + continue + b = samples[j] + distance = hamming_distance(a, b) + bands[distance].append(b) + is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y) + if is_coherent: + coherent_distances[distance] += 1 + else: + incoherent_distances[distance] += 1 + all_coherent = False + for distance in range(0, N + 1): + band = bands[distance] + if len(band) < 2: + continue + coherent_bands = np.zeros(N + 1) + incoherent_bands = np.zeros(N + 1) + for j in range(0, len(band)): + c = band[j] + for k in range(0, len(band)): + if j == k: + continue + d = band[k] + band_distance = hamming_distance(c, d) + is_coherent = ((flip < 0 or c.x[flip] == d.x[flip]) and c.y == d.y) or ((flip >= 0 and c.x[flip] != d.x[flip]) and c.y != d.y) + if is_coherent: + coherent_bands[band_distance] += 1 + else: + incoherent_bands[band_distance] += 1 + for band_distance in range(1, N + 1): + n = coherent_bands[band_distance] + incoherent_bands[band_distance] + if n == 0: + continue + t = get_total_band_count(distance, band_distance, N) + if t == 0: + continue + a = incoherent_bands[band_distance] + for k in range(0, N): + p = get_incoherent_band_count(pyramids, distance, band_distance, k + 1, N) + prob = p_bernoulli(int(n), int(a), t, p) + # if prob == 0 and k == 5: + # p = get_incoherent_band_count(pyramids, distance, band_distance, k, N) + # print('test') + probability[k] *= prob + if np.sum(probability) == 0: + print('Uh-oh') + np.divide(probability, np.sum(probability), 
probability) + + if all_coherent: + print('Flip and halt', flip) + return + # print(coherent_distances, incoherent_distances) + + # print(coherent_distances, incoherent_distances) + # est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances)) + # print(est_incoherence) + # np.divide(probability, np.sum(probability), probability) + for j in range(1, N + 1): + n = coherent_distances[j] + incoherent_distances[j] + if n == 0: + continue + t = math.comb(N, j) * (2 ** N) + if t == 0: + continue + a = incoherent_distances[j] + for k in range(0, N): + p = dist[k][j - 1] * (2 ** N) + prob = p_bernoulli(int(n), int(a), t, p) + probability[k] *= prob + if np.sum(probability) == 0: + print('Uh-oh') + np.divide(probability, np.sum(probability), probability) + + if flip < 0: + np.copyto(base, probability) + else: + np.copyto(current, probability) + + + # print(k, i, min_true_value, max_true_value) + + # confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative + # err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence + # denom += 1 + # print(flip, k, err) + # err /= denom + # if flip < 0: + # base[k] = probability + # else: + # current[k] = probability + + if flip >= 0: + if np.sum(current) == 0: + continue + np.divide(current, np.sum(current), current) + + # print(current) + # temp = np.roll(cumulative_probability, -1) + # temp[-1] = 1.0 + # np.multiply(current, temp, current) + # np.divide(current, np.sum(current), current) + p_forward = 0 + p_backward = 0 + for i in range(1, N): + p_forward += base[i] * current[i - 1] + for i in range(0, N - 1): + p_backward += base[i] * current[i + 1] + scores[flip] += p_forward - p_backward + + scale = 0.01 + if flip in flips: + flip_likelihood[flip] += scale * p_backward + flip_likelihood[flip] -= scale * p_forward + else: + flip_likelihood[flip] -= scale * p_backward + flip_likelihood[flip] += scale * p_forward + delta = p_forward - p_backward + # print(flip, current, p_forward, p_backward) + base_index = average_index(cumulative_probability) + current_index = average_index(current) + err = abs(1 - (base_index - current_index)) + # print(base_index, current_index, err) + + # base_index = average_index(cumulative_probability) + # new_index = average_index(current) + # if isnan(new_index): + # continue + # np.divide(current, np.sum(current), current) + # np.subtract(1, current, current) + # print(flip,p_forward,p_backward,current) + if use_flip < 0 or delta > lowest_err: + use_flip = flip + lowest_err = delta + + # cumulative_deltas[flip] += 0 + + # for k in range(0, N - 1): + # value = current[k] * cumulative_probability[k + 1] + # if use_flip < 0 or value > lowest_err: + # use_flip = flip + # lowest_err = value + # print(flip, highest_value) + else: + # p_next = np.zeros(N) + # for i in range(0, N): + # P = 0.0 + # for j in range(0, N): + # if i == j: + # continue + # P += base[i] * (1 - base[j]) + # p_next[i] = P + # base = p_next + + # base[0] = 0 + np.divide(base, np.sum(base), base) + max_base_index = np.argmax(base) + bases.append((base.copy(), last_flip)) + # bases.insert(0, base.copy()) + # cumulative_probability = compute_cumulative_probability(N, bases) + # p_forward = 0 + # p_backward = 0 + # for i in range(1, N): + # p_forward += cumulative_probability[i] * base[i - 1] + # for i in range(0, N - 1): + # p_backward += cumulative_probability[i] * base[i + 1] + print('Base', base) + # # np.subtract(1, base, base) + # # 
print(cumulative_probability) + # shift_left = np.roll(cumulative_probability, -len(indic)) + # shift_left[-1] = 0.0 + # # # # print('Shift Left', p_forward, shift_left) + # shift_right = np.roll(cumulative_probability, 1) + # shift_right[0] = 0.0 + # # # # print('Shift Right', p_backward, shift_right) + # p_next = np.add(np.multiply(shift_left, 0.5), np.multiply(shift_right, 0.5)) + # np.divide(p_next, np.sum(p_next), p_next) + # # # # # print('Next', p_next) + # # # # # # print(cumulative_probability) + # # # # # # print(base) + # np.multiply(base, p_next, cumulative_probability) + # cumulative_probability[0] = 0 + # # # # # np.multiply(cumulative_probability, shift_right, cumulative_probability) + # np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability) + # cumulative_probability = compute_cumulative_probability(N, bases, flip_likelihood) + # print('Cumulative', cumulative_probability) + # print('Likelihood', flip_likelihood) + + # cumulative_probability[0] = 0 + # use_flip = -1 + # if direction < 0: + # use_flip = np.argmax(cumulative_deltas) + # if cumulative_deltas[use_flip] < 0: + # use_flip = np.argmin(cumulative_deltas) + # direction = 1 + # # cumulative_deltas.fill(0) + # else: + # use_flip = np.argmin(cumulative_deltas) + # if cumulative_deltas[use_flip] > 0: + # use_flip = np.argmax(cumulative_deltas) + # direction = -1 + # # cumulative_deltas.fill(0) + # if direction < 0: + # cumulative_probability[0] = 0 + # else: + # cumulative_probability[-1] = 0 + # np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability) + # print(cumulative_deltas) + + # use_flip = -1 + # highest_p = 0 + # for i in range(0, N): + # p = flip_likelihood[i] + # if i in flips: + # p = -p + # if use_flip < 0 or p > highest_p: + # use_flip = i + # highest_p = p + # if not use_flip in flips and highest_p < 0 or use_flip in flips and highest_p > 0: + # flip_likelihood[use_flip] *= -1.0 + print(scores) + + indices = sorted(range(len(scores)), key=lambda i: scores[i])[-(max_base_index + 1):] + print(indices) + + for flip in indices: + scores[flip] *= -1.0 + if flip in flips: + flips.remove(flip) + else: + flips.add(flip) + for p in samples: + if p.x[flip]: + p.y ^= 1 + print(flips) + + # if use_flip < 0: + # return + # last_flip = use_flip + # if use_flip in flips: + # flips.remove(use_flip) + # else: + # flips.add(use_flip) + # print('Flip', use_flip, lowest_err) + # print(flips) + # cumulative_deltas[use_flip] = -cumulative_deltas[use_flip] + # for p in samples: + # if p.x[use_flip]: + # p.y ^= 1 + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations26.py b/mutations26.py new file mode 100644 index 0000000..062921c --- /dev/null +++ b/mutations26.py @@ -0,0 +1,741 @@ +from cmath import isnan +import numpy as np +import random +import hashlib +import math + +def get_state_id(state): + return ','.join([str(x) for x in sorted(state)]) + +class Point(): + def __init__(self, x, y): + self.x = x + self.y = y + + def id(self): + return ','.join([str(int(x)) for x in self.x]) + +class Influence(): + def __init__(self, a, b): + self.a = a + self.b = b + self.original_dof = set() + self.dof = set() + for i in range(0, len(a.x)): + if a.x[i] != b.x[i]: + self.original_dof.add(i) + self.dof.add(i) + + def coherent(self): + return self.a.y == self.b.y + + def id(self): + return ','.join(sorted([self.a.id(), self.b.id()])) + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(len(v) / 8)): + x = 0 + for j in 
range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def decode(x, N): + index = 0 + output = np.zeros((N)) + while x > 0 and index < N: + output[index] = x & 0b1 + x >>= 1 + index += 1 + return output + +def sha(v): + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def hamming_distance(a, b): + return np.sum(np.logical_xor(a.x, b.x)) + +def random_x(N): + x = np.zeros((N)) + for i in range(0, N): + x[i] = random.randint(0, 1) + return x + +def xor_n(x, n): + return sum(x[:n]) % 2 + +def create_dof_map(influences): + dof_map = {} + for influence in influences: + for i in influence.dof: + if not i in dof_map: + dof_map[i] = [] + dof_map[i].append(influence) + return dof_map + +def flip(influences, i): + for influence in influences: + if i in influence.dof: + influence.a.y = int(influence.a.y) ^ 1 + +def remove_dof(dof_map, i, flip = False): + for influence in dof_map[i]: + influence.dof.remove(i) + if flip: + influence.a.y = int(influence.a.y) ^ 1 + # if len(influence.dof) == 0 and not influence.coherent(): + # raise Exception('Invalid') + del dof_map[i] + +def solve(dof_map, all_influences, all_samples): + eliminated = True + while eliminated: + eliminated = False + for influence in all_influences: + if len(influence.dof) == 1: + i = next(iter(influence.dof)) + if influence.coherent: + remove_dof(dof_map, i) + eliminated = True + else: + print('Forced', i) + remove_dof(dof_map, i, True) + eliminated = True + + lowest_dof = None + for influence in all_influences: + if not influence.coherent and len(influence.dof) > 1: + if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof): + lowest_dof = influence + + flip = None + highest_score = -1 + + for i in lowest_dof.dof: + per_point_scores = {} + i_influences = dof_map[i] + left = 0 + right = 0 + for influence in i_influences: + if not influence.a in per_point_scores: + per_point_scores[influence.a] = [0, 0] + if not influence.b in per_point_scores: + per_point_scores[influence.b] = [0, 0] + if influence.coherent: + per_point_scores[influence.a][0] += 1 + per_point_scores[influence.b][0] += 1 + left += 1 + else: + per_point_scores[influence.a][1] += 1 + per_point_scores[influence.b][1] += 1 + right += 1 + print(i, left / (left + right)) + num = 0 + denom = 0 + for _, score in per_point_scores.items(): + if score[0] == score[1]: + continue + print(i, score) + num += score[1] / (score[0] + score[1]) + denom += 1 + score = num / denom if denom > 0 else 0 + print(score) + + return None + + +# 1st row (n+1 choose k+1) * (1-(k mod 2)) +# psuedopascal to compute the follow-on rows +# assuming solvability, we want to maximize the probability that our current state and our state with +# a particular single flip are one order apart in the correct direction + + + +# 2, 0 +# 2, 2, 0 +# 2, 4, 2, 0 +# 2, 6, 6, 2, 0 +# 2, 8,12, 8, 2, 0 +# 2,10,20,20,10, 2, 0 + +# 3,-9,19,-33,51,-73,99 +# 3,-6,10,-14,18,-22,26 +# 3,-3, 4, -4, 4, -4, 4 +# 3, 0, 1, 0, 0, 0, 0 +# 3, 3, 1, 1, 0, 0, 0 +# 3, 6, 4, 2, 1, 0, 0 +# 3, 9,10, 6, 3, 1, 0 + +# 4, 0, 4, 0 +# 4, 4, 4, 4, 0 +# 4, 8, 8, 8, 4, 0 +# 4,12,16,16,12, 4, 0 + +# 5, 0,10, 0, 1 +# 5, 5,10,10, 1, 1 +# 5, +# 5, + + + +# 3 +# +# @1 [1, 2, 1] +# @2 [2, 2, 0] +# @3 [3, 0, 1] + +# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...) 
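+ # (cross-check of the 3-bit table above) an order-k parity flips under exactly those distance-d moves that change an odd number of its k relevant bits, e.g. @2 at d = 2 gives comb(2, 1) * comb(1, 1) = 2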
+# +# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2), +# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1) +# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1) +# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 - +# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 - + +# @0 [0.0, 0.0, 0.0, 0.0, 0.0] +# @1 [0.2, 0.4, 0.6, 0.8, 1.0] +# @2 [0.4, 0.6, 0.6, 0.4, 0.0] +# @3 [0.6, 0.6, 0.4, 0.4, 1.0] +# @4 [0.8, 0.4, 0.4, 0.8, 0.0] +# @5 [1.0, 0.0, 1.0, 0.0, 1.0] + +# 6 +# +# @1 [1, 5, 10, 10, 5, 1] +# @2 [2, 8, 12, 8, 2, 0] +# @3 [3, 9, 10, 6, 3, 1] +# @4 [4, 8, 8, 8, 4, 0] +# @5 [5, 5, 10, 10, 1, 1] +# @6 [6, 0, 20, 0, 6, 0] + +# last row, 1 if odd, 0 if even +# second to last, subtract 2 on odds, add 2 on evens + +def compute_pseudopascal(N): + dist = np.zeros((N, N)) + for j in range(0, N): + dist[0][j] = math.comb(N - 1, j) + dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2)) + for i in range(1, N): + for j in range(0, i + 1): + dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2)) + for k in range(i + 1, N): + for j in reversed(range(0, k)): + dist[i][j+1] = dist[i][j] + dist[i][j+1] + return dist + +def compute_distributions(N): + dist = compute_pseudopascal(N) + print(dist) + for i in range(0, N): + for j in range(0, N): + denom = math.comb(N, j+1) + dist[i][j] /= denom + return dist + +def confusion_probabilities(N, samples): + sample_sizes = np.zeros(N) + for i in range(0, len(samples)): + a = samples[i] + for j in range(0, len(samples)): + b = samples[j] + if i == j: + continue + distance = hamming_distance(a, b) + sample_sizes[distance - 1] += 1 + + confusion = np.zeros((N, N)) + dist = compute_pseudopascal(N) + np.multiply(dist, 2 ** N, dist) + # These are the probabilities that we might mix up any two orders given a particular sample size + for i in range(0, N): + for j in range(0, N): + probability = 1.0 + for k in range(0, N): + full_size = math.comb(N, k+1) * (2 ** N) + sample_size = sample_sizes[k] + num_unknowns = full_size - sample_size + i_incoherent = dist[i][k] + # Worst case, we sample only the coherent points, + i_min = max(i_incoherent - num_unknowns, 0) / full_size + i_max = min(sample_size, i_incoherent) / full_size + u = i_min + i_max / 2 + s = (i_max - i_min) / 2 + probability *= raised_cosine(dist[j][k] / full_size, u, s) + confusion[i][j] = probability + return confusion + +def raised_cosine(x, u, s): + if x < (u - s): + return 0 + if x > (u + s): + return 0 + return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s)) + +# Probability of getting k red balls after drawing n from a bag with m total balls and j red balls in it +# (n choose k) * p^k * (1-p)^(n-k) + +# p/m chance of getting a red ball +# (1 - p/m) chance of not getting a red ball + +# One way (p/m) * ((p-1)/(m-1)) * ((p-2)/(m-2)) +# (1 - (p/m)) + +def p_bernoulli(n, k, m, j): + # probabilities = np.zeros((n + 1, n + 1)) + # probabilities.fill(-1) + # # if n == k: + # # return 1.0 + # # if k > p: + # # return 0.0 + # stack = [(0,0)] + # while len(stack) > 0: + # (a, b) = stack.pop() + # if a + b == n: + # probabilities[a][b] = 1 if a == k else 0 + # elif a > j: + # probabilities[a][b] = 0 + # elif b > (m - j): + # probabilities[a][b] = 0 + # else: + # p_left = probabilities[a + 1][b] + # p_right = probabilities[a][b + 1] + # if p_left >= 0 and p_right >= 0: + # p = (j - a) / (m - a - b) + # probabilities[a][b] = p_left * p + p_right * (1 - p) + # 
else: + # stack.append((a, b)) + # if p_left < 0: + # stack.append((a + 1, b)) + # if p_right < 0: + # stack.append((a, b + 1)) + # return probabilities[0][0] + + p = j / m + P = 1.0 + p_k = 0 + p_nk = 0 + for i in range(1, k + 1): + P *= (n + 1 - i) / i + while P > 1.0 and p_k < k: + P *= p + p_k += 1 + while P > 1.0 and p_nk < (n - k): + P *= (1 - p) + p_nk += 1 + while p_k < k: + P *= p + p_k += 1 + while (p_nk < (n - k)): + P *= (1 - p) + p_nk += 1 + return P + +def average_index(x): + total = 0 + for k in range(0, len(x)): + total += k * x[k] + return total / np.sum(x) + +def compute_cumulative_probability(N, bases, p_n): + # p_n = np.zeros(N) + # p_n.fill(0.5) + states = [[]] + flips = set() + for i in range(1, len(bases)): + # (base, _) = bases[i] + (_, flip) = bases[i] + # p_forward = 0 + # p_backward = 0 + # for k in range(0, N - 1): + # p_forward += base[k + 1] * next_p[k] + # p_backward += base[k] * next_p[k + 1] + if flip in flips: + # p_n[flip] -= p_forward + # p_n[flip] += p_backward + flips.remove(flip) + else: + # p_n[flip] += p_forward + # p_n[flip] -= p_backward + flips.add(flip) + states.append(flips.copy()) + # np.clip(p_n, 0, 1, p_n) + # print('Contribution probabilities', p_n) + + min_p_n = np.min(p_n) + max_p_n = np.max(p_n) + + + p_k = np.zeros(N) + for k in range(0, N): + stack = [(k, len(bases) - 1)] + probabilities = np.zeros((N, len(bases))) + probabilities.fill(-1) + while len(stack) > 0: + (i, base_index) = stack.pop() + (base, flip) = bases[base_index] + if base_index == 0: + probabilities[i, 0] = base[i] + else: + left = i - 1 + right = i + 1 + state = states[base_index - 1] + p_flip = max(min(p_n[flip] + 0.5, 1.0), 0) + if flip in state: + p_flip = 1 - p_flip + p_left = probabilities[left, base_index - 1] if left >= 0 else 0 + p_right = probabilities[right, base_index - 1] if right < N else 0 + if p_left >= 0 and p_right >= 0: + probabilities[i, base_index] = base[i] * p_left * (1 - p_flip) + base[i] * p_right * p_flip + else: + stack.append((i, base_index)) + if p_left < 0: + stack.append((left, base_index - 1)) + if p_right < 0: + stack.append((right, base_index - 1)) + p_k[k] = probabilities[k][-1] + np.divide(p_k, np.sum(p_k), p_k) + return p_k + +# 8, 32, 2^5 +# 10, 64, 2^6 +# 12, 128, 2^7 +# 14, 256, 2^8 +# 16, 512, 2^9 +# 18, 1024, 2^10 +# 20, 2048, 2^11 +# 22, 4096, 2^12 +def main(): + N = 16 + sample_size = 32 + e_bits = 2 + sample_ids = set() + samples = [] + + dist = compute_pseudopascal(N) + print(dist) + + for i in range(0, sample_size): + x = random_x(N) + y = int(xor_n(x, e_bits)) + p = Point(x, y) + p_id = p.id() + if p_id in sample_ids: + continue + sample_ids.add(p_id) + samples.append(p) + + chords = [{} for _ in range(0, len(samples))] + for i in range(0, len(samples)): + a = samples[i] + for j in range(i + 1, len(samples)): + b = samples[j] + distance = hamming_distance(a, b) + if distance not in chords[i]: + chords[i][distance] = [] + chords[i][distance].append(j) + if distance not in chords[j]: + chords[j][distance] = [] + chords[j][distance].append(i) + + probability = np.zeros((N, N)) + scalars = np.ones(N) + for i in range(0, len(samples)): + origin = samples[i] + for (distance, points) in chords[i].items(): + n = len(points) + t = math.comb(N, distance) + a = sum([0 if origin.y == samples[index].y else 1 for index in points]) + for k in range(1, N - 1): + p = dist[k][distance - 1] + prob_at_k = p_bernoulli(n, a, t, p) + for flip in range(0, N): + a_flip = sum([0 if origin.y == samples[index].y and origin.x[flip] == 
samples[index].x[flip] or origin.y != samples[index].y and origin.x[flip] != samples[index].x[flip] else 1 for index in points]) + p_forward = dist[k - 1][distance - 1] + p_backward = dist[k + 1][distance - 1] + prob_at_k_forward = p_bernoulli(n, a_flip, t, p_forward) + prob_at_k_backward = p_bernoulli(n, a_flip, t, p_backward) + # prob_at_k_backward = 0 + probability[k][flip] += (n / t) * prob_at_k * (prob_at_k_forward - prob_at_k_backward) + # probability[k][flip] *= prob_at_k * prob_at_k_forward + # scalars[k] *= np.max(probability[k]) + # np.divide(probability[k], np.max(probability[k]), probability[k]) + + # print(scalars) + print(probability) + return + + coherent_distances = np.zeros(N + 1) + incoherent_distances = np.zeros(N + 1) + total_distances = np.zeros(N + 1) + for i in range(0, len(samples)): + coherent_distances.fill(0) + incoherent_distances.fill(0) + total_distances.fill(0) + a = samples[i] + for j in range(0, len(samples)): + b = samples[j] + distance = hamming_distance(a, b) + is_coherent = a.y == b.y + total_distances[distance] += 1 + if is_coherent: + coherent_distances[distance] += 1 + else: + incoherent_distances[distance] += 1 + print(total_distances) + print(incoherent_distances) + print() + for d in range(1, N + 1): + n = coherent_distances[d] + incoherent_distances[d] + if n == 0: + continue + local_probability = np.ones(N) + for k in range(0, N): + a = incoherent_distances[d] + t = math.comb(N, d) + p = dist[k][d - 1] + prob = p_bernoulli(int(n), int(a), t, p) + local_probability[k] = prob + probability[i][k] *= prob + print(local_probability) + np.divide(probability[i], np.sum(probability[i]), probability[i]) + print() + print(probability) + total_probability = np.ones(N) + for i in range(0, len(samples)): + np.multiply(probability[i], total_probability, total_probability) + np.divide(total_probability, np.sum(total_probability), total_probability) + print(total_probability) + + return + + + # confusion = confusion_probabilities(N, samples) + # print(confusion) + # return + + # for i in range(0, 2**N): + # x = decode(i, N) + # y = int(xor(x)) + # samples.append(Point(x,y)) + + base = np.zeros(N) + current = np.zeros(N) + cumulative_probability = np.ones(N) + flip_likelihood = np.zeros(N) + cumulative_deltas = np.zeros(N) + direction = -1 + flips = set() + bases = [] + last_flip = -1 + + for _ in range(0, 2 ** N): + lowest_err = -1 + use_flip = -1 + for flip in range(-1, N): + coherent_distances = np.zeros(len(samples), N+1) + incoherent_distances = np.zeros(N+1) + all_coherent = True + for i in range(0, len(samples)): + a = samples[i] + for j in range(0, len(samples)): + b = samples[j] + distance = hamming_distance(a, b) + is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y) + if is_coherent: + coherent_distances[distance] += 1 + else: + incoherent_distances[distance] += 1 + all_coherent = False + if all_coherent: + print('Flip and halt', flip) + return + # print(coherent_distances, incoherent_distances) + + # print(coherent_distances, incoherent_distances) + # est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances)) + # print(est_incoherence) + + probability = np.ones(N) + # np.divide(probability, np.sum(probability), probability) + for j in range(1, N + 1): + n = coherent_distances[j] + incoherent_distances[j] + if n == 0: + continue + for k in range(0, N): + a = incoherent_distances[j] + t = math.comb(N, j) * (2 ** N) + p = dist[k][j - 1] * 
(2 ** N) + prob = p_bernoulli(int(n), int(a), t, p) + probability[k] *= prob + np.divide(probability, np.sum(probability), probability) + + if flip < 0: + np.copyto(base, probability) + else: + np.copyto(current, probability) + + + # print(k, i, min_true_value, max_true_value) + + # confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative + # err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence + # denom += 1 + # print(flip, k, err) + # err /= denom + # if flip < 0: + # base[k] = probability + # else: + # current[k] = probability + + if flip >= 0: + if np.sum(current) == 0: + continue + np.divide(current, np.sum(current), current) + # print(current) + # temp = np.roll(cumulative_probability, -1) + # temp[-1] = 1.0 + # np.multiply(current, temp, current) + # np.divide(current, np.sum(current), current) + p_forward = 0 + p_backward = 0 + for i in range(1, N): + p_forward += base[i] * current[i - 1] + for i in range(0, N - 1): + p_backward += base[i] * current[i + 1] + scale = 0.01 + if flip in flips: + flip_likelihood[flip] += scale * p_backward + flip_likelihood[flip] -= scale * p_forward + else: + flip_likelihood[flip] -= scale * p_backward + flip_likelihood[flip] += scale * p_forward + delta = p_forward - p_backward + print(flip, current, p_forward, p_backward) + base_index = average_index(base) + current_index = average_index(current) + err = abs(1 - (base_index - current_index)) + print(base_index, current_index, err) + + # base_index = average_index(cumulative_probability) + # new_index = average_index(current) + # if isnan(new_index): + # continue + # np.divide(current, np.sum(current), current) + # np.subtract(1, current, current) + # print(flip,p_forward,p_backward,current) + if delta > 0 and (use_flip < 0 or delta > lowest_err): + use_flip = flip + lowest_err = delta + + # cumulative_deltas[flip] += 0 + + # for k in range(0, N - 1): + # value = current[k] * cumulative_probability[k + 1] + # if use_flip < 0 or value > lowest_err: + # use_flip = flip + # lowest_err = value + # print(flip, highest_value) + else: + # p_next = np.zeros(N) + # for i in range(0, N): + # P = 0.0 + # for j in range(0, N): + # if i == j: + # continue + # P += base[i] * (1 - base[j]) + # p_next[i] = P + # base = p_next + + # base[0] = 0 + np.divide(base, np.sum(base), base) + bases.append((base.copy(), last_flip)) + # bases.insert(0, base.copy()) + # cumulative_probability = compute_cumulative_probability(N, bases) + # p_forward = 0 + # p_backward = 0 + # for i in range(1, N): + # p_forward += cumulative_probability[i] * base[i - 1] + # for i in range(0, N - 1): + # p_backward += cumulative_probability[i] * base[i + 1] + print('Base', base) + # # # np.subtract(1, base, base) + # # # print(cumulative_probability) + # shift_left = np.roll(cumulative_probability, -1) + # shift_left[-1] = 0.0 + # # # # print('Shift Left', p_forward, shift_left) + # shift_right = np.roll(cumulative_probability, 1) + # shift_right[0] = 0.0 + # # # # print('Shift Right', p_backward, shift_right) + # p_next = np.add(np.multiply(shift_left, 0.5), np.multiply(shift_right, 0.5)) + # p_next[0] = 0 + # np.divide(p_next, np.sum(p_next), p_next) + # # # # print('Next', p_next) + # # # # # print(cumulative_probability) + # # # # # print(base) + # np.multiply(base, p_next, cumulative_probability) + # cumulative_probability[0] = 0 + # # # # # np.multiply(cumulative_probability, shift_right, cumulative_probability) + # np.divide(cumulative_probability, 
np.sum(cumulative_probability), cumulative_probability) + cumulative_probability = compute_cumulative_probability(N, bases, flip_likelihood) + print('Cumulative', cumulative_probability) + print('Likelihood', flip_likelihood) + + # cumulative_probability[0] = 0 + # use_flip = -1 + # if direction < 0: + # use_flip = np.argmax(cumulative_deltas) + # if cumulative_deltas[use_flip] < 0: + # use_flip = np.argmin(cumulative_deltas) + # direction = 1 + # # cumulative_deltas.fill(0) + # else: + # use_flip = np.argmin(cumulative_deltas) + # if cumulative_deltas[use_flip] > 0: + # use_flip = np.argmax(cumulative_deltas) + # direction = -1 + # # cumulative_deltas.fill(0) + # if direction < 0: + # cumulative_probability[0] = 0 + # else: + # cumulative_probability[-1] = 0 + # np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability) + # print(cumulative_deltas) + + # use_flip = -1 + # highest_p = 0 + # for i in range(0, N): + # p = flip_likelihood[i] + # if i in flips: + # p = -p + # if use_flip < 0 or p > highest_p: + # use_flip = i + # highest_p = p + # if not use_flip in flips and highest_p < 0 or use_flip in flips and highest_p > 0: + # flip_likelihood[use_flip] *= -1.0 + + if use_flip < 0: + return + last_flip = use_flip + if use_flip in flips: + flips.remove(use_flip) + else: + flips.add(use_flip) + print('Flip', use_flip, lowest_err) + print(flips) + cumulative_deltas[use_flip] = -cumulative_deltas[use_flip] + for p in samples: + if p.x[use_flip]: + p.y ^= 1 + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations3.py b/mutations3.py new file mode 100644 index 0000000..9e6b8df --- /dev/null +++ b/mutations3.py @@ -0,0 +1,541 @@ +import hashlib +import math +from matplotlib import offsetbox +import numpy as np +import random +from struct import pack, pack_into, unpack_from +import secrets + +from numpy import hamming + +N = 32 +M = 2 + +def bit_at_index(buffer, index): + offset = (index >> 3) % len(buffer) + return buffer[offset] & (1 << (index & 0b111)) != 0 + +def count_one_bits(n): + return bin(n).count("1") + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def encode_f(f, buffer, offset=0): + (inverted, flips, child) = f + pack_into('I', buffer, offset, inverted) + offset += 4 + for index in flips: + pack_into('I', buffer, offset, 0) + offset += 4 + pack_into('I', buffer, offset, index) + offset += 4 + if child is None: + pack_into('I', buffer, offset, 1) + offset += 4 + return offset + (inverted, left, right) = child + pack_into('I', buffer, offset, 2 if not inverted else 3) + offset += 4 + offset = encode_f(left, buffer, offset) + offset = encode_f(right, buffer, offset) + return offset + +def generate_random_branch(p_mutation): + global N + + p_add_indices = p_mutation * random.random() + p_add_children = p_mutation * random.random() + + inverted = random.randint(0, 1) + indices = set() + children = [] + + # randomly add indices + while random.random() < p_add_indices and len(indices) < N: + available_indices = [i for i in range(0, N) if i not in indices] + if len(available_indices) == 1: + indices.add(available_indices[0]) + continue + indices.add(available_indices[random.randint(0, len(available_indices) - 1)]) + # randomly add children + while random.random() < p_add_children: + child_inverted = random.randint(0, 1) + left = generate_random_branch(p_add_children) + right = generate_random_branch(p_add_children) + children.append((child_inverted, left, right)) + return 
(inverted, indices, children) + +def mutate_f(f, p_mutation): + global N + (inverted, indices, children) = f + mutated_indices = set(indices) + mutated_children = children[:] + + p_invert = p_mutation * random.random() + p_drop_indices = p_mutation * random.random() + p_add_indices = p_mutation * random.random() + p_drop_children = p_mutation * random.random() + p_mutate_child = p_mutation * random.random() + p_clone_child = p_mutation * random.random() + p_invert_child = p_mutation * random.random() + p_add_children = p_mutation * random.random() + + # randomly invert + if random.random() < p_invert: + inverted ^= 1 + # randomly drop indices + while random.random() < p_drop_indices and len(mutated_indices) > 0: + mutated_indices.pop() + # randomly add indices + while random.random() < p_add_indices and len(mutated_indices) < N: + available_indices = [i for i in range(0, N) if i not in mutated_indices] + if len(available_indices) == 1: + mutated_indices.add(available_indices[0]) + continue + mutated_indices.add(available_indices[random.randint(0, len(available_indices) - 1)]) + # randomly drop children + while random.random() < p_drop_children and len(mutated_children) > 0: + if len(mutated_children) == 1: + del mutated_children[0] + break + del mutated_children[random.randint(0, len(mutated_children) - 1)] + # randomly clone children + while random.random() < p_clone_child and len(mutated_children) > 0: + index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1) + (child_inverted, left, right) = mutated_children[index] + if random.random() < p_invert_child: + child_inverted ^= 1 + clone = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation)) + mutated_children.append(clone) + # randomly mutate children + while random.random() < p_mutate_child and len(mutated_children) > 0: + index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1) + (child_inverted, left, right) = mutated_children[index] + if random.random() < p_invert_child: + child_inverted ^= 1 + mutated_children[index] = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation)) + # randomly add children + while random.random() < p_add_children: + child_inverted = random.randint(0, 1) + left = generate_random_branch(p_mutation) + right = generate_random_branch(p_mutation) + mutated_children.append((child_inverted, left, right)) + return (inverted, mutated_indices, mutated_children) + +def generate_program(model, output_var='output'): + global N, M + (constant, indices, child) = model + + statement = 'multiply(' + np.array2string(indices, separator=',') + ', x, temp)\n\t' + statement += output_var + '=' + str(constant) + '+sum(temp)\n\t' + + if not child is None: + left_output = output_var + '0' + right_output = output_var + '1' + (left, right) = child + statement += generate_program(left, left_output) + statement += generate_program(right, right_output) + statement += output_var + '+=' + left_output + '*' + right_output + '\n\t' + statement += output_var + '%=' + str(M) + '\n\t' + return statement + +def compile(model): + program = 'def f(x, temp):\n\t' + generate_program(model) + 'return output' + scope = {'multiply': np.multiply, 'sum': np.sum} + exec(program, scope) + return scope['f'] + +def evaluate(model, x, value = 0): + (inverted, indices, children) = model + for i in indices: + if bit_at_index(x, i) != 0: + value ^= 1 + for child in children: + (child_inverted, left, right) = child + left = evaluate(left, x) + right = 
evaluate(right, x) + if left & right != child_inverted: + value ^= 1 + if inverted: + value ^= 1 + return value + +def encode(v): + byte_values = [] + for i in range(0, math.ceil(N / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(x) + +def sha(v): + global M + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] % M + +def xor(x): + num_one_bits = 0 + for n in x: + num_one_bits += count_one_bits(n) + return num_one_bits % 2 + +def random_sample(m, n): + inputs = np.zeros((m, n)) + for i in range(0, m): + for j in range(0, n): + inputs[i][j] = random.randint(0, 1) + return inputs + +def update_sample(sample, index): + global N + for j in range(0, N): + sample[index][j] = random.randint(0, 1) + +def coherence(inputs, outputs, scratch): + coherences = [] + for i in range(0, len(inputs)): + x_a = inputs[i] + y_a = outputs[i] + numerator = 0 + denominator = 0 + for j in range(0, len(inputs)): + if i == j: + continue + x_b = inputs[j] + y_b = outputs[j] + distance = hamming_distance(x_a, x_b, scratch) + weight = 1.0 / (2 ** distance) + denominator += weight + if y_a == y_b: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return sum(coherences) / len(coherences) + +def build_coherence_models(inputs, scratch): + coherence_models = [] + for i in range(0, len(inputs)): + x_a = inputs[i] + distances = [hamming_distance(x_a, inputs[j], scratch) for j in range(0, len(inputs))] + indices = sorted(range(len(distances)), key=lambda i: distances[i]) + lowest = -1 + denominator = 0 + components = [] + for index in range(0, len(indices)): + j = indices[index] + if distances[j] == 0: + continue + if lowest < 0: + lowest = distances[j] + distance = distances[j] - lowest + if distance >= 8: + break + weight = 2 ** -distance + denominator += weight + components.append((weight, j)) + coherence_models.append((denominator, components)) + return coherence_models + +def fast_coherence(coherence_models, outputs): + coherences = [] + for i in range(0, len(coherence_models)): + (denominator, components) = coherence_models[i] + numerator = 0 + for component in components: + (weight, j) = component + if outputs[i] == outputs[j]: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return sum(coherences) / len(coherences) + +def score(f, sample, distances): + return coherence([(x, f(x) ^ y) for (x, y) in sample], distances) + +def compute_distances(inputs, distances, scratch): + for i in range(0, len(inputs)): + a = inputs[i] + for j in range(i, len(inputs)): + if i == j: + distances[i][j] = 0 + continue + b = inputs[j] + distance = 2 ** -hamming_distance(a, b, scratch) + distances[i][j] = distance + distances[j][i] = distance + +def update_distances(inputs, distances, i, scratch): + a = inputs[i] + for j in range(0, len(inputs)): + if i == j: + distances[i][j] = 0 + continue + b = inputs[j] + distance = 2 ** -hamming_distance(a, b, scratch) + distances[i][j] = distance + distances[j][i] = distance + +def clone_model(model, p_mutation): + global N, M + + clone = model[:] + p_insert_node = p_mutation + + i = 0 + while i < len(clone): + (bias, op, indices, (p_modify, p_bias, p_index, p_insert)) = clone[i] + + # if random.random() < p_modify: + # p_modify += 0.01 + p_add_index = p_index + indices = indices.copy() + if random.random() < p_bias: + p_bias += 0.001 + bias += 
random.randint(0, M - 1) + bias %= M + else: + p_bias -= 0.001 + for absolute_index in range(0, N + i): + relative_index = N - absolute_index - 1 + if random.random() < p_add_index: + p_index += 0.001 + if relative_index in indices: + indices.remove(relative_index) + else: + indices.add(relative_index) + else: + p_index -= 0.001 + # else: + # p_modify -= 0.01 + + if random.random() < p_insert: + p_insert += 0.001 + clone.insert(i, random_node(i, p_mutation)) + for j in range(i + 1, len(clone)): + (bias, op, indices, p) = clone[j] + modified_indices = set() + for index in indices: + if index >= 0: + modified_indices.add(index) + continue + absolute_index = j + index + if absolute_index == i: + if random.random() > 0.5: + modified_indices.add(index) + else: + modified_indices.add(index - 1) + continue + if absolute_index < i: + modified_indices.add(index - 1) + else: + modified_indices.add(index) + clone[j] = (bias, op, modified_indices, p) + i += 1 + else: + p_insert -= 0.001 + + p_modify = min(max(0.001, p_modify), 0.999) + p_bias = min(max(0.001, p_bias), 0.999) + p_index = min(max(0.001, p_index), 0.999) + p_insert = min(max(0.001, p_insert), 0.999) + clone[i] = (bias, op, indices, (p_modify, p_bias, p_index, p_insert)) + i += 1 + + if random.random() < p_insert_node: + i = len(clone) + clone.insert(i, random_node(i, p_mutation)) + for j in range(i + 1, len(clone)): + (bias, op, indices, p) = clone[j] + modified_indices = set() + for index in indices: + if index < N: + modified_indices.add(index) + continue + shifted_index = index - N + if shifted_index == i: + if random.randint(0, 1) == 0: + modified_indices.add(index) + else: + modified_indices.add(index + 1) + if shifted_index > i: + modified_indices.add(index + 1) + else: + modified_indices.add(index) + clone[j] = (bias, op, modified_indices, p) + return clone + +def random_node(i, p_mutation): + global N, M + bias = random.randint(0, M - 1) + op = random.randint(0, 1) + p_modify = 0.5 + p_bias = 0.01 + p_index = 0.5 + p_insert = 0.01 + max_index = N + i - 1 + indices = set() + indices.add(N - 1 - random.randint(0, max_index)) + + for index in range(0, max_index + 1): + if random.random() < p_index: + indices.add(N - 1 - index) + return (bias, op, indices, (p_modify, p_bias, p_index, p_insert)) + +def null_candidate(): + global N + return [] + +def eval_model(model, buffer, x): + global N, M + for i in range(0, len(model)): + (bias, op, indices, _) = model[i] + value = op + for index in indices: + if op == 1: + value *= x[index] if index >= 0 else buffer[i + index] + value %= M + else: + value += x[index] if index >= 0 else buffer[i + index] + value %= M + value += bias + value %= M + if i == len(model) - 1: + return value + else: + buffer[i] = value + return 0 + +def size(model): + return len(model) + +def main(): + global N, M + epochs = 10000 + num_survivors = 10 + num_offspring = 10 + num_candidates = num_survivors + num_survivors * num_offspring + sample_size = 64 + eval_size = 100 + max_nodes = 65536 + p_mutation = 0.5 + g = sha + current_generation = [null_candidate() for _ in range(0, num_candidates)] + + distances = np.zeros((sample_size, sample_size)) + output_equality = np.zeros((sample_size, sample_size)) + inputs = random_sample(sample_size, N) + scratch = np.zeros(N,) + # compute_distances(inputs, distances, scratch) + expected_outputs = np.zeros((sample_size,)) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + outputs = np.zeros((sample_size,)) + output_xor = np.zeros((sample_size,)) + ones = 
np.ones((sample_size,)) + numerators = np.zeros((sample_size,)) + denominators = np.zeros((sample_size,)) + coherences = np.zeros((sample_size,)) + np.matmul(ones, distances, denominators) + scores = np.zeros((num_candidates,)) + eval_buffer = np.zeros((max_nodes,)) + max_score = 0 + last_score = 0 + streak = 0 + + coherence_models = build_coherence_models(inputs, scratch) + + for epoch in range(0, epochs): + for i in range(0, num_candidates): + candidate = current_generation[i] + for j in range(0, sample_size): + outputs[j] = eval_model(candidate, eval_buffer, inputs[j]) + np.subtract(outputs, expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + # for p in range(0, sample_size): + # for q in range(0, sample_size): + # m = int(output_xor[p]) + # n = int(output_xor[q]) + # distance = abs(m - n) + # if distance > M / 2: + # distance = M - distance + # distance /= (M / 2) + # distance **= 2 + # output_equality[p][q] = distance + # # output_equality[p][q] = 1 if m == n else 0 + # np.multiply(output_equality, distances, output_equality) + # np.matmul(ones, output_equality, numerators) + # np.divide(numerators, denominators, coherences) + # score = np.average(coherences) + score = fast_coherence(coherence_models, output_xor) + # if random.random() < 0.1: + # check = coherence(inputs, output_xor, scratch) + # if check - score > 1e-3: + # print('not equal') + scores[i] = score + + top_n = sorted(range(len(scores)), key=lambda i: (scores[i], -size(current_generation[i])))[-num_survivors:] + survivors = [current_generation[index] for index in top_n] + + # f = lambda x: evaluate(current_generation[0], x) + # correct = 0 + # for i in range(0, eval_size): + # x = random_input() + # if f(x) == g(x): + # correct += 1 + + top_score = scores[top_n[-1]] + print(epoch, top_score, size(survivors[-1])) + if top_score <= max_score: + p_mutation += 0.001 + else: + p_mutation = 0.5 + max_score = top_score + + for i in range(0, num_survivors): + current_generation[i] = survivors[i] + + for i in range(0, num_survivors): + candidate = survivors[i] + for j in range(0, num_offspring): + index = num_survivors + j * num_survivors + i + current_generation[index] = clone_model(candidate, random.random()) + + # inputs = random_sample(sample_size, N) + # coherence_models = build_coherence_models(inputs, scratch) + # for i in range(0, sample_size): + # expected_outputs[i] = g(inputs[i]) + + + # # while random.random() < 0.5: + # if last_score == top_score: + # streak += 1 + # else: + # streak = 0 + # if streak >= 4: + # inputs = random_sample(sample_size, N) + # coherence_models = build_coherence_models(inputs, scratch) + # # compute_distances(inputs, distances, scratch) + # # np.matmul(ones, distances, denominators) + # for i in range(0, sample_size): + # expected_outputs[i] = g(inputs[i]) + # streak = 0 + # expected_outputs = np.zeros((sample_size,)) + # for i in range(0, sample_size): + # expected_outputs[i] = g(inputs[i]) + # index = random.randint(0, sample_size - 1) + # update_sample(inputs, index) + # expected_outputs[index] = g(inputs[index]) + # update_distances(inputs, distances, index, scratch) + # np.matmul(ones, distances, denominators) + last_score = top_score + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations4.py b/mutations4.py new file mode 100644 index 0000000..12b8c74 --- /dev/null +++ b/mutations4.py @@ -0,0 +1,591 @@ +import hashlib +import math +from matplotlib import offsetbox +import numpy as np +import random +from struct import pack, pack_into, 
unpack_from +import secrets + +from numpy import hamming + +N = 32 +M = 2 + +def bit_at_index(buffer, index): + offset = (index >> 3) % len(buffer) + return buffer[offset] & (1 << (index & 0b111)) != 0 + +def count_one_bits(n): + return bin(n).count("1") + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def encode_f(f, buffer, offset=0): + (inverted, flips, child) = f + pack_into('I', buffer, offset, inverted) + offset += 4 + for index in flips: + pack_into('I', buffer, offset, 0) + offset += 4 + pack_into('I', buffer, offset, index) + offset += 4 + if child is None: + pack_into('I', buffer, offset, 1) + offset += 4 + return offset + (inverted, left, right) = child + pack_into('I', buffer, offset, 2 if not inverted else 3) + offset += 4 + offset = encode_f(left, buffer, offset) + offset = encode_f(right, buffer, offset) + return offset + +def generate_random_branch(p_mutation): + global N + + p_add_indices = p_mutation * random.random() + p_add_children = p_mutation * random.random() + + inverted = random.randint(0, 1) + indices = set() + children = [] + + # randomly add indices + while random.random() < p_add_indices and len(indices) < N: + available_indices = [i for i in range(0, N) if i not in indices] + if len(available_indices) == 1: + indices.add(available_indices[0]) + continue + indices.add(available_indices[random.randint(0, len(available_indices) - 1)]) + # randomly add children + while random.random() < p_add_children: + child_inverted = random.randint(0, 1) + left = generate_random_branch(p_add_children) + right = generate_random_branch(p_add_children) + children.append((child_inverted, left, right)) + return (inverted, indices, children) + +def mutate_f(f, p_mutation): + global N + (inverted, indices, children) = f + mutated_indices = set(indices) + mutated_children = children[:] + + p_invert = p_mutation * random.random() + p_drop_indices = p_mutation * random.random() + p_add_indices = p_mutation * random.random() + p_drop_children = p_mutation * random.random() + p_mutate_child = p_mutation * random.random() + p_clone_child = p_mutation * random.random() + p_invert_child = p_mutation * random.random() + p_add_children = p_mutation * random.random() + + # randomly invert + if random.random() < p_invert: + inverted ^= 1 + # randomly drop indices + while random.random() < p_drop_indices and len(mutated_indices) > 0: + mutated_indices.pop() + # randomly add indices + while random.random() < p_add_indices and len(mutated_indices) < N: + available_indices = [i for i in range(0, N) if i not in mutated_indices] + if len(available_indices) == 1: + mutated_indices.add(available_indices[0]) + continue + mutated_indices.add(available_indices[random.randint(0, len(available_indices) - 1)]) + # randomly drop children + while random.random() < p_drop_children and len(mutated_children) > 0: + if len(mutated_children) == 1: + del mutated_children[0] + break + del mutated_children[random.randint(0, len(mutated_children) - 1)] + # randomly clone children + while random.random() < p_clone_child and len(mutated_children) > 0: + index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1) + (child_inverted, left, right) = mutated_children[index] + if random.random() < p_invert_child: + child_inverted ^= 1 + clone = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation)) + mutated_children.append(clone) + # randomly mutate children + while random.random() < p_mutate_child and len(mutated_children) > 0: + 
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1) + (child_inverted, left, right) = mutated_children[index] + if random.random() < p_invert_child: + child_inverted ^= 1 + mutated_children[index] = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation)) + # randomly add children + while random.random() < p_add_children: + child_inverted = random.randint(0, 1) + left = generate_random_branch(p_mutation) + right = generate_random_branch(p_mutation) + mutated_children.append((child_inverted, left, right)) + return (inverted, mutated_indices, mutated_children) + +def decode_f(buffer, mutate = False, offset = 0, skip_invert = False): + global N + inverted = 0 + if not skip_invert: + [inverted] = unpack_from('I', buffer, offset) + offset += 4 + # random invert + if mutate and random.random() < 0.01: + inverted ^= 1 + inverted &= 0b1 + flips = set() + # random add flip + while mutate and random.random() < 0.5 and len(flips) < N: + available_indices = [i for i in range(0, N) if i not in flips] + if len(available_indices) == 1: + flips.add(available_indices[0]) + continue + flips.add(available_indices[random.randint(0, len(available_indices) - 1)]) + while offset < len(buffer): + # random create branch + if mutate and random.random() < 0.01: + gate_inverted = random.randint(0, 1) + left = generate_random_branch() + (offset, right) = decode_f(buffer, mutate, offset, True) + return (offset, (inverted, flips, (gate_inverted, left, right))) + [opcode] = unpack_from('I', buffer, offset) + offset += 4 + opcode &= 0b11 + if opcode == 0: + [index] = unpack_from('I', buffer, offset) + offset += 4 + # random skip flip + if mutate and random.random() < 0.01: + continue + if index in flips: + flips.remove(index) + else: + flips.add(index) + elif opcode == 1: + return (offset, (inverted, flips, None)) + else: + (offset, left) = decode_f(buffer, mutate, offset) + (offset, right) = decode_f(buffer, mutate, offset) + gate_inverted = 0 if opcode == 2 else 1 + # random invert + if mutate and random.random() < 0.01: + gate_inverted ^= 1 + # random skip branch + if mutate and random.random() < 0.01: + return (offset, (inverted, flips, None)) + return (offset, (inverted, flips, (gate_inverted, left, right))) + return (offset, (inverted, [], None)) + +def generate_program(model, output_var='output'): + global N, M + (constant, indices, child) = model + + statement = 'multiply(' + np.array2string(indices, separator=',') + ', x, temp)\n\t' + statement += output_var + '=' + str(constant) + '+sum(temp)\n\t' + + if not child is None: + left_output = output_var + '0' + right_output = output_var + '1' + (left, right) = child + statement += generate_program(left, left_output) + statement += generate_program(right, right_output) + statement += output_var + '+=' + left_output + '*' + right_output + '\n\t' + statement += output_var + '%=' + str(M) + '\n\t' + return statement + +def compile(model): + program = 'def f(x, temp):\n\t' + generate_program(model) + 'return output' + scope = {'multiply': np.multiply, 'sum': np.sum} + exec(program, scope) + return scope['f'] + +def evaluate(model, x, value = 0): + (inverted, indices, children) = model + for i in indices: + if bit_at_index(x, i) != 0: + value ^= 1 + for child in children: + (child_inverted, left, right) = child + left = evaluate(left, x) + right = evaluate(right, x) + if left & right != child_inverted: + value ^= 1 + if inverted: + value ^= 1 + return value + +def encode(v): + byte_values = [] + for i in range(0, 
math.ceil(N / 8)):
+ x = 0
+ for j in range(0, 8):
+ index = i * 8 + j
+ x <<= 1
+ x |= int(v[index])
+ byte_values.append(x)
+ return bytearray(byte_values)
+
+def sha(v):
+ global M
+ x = encode(v)
+ m = hashlib.sha256()
+ m.update(x)
+ result = m.digest()
+ return result[0] % M
+
+def xor(x):
+ num_one_bits = 0
+ for n in x:
+ num_one_bits += count_one_bits(n)
+ return num_one_bits % 2
+
+def random_sample(m, n):
+ inputs = np.zeros((m, n))
+ for i in range(0, m):
+ for j in range(0, n):
+ inputs[i][j] = random.randint(0, 1)
+ return inputs
+
+def update_sample(sample, index):
+ global N
+ for j in range(0, N):
+ sample[index][j] = random.randint(0, 1)
+
+def coherence(inputs, outputs, scratch):
+ coherences = []
+ for i in range(0, len(inputs)):
+ x_a = inputs[i]
+ y_a = outputs[i]
+ numerator = 0
+ denominator = 0
+ for j in range(0, len(inputs)):
+ if i == j:
+ continue
+ x_b = inputs[j]
+ y_b = outputs[j]
+ distance = hamming_distance(x_a, x_b, scratch)
+ weight = 1.0 / (2 ** distance)
+ denominator += weight
+ if y_a == y_b:
+ numerator += weight
+ coherence = numerator / denominator if denominator > 0 else 0
+ coherences.append(coherence)
+ return sum(coherences) / len(coherences)
+
+def build_coherence_models(inputs, scratch):
+ coherence_models = []
+ for i in range(0, len(inputs)):
+ x_a = inputs[i]
+ distances = [hamming_distance(x_a, inputs[j], scratch) for j in range(0, len(inputs))]
+ indices = sorted(range(len(distances)), key=lambda i: distances[i])
+ lowest = -1
+ denominator = 0
+ components = []
+ for index in range(0, len(indices)):
+ j = indices[index]
+ if distances[j] == 0:
+ continue
+ if lowest < 0:
+ lowest = distances[j]
+ distance = distances[j] - lowest
+ if distance >= 8:
+ break
+ weight = 2 ** -distance
+ denominator += weight
+ components.append((weight, j))
+ coherence_models.append((denominator, components))
+ return coherence_models
+
+def fast_coherence(coherence_models, outputs):
+ coherences = []
+ for i in range(0, len(coherence_models)):
+ (denominator, components) = coherence_models[i]
+ numerator = 0
+ for component in components:
+ (weight, j) = component
+ if outputs[i] == outputs[j]:
+ numerator += weight
+ coherence = numerator / denominator if denominator > 0 else 0
+ coherences.append(coherence)
+ return sum(coherences) / len(coherences)
+
+def score(f, sample, distances):
+ return coherence([(x, f(x) ^ y) for (x, y) in sample], distances)
+
+def compute_distances(inputs, distances, scratch):
+ for i in range(0, len(inputs)):
+ a = inputs[i]
+ for j in range(i, len(inputs)):
+ if i == j:
+ distances[i][j] = 0
+ continue
+ b = inputs[j]
+ distance = 2 ** -hamming_distance(a, b, scratch)
+ distances[i][j] = distance
+ distances[j][i] = distance
+
+def update_distances(inputs, distances, i, scratch):
+ a = inputs[i]
+ for j in range(0, len(inputs)):
+ if i == j:
+ distances[i][j] = 0
+ continue
+ b = inputs[j]
+ distance = 2 ** -hamming_distance(a, b, scratch)
+ distances[i][j] = distance
+ distances[j][i] = distance
+
+def clone_model(model, p_mutation):
+ global N, M
+
+ clone = model[:]
+ p_insert_node = p_mutation * random.random()
+
+ i = 0
+ while i < len(clone):
+ (bias, op, indices, (p_modify, p_bias, p_index)) = clone[i]
+ p_modify_node = p_modify
+
+ if random.random() < p_modify_node:
+ p_modify += 0.01
+ p_add_index = p_index
+ p_modify_bias = p_bias
+ indices = indices.copy()
+ if random.random() < p_modify_bias:
+ p_bias += 0.01
+ bias += random.randint(0, M - 1)
+ bias %= M
+ else:
+ p_bias -= 0.01
+ for index in range(0, N + i):
+ if
random.random() < p_add_index: + p_index += 0.01 + if index in indices: + indices.remove(index) + else: + indices.add(index) + else: + p_index -= 0.01 + else: + p_modify -= 0.01 + + p_modify = min(max(0.01, p_modify), 0.99) + p_bias = min(max(0.01, p_bias), 0.99) + p_index = min(max(0.01, p_index), 0.99) + clone[i] = (bias, op, indices, (p_modify, p_bias, p_index)) + i += 1 + + if random.random() < p_insert_node: + i = random.randint(0, len(clone)) + clone.insert(i, random_node(N + i - 1, p_mutation)) + for j in range(i + 1, len(clone)): + (bias, op, indices, p) = clone[j] + modified_indices = set() + for index in indices: + if index < N: + modified_indices.add(index) + continue + shifted_index = index - N + if shifted_index == i: + if random.randint(0, 1) == 0: + modified_indices.add(index) + else: + modified_indices.add(index + 1) + if shifted_index > i: + modified_indices.add(index + 1) + else: + modified_indices.add(index) + clone[j] = (bias, op, modified_indices, p) + return clone + +def random_node(max_index, p_mutation): + global N + bias = random.randint(0, M - 1) + op = random.randint(0, 1) + p_modify = random.random() + p_bias = random.random() + p_index = random.random() + indices = set() + indices.add(random.randint(0, max_index)) + + p_add_index = p_mutation * random.random() + for index in range(0, max_index): + if random.random() < p_add_index: + indices.add(index) + return (bias, op, indices, (p_modify, p_bias, p_index)) + +def null_candidate(): + global N + return [] + +def encode_tree(tree_model): + stack = [tree_model] + node_indices = {} + index = 0 + while len(stack) > 0: + node = stack.pop() + node_indices[node] = index + index += 1 + (p, bias, value) = node + if isinstance(value, int): + continue + (left, right) = value + stack.append(left) + stack.append(right) + length = index + + stack = [tree_model] + serialized_model = [] + while len(stack) > 0: + node = stack.pop() + (p, bias, value) = node + serialized_model.insert(0, ) + +def eval_model(model, buffer, x): + global N, M + for i in range(0, len(model)): + (bias, op, indices, _) = model[i] + value = op + for index in indices: + if index >= N + i: + print('This should not happen') + if op == 1: + value *= x[index] if index < N else buffer[index - N] + value %= M + else: + value += x[index] if index < N else buffer[index - N] + value %= M + value += bias + value %= M + if i == len(model) - 1: + return value + else: + buffer[i] = value + return 0 + +def size(model): + return len(model) + +def main(): + global N, M + epochs = 10000 + num_survivors = 100 + num_offspring = 10 + num_candidates = num_survivors + num_survivors * num_offspring + sample_size = 64 + eval_size = 100 + max_nodes = 65536 + p_mutation = 0.5 + g = sha + current_generation = [null_candidate() for _ in range(0, num_candidates)] + + distances = np.zeros((sample_size, sample_size)) + output_equality = np.zeros((sample_size, sample_size)) + inputs = random_sample(sample_size, N) + scratch = np.zeros(N,) + # compute_distances(inputs, distances, scratch) + expected_outputs = np.zeros((sample_size,)) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + outputs = np.zeros((sample_size,)) + output_xor = np.zeros((sample_size,)) + ones = np.ones((sample_size,)) + numerators = np.zeros((sample_size,)) + denominators = np.zeros((sample_size,)) + coherences = np.zeros((sample_size,)) + np.matmul(ones, distances, denominators) + scores = np.zeros((num_candidates,)) + eval_buffer = np.zeros((max_nodes,)) + max_score = 0 + last_score = 0 + 
streak = 0 + + coherence_models = build_coherence_models(inputs, scratch) + + for epoch in range(0, epochs): + for i in range(0, num_candidates): + candidate = current_generation[i] + for j in range(0, sample_size): + outputs[j] = eval_model(candidate, eval_buffer, inputs[j]) + np.subtract(outputs, expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + # for p in range(0, sample_size): + # for q in range(0, sample_size): + # m = int(output_xor[p]) + # n = int(output_xor[q]) + # distance = abs(m - n) + # if distance > M / 2: + # distance = M - distance + # distance /= (M / 2) + # distance **= 2 + # output_equality[p][q] = distance + # # output_equality[p][q] = 1 if m == n else 0 + # np.multiply(output_equality, distances, output_equality) + # np.matmul(ones, output_equality, numerators) + # np.divide(numerators, denominators, coherences) + # score = np.average(coherences) + score = fast_coherence(coherence_models, output_xor) + # if random.random() < 0.1: + # check = coherence(inputs, output_xor, scratch) + # if check - score > 1e-3: + # print('not equal') + scores[i] = score + + top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:] + survivors = [current_generation[index] for index in top_n] + + # f = lambda x: evaluate(current_generation[0], x) + # correct = 0 + # for i in range(0, eval_size): + # x = random_input() + # if f(x) == g(x): + # correct += 1 + + top_score = scores[top_n[-1]] + print(epoch, top_score, size(survivors[-1])) + if top_score <= max_score: + p_mutation += 0.01 + else: + p_mutation = 0.5 + max_score = top_score + + for i in range(0, num_survivors): + current_generation[i] = survivors[i] + + for i in range(0, num_survivors): + candidate = survivors[i] + for j in range(0, num_offspring): + index = num_survivors + j * num_survivors + i + current_generation[index] = clone_model(candidate, random.random() * 0.1) + + inputs = random_sample(sample_size, N) + coherence_models = build_coherence_models(inputs, scratch) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + + # while random.random() < 0.5: + # if last_score == top_score: + # streak += 1 + # else: + # streak = 0 + # if streak >= 4: + # inputs = random_sample(sample_size, N) + # coherence_models = build_coherence_models(inputs, scratch) + # # compute_distances(inputs, distances, scratch) + # # np.matmul(ones, distances, denominators) + # for i in range(0, sample_size): + # expected_outputs[i] = g(inputs[i]) + # streak = 0 + # expected_outputs = np.zeros((sample_size,)) + # for i in range(0, sample_size): + # expected_outputs[i] = g(inputs[i]) + # index = random.randint(0, sample_size - 1) + # update_sample(inputs, index) + # expected_outputs[index] = g(inputs[index]) + # update_distances(inputs, distances, index, scratch) + # np.matmul(ones, distances, denominators) + last_score = top_score + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations5.py b/mutations5.py new file mode 100644 index 0000000..bfda5f2 --- /dev/null +++ b/mutations5.py @@ -0,0 +1,417 @@ +import hashlib +import math +import numpy as np +import random + +N = 8 +M = 2 + +class Candidate: + def __init__(self): + global N + self.bias = 0 + self.offsets = np.zeros((N,)).astype(np.int32) + self.has_child = 0 + self.left = None + self.right = None + + def addOffset(self, x): + self.offsets[x] = 1 + return self + + def setChild(self, left, right): + self.has_child = 1 + self.left = left + self.right = right + return self + +class Probabilities: + def 
__init__(self): + global N, M + self.p_bias = np.zeros(2,) + self.p_bias.fill(0.5) + self.p_offsets = np.zeros((2,N)) + self.p_offsets.fill(0.5) + self.p_has_child = 0 + + self.bias_coherences = np.zeros((2, M,)) + self.bias_coherences.fill(0.5) + self.offset_coherences = np.zeros((2, M, N)) + self.offset_coherences.fill(0.5) + self.has_child_coherences = np.zeros((2,)) + self.has_child_coherences.fill(0.5) + + self.uncertainty = np.zeros((2,)) + self.totals = np.zeros((2,)) + + self.left = None + self.right = None + self.parent = None + self.depth = 1 + + def reset_uncertainty(self): + if self.totals[0] == 0 and self.totals[1] == 0: + return + self.uncertainty.fill(0) + self.totals.fill(0) + if not self.left is None: + self.left.reset_uncertainty() + if not self.right is None: + self.right.reset_uncertainty() + + def min_p_has_child(self): + without_child = self.uncertainty[0] / self.totals[0] if self.totals[0] > 0 else 0 + with_child = self.uncertainty[1] / self.totals[1] if self.totals[1] > 0 else 0 + + if without_child == 0 and with_child == 0: + return 0.5 + return without_child / (without_child + with_child) + + def confidence(self): + global N + total = (2 * self.p_bias[0] - 1) ** 2 + for i in range(0, N): + total += (2 * self.p_offsets[0][i] - 1) ** 2 + return total / (N + 1) + +def clamp(x, min_value = 0.01, max_value = 1): + return min(max(x, min_value), max_value) + +def encode(v): + global N + byte_values = [] + for i in range(0, math.ceil(N / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +def sha(v): + global M + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] % M + +def xor(x): + num_one_bits = 0 + for i in range(0, len(x)): + if i == 0: + continue + num_one_bits += x[i] + return num_one_bits % 2 + +test_candidate = Candidate().addOffset(0).addOffset(1).setChild( + Candidate().addOffset(2), Candidate().addOffset(3).setChild( + Candidate().addOffset(4), Candidate().addOffset(5) + )) + +def eval_test_candidate(x): + global test_candidate + return evaluate_candidate(test_candidate, x) + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def coherence(inputs, outputs, scratch): + coherences = [] + for i in range(0, len(inputs)): + x_a = inputs[i] + y_a = outputs[i] + numerator = 0 + denominator = 0 + for j in range(0, len(inputs)): + if i == j: + continue + x_b = inputs[j] + y_b = outputs[j] + distance = hamming_distance(x_a, x_b, scratch) + weight = 1.0 / (2 ** distance) + denominator += weight + if y_a == 0 and y_b == 0: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return sum(coherences) / len(coherences) + +def random_sample(m, n): + inputs = np.zeros((m, n)) + for i in range(0, m): + for j in range(0, n): + inputs[i][j] = random.randint(0, 1) + return inputs + +def evaluate_candidate(candidate, x): + global N, M + value = candidate.bias + for i in range(0, N): + value += x[i] * candidate.offsets[i] + value %= M + if candidate.has_child == 0: + return value + left = evaluate_candidate(candidate.left, x) + right = evaluate_candidate(candidate.right, x) + value += left * right + value %= M + return value + +def evaluate(probabilities, candidate, x, z, update_uncertainty = True): + global N, M + value = candidate.bias + for i in range(0, N): + value += x[i] * candidate.offsets[i] + value %= M + if candidate.has_child == 0: 
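+ # Leaf case: with no child, the node's value is just bias + sum(offsets * x) mod M.
+ # When update_uncertainty is set, any disagreement with the target z is tallied in
+ # slot 0 ('no child'), which feeds Probabilities.min_p_has_child().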
+ if update_uncertainty: + if value != z: + probabilities.uncertainty[0] += 1 + probabilities.totals[0] += 1 + return value + e = (value - z) % M + left = evaluate(probabilities.left, candidate.left, x, e, False) + right = evaluate(probabilities.right, candidate.right, x, e, False) + if update_uncertainty: + if e == 0: + if left == 1 and right == 1: + evaluate(probabilities.left, candidate.left, x, e) + evaluate(probabilities.right, candidate.right, x, e) + if left == 0: + evaluate(probabilities.left, candidate.left, x, e) + if right == 0: + evaluate(probabilities.right, candidate.right, x, e) + elif e == 1: + if left == 1 and right == 1: + evaluate(probabilities.left, candidate.left, x, e) + evaluate(probabilities.right, candidate.right, x, e) + if left == 0: + evaluate(probabilities.left, candidate.left, x, e) + if right == 0: + evaluate(probabilities.right, candidate.right, x, e) + value += left * right + value %= M + if update_uncertainty: + if value != z: + probabilities.uncertainty[1] += 1 + probabilities.totals[1] += 1 + return value + +def update_probabilities(probabilities, candidates, scores, depth = 1): + global N, M + num_candidates = len(candidates) + min_p_has_child = probabilities.min_p_has_child() + + for z in range(0, 2): + for i in range(0, M): + bias_i_max = 0 + for k in range(0, num_candidates): + candidate = candidates[k] + if candidate is None: + continue + if candidate.bias != i: + continue + if candidate.has_child != z: + continue + bias_i_max = max(bias_i_max, scores[k]) + if bias_i_max == 0: + continue + probabilities.bias_coherences[z][i] = 0.9 * probabilities.bias_coherences[z][i] + 0.1 * bias_i_max + + for z in range(0, 2): + for i in range(0, M): + for j in range(0, N): + offset_ij_max = 0 + for k in range(0, num_candidates): + candidate = candidates[k] + if candidate is None: + continue + if candidate.offsets[j] != i: + continue + if candidate.has_child != z: + continue + offset_ij_max = max(offset_ij_max, scores[k]) + if offset_ij_max == 0: + continue + probabilities.offset_coherences[z][i][j] = 0.9 * probabilities.offset_coherences[z][i][j] + 0.1 * offset_ij_max + + for i in range(0, 2): + has_child_i_max = 0 + for k in range(0, num_candidates): + candidate = candidates[k] + if candidate is None: + continue + if candidate.has_child != i: + continue + has_child_i_max = max(has_child_i_max, scores[k]) + if has_child_i_max == 0: + continue + probabilities.has_child_coherences[i] = 0.9 * probabilities.has_child_coherences[i] + 0.1 * has_child_i_max + + + for z in range(0, 2): + # direction = 1 if z == 0 and probabilities.has_child_coherences[0] > probabilities.has_child_coherences[1] or z == 1 and probabilities.has_child_coherences[1] > probabilities.has_child_coherences[0] else -1 + direction = 1 + p_bias_next = clamp(probabilities.p_bias[z] + direction * (probabilities.bias_coherences[z][1] - probabilities.bias_coherences[z][0]), 0, 1) + # if z == 0 and probabilities.has_child_coherences[0] < probabilities.has_child_coherences[1] or z == 1 and probabilities.has_child_coherences[0] > probabilities.has_child_coherences[1]: + # p_bias_next = 0.5 + probabilities.p_bias[z] = 0.9 * probabilities.p_bias[z] + 0.1 * p_bias_next + for j in range(0, N): + p_offset_next = clamp(probabilities.p_offsets[z][j] + direction * (probabilities.offset_coherences[z][1][j] - probabilities.offset_coherences[z][0][j]), 0, 1) + # if z == 0 and probabilities.has_child_coherences[0] < probabilities.has_child_coherences[1] or z == 1 and probabilities.has_child_coherences[0] > 
probabilities.has_child_coherences[1]: + # p_offset_next = 0.5 + probabilities.p_offsets[z][j] = 0.9 * probabilities.p_offsets[z][j] + 0.1 * p_offset_next + + # direction = 1 if probabilities.parent is None or probabilities.parent.has_child_coherences[1] > probabilities.parent.has_child_coherences[0] else -1 + direction = 1 + # p_has_child_next = clamp(probabilities.p_has_child + direction * (probabilities.has_child_coherences[1] - probabilities.has_child_coherences[0]), probabilities.min_p_has_child(), 1) + # probabilities.p_has_child = 0.9 * probabilities.p_has_child + 0.1 * + if probabilities.confidence() > 0.9 and probabilities.p_has_child == 0: + probabilities.p_bias[0] = round(probabilities.p_bias[0]) + for i in range(0, N): + probabilities.p_offsets[0][i] = round(probabilities.p_offsets[0][i]) + probabilities.p_has_child = 1 + + # if probabilities.has_child_coherences[0] > probabilities.has_child_coherences[1]: + # return + + p_left = probabilities.left + p_right = probabilities.right + if not p_left is None: + left = [candidate.left if not candidate is None and candidate.has_child else None for candidate in candidates] + if any(x is not None for x in left): + update_probabilities(p_left, left, scores, depth + 1) + if not p_right is None: + right = [candidate.right if not candidate is None and candidate.has_child else None for candidate in candidates] + if any(x is not None for x in right): + update_probabilities(p_right, right, scores, depth + 1) + + +def create_candidate(probabilities, candidate): + global N + new_children = 0 + z = 1 if random.random() < probabilities.p_has_child and probabilities.depth <= 4 else 0 + candidate.bias = 1 if random.random() < probabilities.p_bias[0] else 0 + for i in range(0, N): + candidate.offsets[i] = 1 if random.random() < probabilities.p_offsets[0][i] else 0 + if not z: + candidate.has_child = 0 + return new_children + if probabilities.p_has_child < 1: + new_children += 1 + candidate.has_child = 1 + if candidate.left is None: + candidate.left = Candidate() + if candidate.right is None: + candidate.right = Candidate() + depth = probabilities.depth + 1 + if probabilities.left is None: + probabilities.left = Probabilities() + probabilities.left.parent = probabilities + probabilities.left.depth = depth + # probabilities.left.p_has_child = 2 ** -depth + if probabilities.right is None: + probabilities.right = Probabilities() + probabilities.right.parent = probabilities + probabilities.right.depth = depth + # probabilities.right.p_has_child = 2 ** -depth + new_children += create_candidate(probabilities.left, candidate.left) + new_children += create_candidate(probabilities.right, candidate.right) + return new_children + +def copy_candidate(src, dest): + global N + dest.bias = src.bias + for i in range(0, N): + dest.offsets[i] = src.offsets[i] + has_child = src.has_child + dest.has_child = has_child + if not has_child: + return + if dest.left is None: + dest.left = Candidate() + if dest.right is None: + dest.right = Candidate() + copy_candidate(src.left, dest.left) + copy_candidate(src.right, dest.right) + +def p(x): + return math.ceil(x * 100) / 100 + +def p_a(x): + return [p(z) for z in x] + +def print_probabilities(probabilities, depth=0): + global M + if depth == 0: + print('=====================') + left = probabilities.left + right = probabilities.right + if left is None: + print('None') + else: + print_probabilities(left, depth + 1) + if right is None: + print('None') + else: + print_probabilities(right, depth + 1) + for z in range(0, 2): + # for 
i in range(0, M): + # print(z, i, p(probabilities.bias_coherences[z][i]), p_a(probabilities.offset_coherences[z][i]), p(probabilities.has_child_coherences[i])) + print(depth, z, p(probabilities.p_bias[z]), p_a(probabilities.p_offsets[z]), p(probabilities.p_has_child), p(probabilities.confidence())) + if depth == 0: + print('=====================') + +def main(): + global N, M + sample_size = 64 + num_candidates = 100 + num_survivors = 10 + epochs = 1000 + output_xor = np.zeros(sample_size,) + scratch = np.zeros(N,) + g = eval_test_candidate + expected_outputs = np.zeros((sample_size,)) + inputs = random_sample(sample_size, N) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + outputs = np.zeros((sample_size,)) + probabilities = Probabilities() + candidates = [Candidate() for _ in range(0, num_candidates + num_survivors)] + scores = np.zeros((num_candidates + num_survivors,)) + + while True: + max_new_children = 0 + min_new_children = 1e6 + probabilities.reset_uncertainty() + for i in range(0, len(candidates)): + candidate = candidates[i] + if i < num_candidates: + create_candidate(probabilities, candidate) + for j in range(0, sample_size): + outputs[j] = evaluate(probabilities, candidate, inputs[j], expected_outputs[j]) + np.subtract(outputs, expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + scores[i] = coherence(inputs, output_xor, scratch) + update_probabilities(probabilities, candidates, scores) + print_probabilities(probabilities) + print(np.max(scores)) + + top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:] + for i in range(0, num_survivors): + src_index = top_n[i] + dest_index = num_candidates + i + if src_index == dest_index: + continue + copy_candidate(candidates[src_index], candidates[dest_index]) + + inputs = random_sample(sample_size, N) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations6.py b/mutations6.py new file mode 100644 index 0000000..a3acb63 --- /dev/null +++ b/mutations6.py @@ -0,0 +1,488 @@ +from enum import unique +import hashlib +import math +import numpy as np +import random +import time + +N = 8 +M = 2 + +def timeit(f): + def timed(*args, **kw): + ts = time.time() + result = f(*args, **kw) + te = time.time() + + print('func:%r took: %2.4f sec' % (f.__name__, te-ts)) + return result + return timed + +def vec_to_int(bias, x): + global N + z = bias + for i in range(0, N): + z <<= 1 + z |= x[i] + return z + +class Candidate: + def __init__(self, layer): + global N + self.layer = layer + self.node_count = 2 ** layer + self.bias = np.zeros((self.node_count,)).astype(np.int32) + self.offsets = np.zeros((self.node_count, N)).astype(np.int32) + + def normalize(self): + global N + if self.node_count < 2: + return + # pairs of two must be in order + for i in range(0, self.node_count, 2): + left_id = vec_to_int(self.bias[i], self.offsets[i]) + right_id = vec_to_int(self.bias[i + 1], self.offsets[i + 1]) + if left_id > right_id: + temp = self.bias[i] + self.bias[i] = self.bias[i + 1] + self.bias[i + 1] = temp + for j in range(0, N): + temp = self.offsets[i][j] + self.offsets[i][j] = self.offsets[i + 1][j] + self.offsets[i + 1][j] = temp + +class Probabilities: + def __init__(self, layer): + global N + self.layer = layer + self.node_count = 2 ** layer + self.p_bias = np.zeros((self.node_count,)) + self.p_bias.fill(0.5) + self.p_offsets = np.zeros((self.node_count, N)) + self.p_offsets.fill(0.5) + + 
self.bias_coherences = np.zeros((2, self.node_count,)) + self.bias_coherences.fill(0.5) + self.offset_coherences = np.zeros((2, self.node_count, N)) + self.offset_coherences.fill(0.5) + + def inertia(self): + global N + total = 0 + for i in range(0, self.node_count): + if self.p_bias[i] > 1e-2 and self.p_bias[i] < (1 - 1e-2): + total += abs(self.bias_coherences[1][i] - self.bias_coherences[0][i]) + for j in range(0, N): + if self.p_offsets[i][j] > 1e-2 and self.p_offsets[i][j] < (1 - 1e-2): + total += abs(self.offset_coherences[1][i][j] - self.offset_coherences[0][i][j]) + return total + + def has_converged(self): + global N + for i in range(0, self.node_count): + for j in range(0, N): + if self.p_offsets[i][j] > 1e-2 and self.p_offsets[i][j] < 1 - 1e-2: + return False + return True + + def confidence(self): + global N + total = 0 + for i in range(0, self.node_count): + total += (2 * self.p_bias[i] - 1) ** 2 + for j in range(0, N): + total += (2 * self.p_offsets[i][j] - 1) ** 2 + return total / ((N + 1) * self.node_count) + + def flatten(self): + candidate = Candidate(self.layer) + for i in range(0, self.node_count): + force_zero = True + if self.node_count > 1: + k = i ^ 0b1 + if self.p_bias[k] > 1e-2: + force_zero = False + if force_zero: + for j in range(0, N): + if self.p_offsets[k][j] > 1e-2: + force_zero = False + break + else: + force_zero = False + + candidate.bias[i] = 1 if not force_zero and self.p_bias[i] >= (1 - 1e-2) else 0 + for j in range(0, N): + candidate.offsets[i][j] = 1 if not force_zero and self.p_offsets[i][j] >= (1 - 1e-2) else 0 + return candidate + +def clamp(x, min_value = 0.01, max_value = 1): + return min(max(x, min_value), max_value) + +def encode(v): + global N + byte_values = [] + for i in range(0, math.ceil(N / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +# 00100111 x4 +# 00000110 x1 +def sha(v): + global M + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] % M + +def xor(x): + num_one_bits = 0 + for i in range(0, len(x)): + if i == 0: + continue + num_one_bits += x[i] + return num_one_bits % 2 + +def test_fn(x): + # 0 1 + # 2 | 3 + # 4 | 5 | 6 | 7 + # | | 0 | 7 | | | | + return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7]))) + +def candidate_fn(x): + return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2]) + +def true_fn(x): + return x[0] ^ x[1] ^ (x[3] * x[2]) + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def coherence(outputs, distances): + coherences = [] + for i in range(0, len(outputs)): + y_a = outputs[i] + numerator = 0 + denominator = 0 + for j in range(0, len(outputs)): + if i == j: + continue + y_b = outputs[j] + weight = distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return sum(coherences) / len(coherences) + +def random_sample(m, n): + inputs = np.zeros((m, n)).astype(np.int32) + for i in range(0, m): + for j in range(0, n): + inputs[i][j] = random.randint(0, 1) + return inputs + +def populate_distances(inputs, distances, scratch): + for i in range(0, len(inputs)): + x_a = inputs[i] + for j in range(0, len(inputs)): + if i == j: + continue + x_b = inputs[j] + distance = hamming_distance(x_a, x_b, scratch) + distances[i][j] = 1.0 / (2 ** distance) + +def 
populate_layers_scratch(layers, x, layers_scratch, compute_scratch): + layers_scratch[0].fill(0) + for i in range(1, len(layers_scratch)): + scratch = layers_scratch[i] + layer = layers[i - 1] + for j in range(0, layer.node_count): + value = layer.bias[j] + np.multiply(layer.offsets[j], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + left = layers_scratch[i - 1][j * 2] + right = layers_scratch[i - 1][j * 2 + 1] + value ^= left * right + scratch[j] = value + return layers_scratch[-1][0] + +def evaluate_cached(candidate, x, layers_scratch, layers_scratch_base, compute_scratch): + global N + maybe_evaluate = set() + for j in range(0, candidate.node_count, 2): + value = candidate.bias[j] + np.multiply(candidate.offsets[j], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + layers_scratch[0][j] = value + if candidate.node_count > 1: + value = candidate.bias[j + 1] + np.multiply(candidate.offsets[j + 1], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + layers_scratch[0][j + 1] = value + if layers_scratch[0][j] == 1 and layers_scratch[0][j + 1] == 1: + maybe_evaluate.add(int(j / 2)) + + for i in range(1, len(layers_scratch)): + np.copyto(layers_scratch[i], layers_scratch_base[i]) + maybe_evaluate_next = set() + for j in maybe_evaluate: + left = layers_scratch[i - 1][j * 2] + right = layers_scratch[i - 1][j * 2 + 1] + child_value = left * right + left_base = layers_scratch_base[i - 1][j * 2] + right_base = layers_scratch_base[i - 1][j * 2 + 1] + child_base_value = left_base * right_base + if child_value != child_base_value: + layers_scratch[i][j] ^= 1 + maybe_evaluate_next.add(int(j / 2)) + maybe_evaluate = maybe_evaluate_next + return layers_scratch[-1][0] + +def evaluate(layers, candidate, x, layers_scratch, compute_scratch): + global N + for i in range(0, len(layers_scratch)): + scratch = layers_scratch[i] + if i == 0: + for j in range(0, candidate.node_count): + value = candidate.bias[j] + np.multiply(candidate.offsets[j], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + scratch[j] = value + else: + layer = layers[i - 1] + for j in range(0, layer.node_count): + value = layer.bias[j] + np.multiply(layer.offsets[j], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + left = layers_scratch[i - 1][j * 2] + right = layers_scratch[i - 1][j * 2 + 1] + value ^= left * right + scratch[j] = value + return layers_scratch[-1][0] + +@timeit +def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch): + global M, N + scores.fill(0) + unique_candidates = {} + for j in range(0, num_candidates): + create_candidate(probabilities, candidates[j]) + unique_candidates[candidate_str(candidates[j])] = j + + for i in range(0, sample_size): + populate_layers_scratch(layers, inputs[i], layers_scratch_base, int_scratch) + for _, j in unique_candidates.items(): + candidate = candidates[j] + outputs[j][i] = evaluate_cached(candidate, inputs[i], layers_scratch, layers_scratch_base, int_scratch) + # if outputs[j][i] != evaluate(layers, candidate, inputs[i], layers_scratch, int_scratch): + # print('Uh-oh') + for _, j in unique_candidates.items(): + candidate = candidates[j] + np.subtract(outputs[j], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + scores[j] = coherence(output_xor, distances) + +@timeit +def update_probabilities(probabilities, candidates, scores): + global N + num_candidates = 
len(candidates) + + for i in range(0, 2): + for j in range(0, probabilities.node_count): + bias_max = 0 + bias_sum = 0 + bias_count = 0 + for p in range(0, num_candidates): + candidate = candidates[p] + if candidate.bias[j] != i: + continue + if scores[p] == 0: + continue + bias_max = max(bias_max, scores[p]) + bias_sum += scores[p] + bias_count += 1 + if bias_max == 0: + continue + # weight = bias_count / num_candidates + weight = 0.1 + bias_avg = bias_sum / bias_count + probabilities.bias_coherences[i][j] = (1.0 - weight) * probabilities.bias_coherences[i][j] + weight * bias_max + # probabilities.bias_coherences[i][j] = bias_max + + for i in range(0, 2): + for j in range(0, probabilities.node_count): + for k in range(0, N): + offset_max = 0 + offset_sum = 0 + offset_count = 0 + for p in range(0, num_candidates): + candidate = candidates[p] + if candidate.offsets[j][k] != i: + continue + if scores[p] == 0: + continue + offset_max = max(offset_max, scores[p]) + offset_sum += scores[p] + offset_count += 1 + if offset_max == 0: + continue + # weight = offset_count / num_candidates + weight = 0.1 + offset_avg = offset_sum / offset_count + probabilities.offset_coherences[i][j][k] = (1.0 - weight) * probabilities.offset_coherences[i][j][k] + weight * offset_max + # probabilities.offset_coherences[i][j][k] = offset_max + + for j in range(0, probabilities.node_count): + base_delta = probabilities.bias_coherences[1][j] - probabilities.bias_coherences[0][j] + delta = base_delta + q = j ^ 0b1 + if probabilities.node_count > 1: + q_delta = probabilities.bias_coherences[1][q] - probabilities.bias_coherences[0][q] + if base_delta > 0 and q_delta > 0: + delta -= 0.5 * q_delta + + p_bias_next = clamp(probabilities.p_bias[j] + delta, 0, 1) + probabilities.p_bias[j] = 0.9 * probabilities.p_bias[j] + 0.1 * p_bias_next + for k in range(0, N): + base_delta = probabilities.offset_coherences[1][j][k] - probabilities.offset_coherences[0][j][k] + delta = base_delta + if probabilities.node_count > 1: + q_delta = probabilities.offset_coherences[1][q][k] - probabilities.offset_coherences[0][q][k] + if base_delta > 0 and q_delta > 0: + delta -= 0.5 * q_delta + + p_offset_next = clamp(probabilities.p_offsets[j][k] + delta, 0, 1) + probabilities.p_offsets[j][k] = 0.9 * probabilities.p_offsets[j][k] + 0.1 * p_offset_next + +def create_candidate(probabilities, candidate): + global N + for i in range(0, probabilities.node_count): + candidate.bias[i] = 1 if random.random() < probabilities.p_bias[i] else 0 + # candidate.bias[i] = 0 + for j in range(0, N): + candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0 + # candidate.normalize() + +def copy_candidate(src, dest): + global N + for i in range(0, src.node_count): + dest.bias[i] = src.bias[i] + for i in range(0, src.node_count): + for j in range(0, N): + dest.offsets[i][j] = src.offsets[i][j] + +def p(x): + return math.ceil(x * 100) / 100 + +def p_a(x): + return [p(z) for z in x] + +def print_probabilities(probabilities): + print('=====================') + for i in range(0, probabilities.node_count): + print(i, p(probabilities.p_bias[i]), p_a(probabilities.p_offsets[i])) + print('=====================') + +def candidate_str(candidate): + global N + build_str = '' + for i in range(0, candidate.node_count): + build_str += str(candidate.bias[i]) + for j in range(0, N): + build_str += str(candidate.offsets[i][j]) + return build_str + +def main(): + global N, M + sample_size = 64 + num_candidates = 100 + num_survivors = 8 + output_xor = 
np.zeros(sample_size,) + scratch = np.zeros((N,)) + int_scratch = np.zeros((N,)).astype(np.int32) + g = sha + expected_outputs = np.zeros((sample_size,)) + inputs = random_sample(sample_size, N) + distances = np.zeros((sample_size, sample_size)) + populate_distances(inputs, distances, scratch) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + outputs = np.zeros((num_candidates + num_survivors, sample_size,)) + scores = np.zeros((num_candidates + num_survivors,)) + + layers = [] + layers_scratch = [np.zeros(1, ).astype(np.int32)] + layers_scratch_base = [np.zeros(1, ).astype(np.int32)] + layer = 0 + + # for i in range(0, sample_size): + # outputs[0][i] = candidate_fn(inputs[i]) + + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + # print(score) + + # for i in range(0, sample_size): + # outputs[0][i] = true_fn(inputs[i]) + + # np.subtract(outputs[0], expected_outputs, output_xor) + # np.mod(output_xor, M, output_xor) + # score = coherence(output_xor, distances) + # print(score) + # return + + while score < 1: + probabilities = Probabilities(layer) + candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)] + inertia = 1 + while inertia > 1e-2: + compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch) + update_probabilities(probabilities, candidates, scores) + inertia = 0.9 * inertia + 0.1 * probabilities.inertia() + + print_probabilities(probabilities) + for candidate in layers: + print(candidate.bias, candidate.offsets) + print(np.max(scores), probabilities.inertia(), inertia) + + top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:] + + for i in range(0, num_survivors): + src_index = top_n[i] + dest_index = num_candidates + i + if src_index == dest_index: + continue + src = candidates[src_index] + dest = candidates[dest_index] + candidates[dest_index] = src + candidates[src_index] = dest + + inputs = random_sample(sample_size, N) + populate_distances(inputs, distances, scratch) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + + candidate = probabilities.flatten() + for j in range(0, sample_size): + outputs[0][j] = evaluate(layers, candidate, inputs[j], layers_scratch, int_scratch) + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + + layers.insert(0, candidate) + layer += 1 + layers_scratch.insert(0, np.zeros(2 ** layer,).astype(np.int32)) + layers_scratch_base.insert(0, np.zeros(2 ** layer,).astype(np.int32)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations7.py b/mutations7.py new file mode 100644 index 0000000..3bd296d --- /dev/null +++ b/mutations7.py @@ -0,0 +1,455 @@ +from enum import unique +import hashlib +import math +import numpy as np +import random +import time + +N = 8 +M = 2 + +def vec_to_int(x): + global N + z = 0 + for i in range(0, N + 1): + z <<= 1 + z |= x[i] + return z + +def timeit(f): + def timed(*args, **kw): + ts = time.time() + result = f(*args, **kw) + te = time.time() + + print('func:%r took: %2.4f sec' % (f.__name__, te-ts)) + return result + return timed + +class Candidate: + def __init__(self, layer): + global N + self.layer = layer + self.node_count = 2 ** layer + self.offsets = np.zeros((self.node_count, N + 
1)).astype(np.int32) + +class Probabilities: + def __init__(self, layer): + global N + self.layer = layer + self.node_count = 2 ** layer + self.p_offsets = np.zeros((self.node_count, N + 1)) + self.p_offsets.fill(0.5) + self.offset_coherences = np.zeros((2, self.node_count, N + 1, 2, self.node_count, N + 1)) + self.offset_coherences.fill(-1) + + def inertia(self): + global N + total = 0 + for i in range(0, self.node_count): + for j in range(0, N + 1): + if self.p_offsets[i][j] > 1e-2 and self.p_offsets[i][j] < (1 - 1e-2): + total += abs(self.offset_coherences[1][i][j][1][i][j] - self.offset_coherences[0][i][j][0][i][j]) + return total + + def flatten(self): + candidate = Candidate(self.layer) + for i in range(0, self.node_count): + for j in range(0, N + 1): + candidate.offsets[i][j] = 1 if self.p_offsets[i][j] >= 0.5 else 0 + if self.node_count > 1: + for i in range(0, self.node_count): + if not candidate.offsets[i].any(): + q = i ^ 0b1 + candidate.offsets[q].fill(0) + return candidate + +def clamp(x, min_value = 0.01, max_value = 1): + return min(max(x, min_value), max_value) + +def encode(v): + global N + byte_values = [] + for i in range(0, math.ceil(N / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +# 00100111 x4 +# 00000110 x1 +def sha(v): + global M + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] % M + +def xor(x): + num_one_bits = 0 + for i in range(0, len(x)): + if i == 0: + continue + num_one_bits += x[i] + return num_one_bits % 2 + +def test_fn(x): + # 0 1 + # 2 | 3 + # 4 | 5 | 6 | 7 + # | | 0 | 7 | | | | + return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7]))) + +def candidate_fn(x): + return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2]) + +def true_fn(x): + return x[0] ^ x[1] ^ (x[3] * x[2]) + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def coherence(outputs, distances): + coherences = [] + for i in range(0, len(outputs)): + y_a = outputs[i] + numerator = 0 + denominator = 0 + for j in range(0, len(outputs)): + if i == j: + continue + y_b = outputs[j] + weight = distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return sum(coherences) / len(coherences) + +def random_sample(m, n): + inputs = np.zeros((m, n + 1)).astype(np.int32) + for i in range(0, m): + for j in range(0, n): + inputs[i][j] = random.randint(0, 1) + inputs[i][n] = 1 + return inputs + +def populate_distances(inputs, distances, scratch): + for i in range(0, len(inputs)): + x_a = inputs[i] + for j in range(0, len(inputs)): + if i == j: + continue + x_b = inputs[j] + distance = hamming_distance(x_a, x_b, scratch) + distances[i][j] = 1.0 / (2 ** distance) + +def populate_layers_scratch(layers, x, layers_scratch, compute_scratch): + layers_scratch[0].fill(0) + for i in range(1, len(layers_scratch)): + scratch = layers_scratch[i] + layer = layers[i - 1] + for j in range(0, layer.node_count): + value = 0 + np.multiply(layer.offsets[j], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + left = layers_scratch[i - 1][j * 2] + right = layers_scratch[i - 1][j * 2 + 1] + value ^= left * right + scratch[j] = value + return layers_scratch[-1][0] + +def evaluate_cached(candidate, x, layers_scratch, 
layers_scratch_base, compute_scratch): + global N + maybe_evaluate = set() + for j in range(0, candidate.node_count, 2): + value = 0 + np.multiply(candidate.offsets[j], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + layers_scratch[0][j] = value + if candidate.node_count > 1: + value = 0 + np.multiply(candidate.offsets[j + 1], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + layers_scratch[0][j + 1] = value + if layers_scratch[0][j] == 1 and layers_scratch[0][j + 1] == 1: + maybe_evaluate.add(int(j / 2)) + + for i in range(1, len(layers_scratch)): + np.copyto(layers_scratch[i], layers_scratch_base[i]) + maybe_evaluate_next = set() + for j in maybe_evaluate: + left = layers_scratch[i - 1][j * 2] + right = layers_scratch[i - 1][j * 2 + 1] + child_value = left * right + left_base = layers_scratch_base[i - 1][j * 2] + right_base = layers_scratch_base[i - 1][j * 2 + 1] + child_base_value = left_base * right_base + if child_value != child_base_value: + layers_scratch[i][j] ^= 1 + maybe_evaluate_next.add(int(j / 2)) + maybe_evaluate = maybe_evaluate_next + return layers_scratch[-1][0] + +def evaluate(layers, candidate, x, layers_scratch, compute_scratch): + global N + for i in range(0, len(layers_scratch)): + scratch = layers_scratch[i] + if i == 0: + for j in range(0, candidate.node_count): + value = 0 + np.multiply(candidate.offsets[j], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + scratch[j] = value + else: + layer = layers[i - 1] + for j in range(0, layer.node_count): + value = 0 + np.multiply(layer.offsets[j], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + left = layers_scratch[i - 1][j * 2] + right = layers_scratch[i - 1][j * 2 + 1] + value ^= left * right + scratch[j] = value + return layers_scratch[-1][0] + +@timeit +def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch): + global M, N + scores.fill(0) + unique_candidates = {} + for j in range(0, num_candidates): + create_candidate(probabilities, candidates[j]) + unique_candidates[candidate_str(candidates[j])] = j + + for i in range(0, sample_size): + populate_layers_scratch(layers, inputs[i], layers_scratch_base, int_scratch) + for _, j in unique_candidates.items(): + candidate = candidates[j] + outputs[j][i] = evaluate_cached(candidate, inputs[i], layers_scratch, layers_scratch_base, int_scratch) + # if outputs[j][i] != evaluate(layers, candidate, inputs[i], layers_scratch, int_scratch): + # print('Uh-oh') + for _, j in unique_candidates.items(): + candidate = candidates[j] + np.subtract(outputs[j], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + scores[j] = coherence(output_xor, distances) + +@timeit +def update_probabilities(probabilities, candidates, inputs, scores): + global N + num_candidates = len(candidates) + + variance = np.zeros((N + 1,)) + for x in inputs: + variance += x + + probabilities.offset_coherences.fill(-1) + for p in range(0, num_candidates): + candidate = candidates[p] + score = scores[p] + if score == 0: + continue + for j in range(0, probabilities.node_count): + for k in range(0, N + 1): + i = candidate.offsets[j][k] + for m in range(0, probabilities.node_count): + for n in range(0, N + 1): + l = candidate.offsets[m][n] + probabilities.offset_coherences[i][j][k][l][m][n] = max(score, probabilities.offset_coherences[i][j][k][l][m][n]) + + # for i in range(0, 2): + # for j in range(0, 
probabilities.node_count): + # for k in range(0, N + 1): + # for l in range(0, 2): + # for m in range(0, probabilities.node_count): + # for n in range(0, N + 1): + # offset_max = 0 + # offset_sum = 0 + # offset_count = 0 + # for p in range(0, num_candidates): + # candidate = candidates[p] + # if candidate.offsets[j][k] != i: + # continue + # if candidate.offsets[m][n] != l: + # continue + # if scores[p] == 0: + # continue + # offset_max = max(offset_max, scores[p]) + # offset_sum += scores[p] + # offset_count += 1 + # if offset_max == 0: + # continue + # probabilities.offset_coherences[i][j][k][l][m][n] = offset_max + + p_offsets_next = np.zeros((probabilities.node_count, N + 1)) + inertia = 0 + for j in range(0, probabilities.node_count): + for k in range(0, N + 1): + delta = 0 + count = 0 + for m in range(0, probabilities.node_count): + for n in range(0, N + 1): + if j == m and k == n: + continue + # confidence = variance[k] * variance[n] / (len(inputs) ** 2) + confidence = 1.0 + p_j1_if_m0 = probabilities.offset_coherences[1][j][k][0][m][n] + p_j0_if_m0 = probabilities.offset_coherences[0][j][k][0][m][n] + p_j1_if_m1 = probabilities.offset_coherences[1][j][k][1][m][n] + p_j0_if_m1 = probabilities.offset_coherences[0][j][k][1][m][n] + if p_j1_if_m0 >= 0 and p_j0_if_m0 >= 0: + delta_if_m0 = p_j1_if_m0 - p_j0_if_m0 + delta += delta_if_m0 * (1.0 - probabilities.p_offsets[m][n]) * confidence + count += 1 + if p_j1_if_m1 >= 0 and p_j0_if_m1 >= 0: + delta_if_m1 = p_j1_if_m1 - p_j0_if_m1 + delta += delta_if_m1 * probabilities.p_offsets[m][n] * confidence + count += 1 + if count > 0: + delta /= count + p_offsets_next[j][k] = clamp(probabilities.p_offsets[j][k] + delta, 0, 1) + inertia += abs(p_offsets_next[j][k] - probabilities.p_offsets[j][k]) + + for j in range(0, probabilities.node_count): + for k in range(0, N + 1): + p_offset_next = p_offsets_next[j][k] + probabilities.p_offsets[j][k] = 0.9 * probabilities.p_offsets[j][k] + 0.1 * p_offset_next + + # if probabilities.node_count > 1: + # for j in range(0, probabilities.node_count): + # q = j ^ 0b1 + # for k in range(0, N + 1): + # if probabilities.p_offsets[j][k] > 0.5: + # probabilities.p_offsets[q][k] = min(probabilities.p_offsets[q][k], 1 - probabilities.p_offsets[j][k]) + + return inertia + +def create_candidate(probabilities, candidate): + global N + for i in range(0, probabilities.node_count): + for j in range(0, N + 1): + candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0 + +def copy_candidate(src, dest): + global N + for i in range(0, src.node_count): + for j in range(0, N + 1): + dest.offsets[i][j] = src.offsets[i][j] + +def p(x): + return math.ceil(x * 100) / 100 + +def p_a(x): + return [p(z) for z in x] + +def print_probabilities(probabilities): + print('=====================') + for i in range(0, probabilities.node_count): + print(i, p_a(probabilities.p_offsets[i])) + print('=====================') + +def candidate_str(candidate): + global N + build_str = '' + for i in range(0, candidate.node_count): + for j in range(0, N + 1): + build_str += str(candidate.offsets[i][j]) + return build_str + +def main(): + global N, M + sample_size = 64 + num_candidates = 100 + num_survivors = 8 + output_xor = np.zeros(sample_size,) + scratch = np.zeros((N + 1,)) + int_scratch = np.zeros((N + 1,)).astype(np.int32) + g = test_fn + expected_outputs = np.zeros((sample_size,)) + inputs = random_sample(sample_size, N) + distances = np.zeros((sample_size, sample_size)) + populate_distances(inputs, distances, scratch) + 
for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + outputs = np.zeros((num_candidates + num_survivors, sample_size,)) + scores = np.zeros((num_candidates + num_survivors,)) + + layers = [] + layers_scratch = [np.zeros(1, ).astype(np.int32)] + layers_scratch_base = [np.zeros(1, ).astype(np.int32)] + layer = 0 + + # for i in range(0, sample_size): + # outputs[0][i] = candidate_fn(inputs[i]) + + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + # print(score) + + # for i in range(0, sample_size): + # outputs[0][i] = true_fn(inputs[i]) + + # np.subtract(outputs[0], expected_outputs, output_xor) + # np.mod(output_xor, M, output_xor) + # score = coherence(output_xor, distances) + # print(score) + # return + + while score < 1: + probabilities = Probabilities(layer) + candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)] + inertia = 1 + while inertia > 0.01: + compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch) + round_inertia = update_probabilities(probabilities, candidates, inputs, scores) + inertia = 0.9 * inertia + 0.1 * round_inertia + + print_probabilities(probabilities) + for candidate in layers: + print(candidate.offsets) + print(np.max(scores), round_inertia, inertia) + + top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:] + + for i in range(0, num_survivors): + src_index = top_n[i] + dest_index = num_candidates + i + if src_index == dest_index: + continue + src = candidates[src_index] + dest = candidates[dest_index] + candidates[dest_index] = src + candidates[src_index] = dest + + inputs = random_sample(sample_size, N) + populate_distances(inputs, distances, scratch) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + + candidate = probabilities.flatten() + for j in range(0, sample_size): + outputs[0][j] = evaluate(layers, candidate, inputs[j], layers_scratch, int_scratch) + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + + layers.insert(0, candidate) + layer += 1 + layers_scratch.insert(0, np.zeros(2 ** layer,).astype(np.int32)) + layers_scratch_base.insert(0, np.zeros(2 ** layer,).astype(np.int32)) + + for candidate in layers: + print(candidate.offsets) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations8.py b/mutations8.py new file mode 100644 index 0000000..253ce4d --- /dev/null +++ b/mutations8.py @@ -0,0 +1,451 @@ +from enum import unique +import hashlib +import math +import numpy as np +import random +import time + +N = 8 +M = 2 + +def vec_to_int(x): + global N + z = 0 + for i in range(0, N + 1): + z <<= 1 + z |= x[i] + return z + +def timeit(f): + def timed(*args, **kw): + ts = time.time() + result = f(*args, **kw) + te = time.time() + + print('func:%r took: %2.4f sec' % (f.__name__, te-ts)) + return result + return timed + +class Candidate: + def __init__(self, layer): + global N + self.layer = layer + self.node_count = 2 ** layer + self.offsets = np.zeros((self.node_count, N + 1)).astype(np.int32) + +class Probabilities: + def __init__(self, layer): + global N + self.layer = layer + self.node_count = 2 ** layer + self.p_offsets = np.zeros((self.node_count, N + 1)) + self.p_offsets.fill(0.5) + self.offset_coherences = np.zeros((2, 
self.node_count, N + 1, 2, self.node_count, N + 1)) + self.offset_coherences.fill(-1) + self.deltas = np.zeros((self.node_count, N + 1, 2, self.node_count, N + 1)) + + def inertia(self): + global N + total = 0 + for i in range(0, self.node_count): + for j in range(0, N + 1): + if self.p_offsets[i][j] > 1e-2 and self.p_offsets[i][j] < (1 - 1e-2): + total += abs(self.offset_coherences[1][i][j][1][i][j] - self.offset_coherences[0][i][j][0][i][j]) + return total + + def flatten(self): + candidate = Candidate(self.layer) + for i in range(0, self.node_count): + for j in range(0, N + 1): + candidate.offsets[i][j] = 1 if self.p_offsets[i][j] >= 0.5 else 0 + if self.node_count > 1: + for i in range(0, self.node_count): + if not candidate.offsets[i].any(): + q = i ^ 0b1 + candidate.offsets[q].fill(0) + return candidate + +def clamp(x, min_value = 0.01, max_value = 1): + return min(max(x, min_value), max_value) + +def encode(v): + global N + byte_values = [] + for i in range(0, math.ceil(N / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +# 00100111 x4 +# 00000110 x1 +def sha(v): + global M + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] % M + +def xor(x): + num_one_bits = 0 + for i in range(0, len(x)): + if i == 0: + continue + num_one_bits += x[i] + return num_one_bits % 2 + +def test_fn(x): + # 0 1 + # 2 | 3 + # 4 | 5 | 6 | 7 + # | | 0 | 7 | | | | + return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7]))) + +def candidate_fn(x): + return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2]) + +def true_fn(x): + return x[0] ^ x[1] ^ (x[3] * x[2]) + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def coherence(outputs, distances): + coherences = [] + for i in range(0, len(outputs)): + y_a = outputs[i] + numerator = 0 + denominator = 0 + for j in range(0, len(outputs)): + if i == j: + continue + y_b = outputs[j] + weight = distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return sum(coherences) / len(coherences) + +def random_sample(m, n): + inputs = np.zeros((m, n + 1)).astype(np.int32) + for i in range(0, m): + for j in range(0, n): + inputs[i][j] = random.randint(0, 1) + inputs[i][n] = 1 + return inputs + +def populate_distances(inputs, distances, scratch): + for i in range(0, len(inputs)): + x_a = inputs[i] + for j in range(0, len(inputs)): + if i == j: + continue + x_b = inputs[j] + distance = hamming_distance(x_a, x_b, scratch) + distances[i][j] = 1.0 / (2 ** distance) + +def populate_layers_scratch(layers, x, layers_scratch, compute_scratch): + layers_scratch[0].fill(0) + for i in range(1, len(layers_scratch)): + scratch = layers_scratch[i] + layer = layers[i - 1] + for j in range(0, layer.node_count): + value = 0 + np.multiply(layer.offsets[j], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + left = layers_scratch[i - 1][j * 2] + right = layers_scratch[i - 1][j * 2 + 1] + value ^= left * right + scratch[j] = value + return layers_scratch[-1][0] + +def evaluate_cached(candidate, x, layers_scratch, layers_scratch_base, compute_scratch): + global N + maybe_evaluate = set() + for j in range(0, candidate.node_count, 2): + value = 0 + np.multiply(candidate.offsets[j], x, compute_scratch) + 
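# offsets[j] selects which input bits feed this node; the elementwise product
+ # summed mod 2 below is the parity (XOR) of those bits, and the always-1 bias
+ # bit x[N] lets an offset negate that parity.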
value ^= np.sum(compute_scratch) % 2 + layers_scratch[0][j] = value + if candidate.node_count > 1: + value = 0 + np.multiply(candidate.offsets[j + 1], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + layers_scratch[0][j + 1] = value + if layers_scratch[0][j] == 1 and layers_scratch[0][j + 1] == 1: + maybe_evaluate.add(int(j / 2)) + + for i in range(1, len(layers_scratch)): + np.copyto(layers_scratch[i], layers_scratch_base[i]) + maybe_evaluate_next = set() + for j in maybe_evaluate: + left = layers_scratch[i - 1][j * 2] + right = layers_scratch[i - 1][j * 2 + 1] + child_value = left * right + left_base = layers_scratch_base[i - 1][j * 2] + right_base = layers_scratch_base[i - 1][j * 2 + 1] + child_base_value = left_base * right_base + if child_value != child_base_value: + layers_scratch[i][j] ^= 1 + maybe_evaluate_next.add(int(j / 2)) + maybe_evaluate = maybe_evaluate_next + return layers_scratch[-1][0] + +def evaluate(layers, candidate, x, layers_scratch, compute_scratch): + global N + for i in range(0, len(layers_scratch)): + scratch = layers_scratch[i] + if i == 0: + for j in range(0, candidate.node_count): + value = 0 + np.multiply(candidate.offsets[j], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + scratch[j] = value + else: + layer = layers[i - 1] + for j in range(0, layer.node_count): + value = 0 + np.multiply(layer.offsets[j], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + left = layers_scratch[i - 1][j * 2] + right = layers_scratch[i - 1][j * 2 + 1] + value ^= left * right + scratch[j] = value + return layers_scratch[-1][0] + +@timeit +def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch): + global M, N + scores.fill(0) + unique_candidates = {} + for j in range(0, num_candidates): + create_candidate(probabilities, candidates[j]) + unique_candidates[candidate_str(candidates[j])] = j + + for i in range(0, sample_size): + populate_layers_scratch(layers, inputs[i], layers_scratch_base, int_scratch) + for _, j in unique_candidates.items(): + candidate = candidates[j] + outputs[j][i] = evaluate_cached(candidate, inputs[i], layers_scratch, layers_scratch_base, int_scratch) + # if outputs[j][i] != evaluate(layers, candidate, inputs[i], layers_scratch, int_scratch): + # print('Uh-oh') + for _, j in unique_candidates.items(): + candidate = candidates[j] + np.subtract(outputs[j], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + scores[j] = coherence(output_xor, distances) + +@timeit +def update_probabilities(probabilities, candidates, inputs, scores, scale): + global N + num_candidates = len(candidates) + + probabilities.offset_coherences.fill(-1) + for p in range(0, num_candidates): + candidate = candidates[p] + score = scores[p] + if score == 0: + continue + for j in range(0, probabilities.node_count): + for k in range(0, N + 1): + i = candidate.offsets[j][k] + for m in range(0, probabilities.node_count): + for n in range(0, N + 1): + l = candidate.offsets[m][n] + probabilities.offset_coherences[i][j][k][l][m][n] = max(score, probabilities.offset_coherences[i][j][k][l][m][n]) + + # for i in range(0, 2): + # for j in range(0, probabilities.node_count): + # for k in range(0, N + 1): + # for l in range(0, 2): + # for m in range(0, probabilities.node_count): + # for n in range(0, N + 1): + # offset_max = 0 + # offset_sum = 0 + # offset_count = 0 + # for p in range(0, num_candidates): 
+ # candidate = candidates[p] + # if candidate.offsets[j][k] != i: + # continue + # if candidate.offsets[m][n] != l: + # continue + # if scores[p] == 0: + # continue + # offset_max = max(offset_max, scores[p]) + # offset_sum += scores[p] + # offset_count += 1 + # if offset_max == 0: + # continue + # probabilities.offset_coherences[i][j][k][l][m][n] = offset_max + + for j in range(0, probabilities.node_count): + for k in range(0, N + 1): + for m in range(0, probabilities.node_count): + for n in range(0, N + 1): + # if j == m and k == n: + # continue + p_j1_if_m0 = probabilities.offset_coherences[1][j][k][0][m][n] + p_j0_if_m0 = probabilities.offset_coherences[0][j][k][0][m][n] + p_j1_if_m1 = probabilities.offset_coherences[1][j][k][1][m][n] + p_j0_if_m1 = probabilities.offset_coherences[0][j][k][1][m][n] + if p_j1_if_m0 >= 0 and p_j0_if_m0 >= 0: + delta_if_m0 = p_j1_if_m0 - p_j0_if_m0 + probabilities.deltas[j][k][0][m][n] = delta_if_m0 + if p_j1_if_m1 >= 0 and p_j0_if_m1 >= 0: + delta_if_m1 = p_j1_if_m1 - p_j0_if_m1 + probabilities.deltas[j][k][1][m][n] = delta_if_m1 + + p_offsets_next = np.zeros((probabilities.node_count, N + 1)) + p_offsets_next.fill(0.5) + for j in range(0, probabilities.node_count): + for k in range(0, N + 1): + for m in range(0, probabilities.node_count): + for n in range(0, N + 1): + # if j == m and k == n: + # continue + delta = probabilities.deltas[j][k][1][m][n] * probabilities.p_offsets[m][n] + probabilities.deltas[j][k][0][m][n] * (1 - probabilities.p_offsets[m][n]) + p_offsets_next[j][k] += delta * scale + # if delta > 0 and probabilities.node_count > 1: + # q = j ^ 0b1 + # p_offsets_next[q][k] -= delta * scale + + inertia = 0 + for j in range(0, probabilities.node_count): + for k in range(0, N + 1): + value = clamp(p_offsets_next[j][k], 0, 1) + inertia += abs(probabilities.p_offsets[j][k] - value) + probabilities.p_offsets[j][k] = value + + return inertia + +def create_candidate(probabilities, candidate): + global N + for i in range(0, probabilities.node_count): + for j in range(0, N + 1): + candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0 + +def copy_candidate(src, dest): + global N + for i in range(0, src.node_count): + for j in range(0, N + 1): + dest.offsets[i][j] = src.offsets[i][j] + +def p(x): + return math.ceil(x * 100) / 100 + +def p_a(x): + return [p(z) for z in x] + +def print_probabilities(probabilities): + print('=====================') + for i in range(0, probabilities.node_count): + print(i, p_a(probabilities.p_offsets[i])) + print('=====================') + +def candidate_str(candidate): + global N + build_str = '' + for i in range(0, candidate.node_count): + for j in range(0, N + 1): + build_str += str(candidate.offsets[i][j]) + return build_str + +def main(): + global N, M + sample_size = 64 + num_candidates = 100 + num_survivors = 8 + output_xor = np.zeros(sample_size,) + scratch = np.zeros((N + 1,)) + int_scratch = np.zeros((N + 1,)).astype(np.int32) + g = test_fn + expected_outputs = np.zeros((sample_size,)) + inputs = random_sample(sample_size, N) + distances = np.zeros((sample_size, sample_size)) + populate_distances(inputs, distances, scratch) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + outputs = np.zeros((num_candidates + num_survivors, sample_size,)) + scores = np.zeros((num_candidates + num_survivors,)) + + layers = [] + layers_scratch = [np.zeros(1, ).astype(np.int32)] + layers_scratch_base = [np.zeros(1, ).astype(np.int32)] + layer = 0 + + # for i in range(0, 
sample_size): + # outputs[0][i] = candidate_fn(inputs[i]) + + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + # print(score) + + # for i in range(0, sample_size): + # outputs[0][i] = true_fn(inputs[i]) + + # np.subtract(outputs[0], expected_outputs, output_xor) + # np.mod(output_xor, M, output_xor) + # score = coherence(output_xor, distances) + # print(score) + # return + + while score < 1: + probabilities = Probabilities(layer) + candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)] + inertia = 1 + epoch = 1 + while inertia > 0.001: + compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch) + round_inertia = update_probabilities(probabilities, candidates, inputs, scores, epoch / 1000.0) + inertia = 0.9 * inertia + 0.1 * round_inertia + + print_probabilities(probabilities) + for candidate in layers: + print(candidate.offsets) + print(np.max(scores), round_inertia, inertia) + + top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:] + + for i in range(0, num_survivors): + src_index = top_n[i] + dest_index = num_candidates + i + if src_index == dest_index: + continue + src = candidates[src_index] + dest = candidates[dest_index] + candidates[dest_index] = src + candidates[src_index] = dest + + inputs = random_sample(sample_size, N) + populate_distances(inputs, distances, scratch) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + epoch += 1 + + candidate = probabilities.flatten() + for j in range(0, sample_size): + outputs[0][j] = evaluate(layers, candidate, inputs[j], layers_scratch, int_scratch) + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + + layers.insert(0, candidate) + layer += 1 + layers_scratch.insert(0, np.zeros(2 ** layer,).astype(np.int32)) + layers_scratch_base.insert(0, np.zeros(2 ** layer,).astype(np.int32)) + + for candidate in layers: + print(candidate.offsets) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations9.py b/mutations9.py new file mode 100644 index 0000000..1427cc0 --- /dev/null +++ b/mutations9.py @@ -0,0 +1,414 @@ +from enum import unique +import hashlib +import math +import numpy as np +import random +import time + +N = 8 +M = 2 + +def vec_to_int(x): + global N + z = 0 + for i in range(0, N + 1): + z <<= 1 + z |= x[i] + return z + +def timeit(f): + def timed(*args, **kw): + ts = time.time() + result = f(*args, **kw) + te = time.time() + + print('func:%r took: %2.4f sec' % (f.__name__, te-ts)) + return result + return timed + +class Candidate: + def __init__(self, layer): + global N + self.layer = layer + self.node_count = layer + self.offsets = np.zeros((self.node_count, N + 1)).astype(np.int32) + +class Probabilities: + def __init__(self, layer): + global N + self.layer = layer + self.node_count = layer + self.p_offsets = np.zeros((self.node_count, N + 1)) + self.p_offsets.fill(0.5) + self.offset_coherences = np.zeros((2, self.node_count, N + 1, 2, self.node_count, N + 1)) + self.offset_coherences.fill(-1) + self.deltas = np.zeros((self.node_count, N + 1, 2, self.node_count, N + 1)) + + def has_converged(self): + global N + for i in range(0,self.node_count): + for j in range(0, N + 1): + if self.p_offsets[i][j] > 0.05 and 
self.p_offsets[i][j] < 0.95: + return False + return True + + def flatten(self): + global N + candidate = Candidate(self.layer) + for i in range(0, self.node_count): + for j in range(0, N + 1): + candidate.offsets[i][j] = 1 if self.p_offsets[i][j] >= 0.95 else 0 + return candidate + +def clamp(x, min_value = 0.01, max_value = 1): + return min(max(x, min_value), max_value) + +def encode(v): + global N + byte_values = [] + for i in range(0, math.ceil(N / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if index >= len(v): + continue + x <<= 1 + x |= int(v[index]) + byte_values.append(x) + return bytearray(byte_values) + +# 00100111 x4 +# 00000110 x1 +def sha(v): + global M + x = encode(v) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] % M + +def xor(x): + num_one_bits = 0 + for i in range(0, len(x)): + if i == 0: + continue + num_one_bits += x[i] + return num_one_bits % 2 + + +# 0 ^ 1 ^ (2 ^ (4 * (5 ^ 0 * 7))) * (3 ^ 6 * 7) +# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * (5 ^ 0 * 7)) ^ 4 * 6 * 7 * (5 ^ 0 * 7) +# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * 5 ^ 0 * 3 * 4 * 7 ^ 4 * 5 * 6 * 7 ^ 0 * 4 * 6 * 7 + +# 0 ^ 1 ^ 2*3 ^ 2*6*7 ^ 3*4*5 ^ 0*3*4*7 ^ 4*5*6*7 ^ 0*4*6*7 +def test_fn(x): + # 0 1 + # 2 | 3 + # 4 | 5 | 6 | 7 + # | | 0 | 7 | | | | + return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7]))) + +def candidate_fn(x): + return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2]) + +def true_fn(x): + return x[0] ^ x[1] ^ (x[3] * x[2]) + +def hamming_distance(a, b, scratch): + np.logical_xor(a, b, scratch) + return sum(scratch) + +def coherence(outputs, distances): + coherences = [] + for i in range(0, len(outputs)): + y_a = outputs[i] + numerator = 0 + denominator = 0 + for j in range(0, len(outputs)): + if i == j: + continue + y_b = outputs[j] + weight = distances[i][j] + denominator += weight + if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1: + numerator += weight + coherence = numerator / denominator if denominator > 0 else 0 + coherences.append(coherence) + return sum(coherences) / len(coherences) + +def random_sample(m, n): + inputs = np.zeros((m, n + 1)).astype(np.int32) + for i in range(0, m): + for j in range(0, n): + inputs[i][j] = random.randint(0, 1) + inputs[i][n] = 1 + return inputs + +def populate_distances(inputs, distances, scratch): + for i in range(0, len(inputs)): + x_a = inputs[i] + for j in range(0, len(inputs)): + if i == j: + continue + x_b = inputs[j] + distance = hamming_distance(x_a, x_b, scratch) + distances[i][j] = 1.0 / (2 ** distance) + +def evaluate(layers, candidate, x, compute_scratch): + global N + z = evaluate_layers(layers, x, compute_scratch) + z ^= evaluate_candidate(candidate, x, compute_scratch) + return z + +def evaluate_layers(layers, x, compute_scratch): + global N + z = 0 + for layer in layers: + z ^= evaluate_candidate(layer, x, compute_scratch) + return z + +def evaluate_candidate(candidate, x, compute_scratch): + y = 1 + for j in range(0, candidate.node_count): + value = 0 + np.multiply(candidate.offsets[j], x, compute_scratch) + value ^= np.sum(compute_scratch) % 2 + y &= value + return y + +@timeit +def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch): + global M, N + + for i in range(0, sample_size): + outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch) + for j in range(1, num_candidates): + np.copyto(outputs[j], outputs[0]) + np.subtract(outputs[0], expected_outputs, output_xor) 
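+ # outputs[0] holds the current stack of accepted layers evaluated on every
+ # sample and is copied into each candidate's output slot; the two lines below
+ # reduce the residual against expected_outputs mod M and score it with the
+ # distance-weighted coherence, giving base_score for this round.
+ # Rough intuition: pair weights are 2**-hamming_distance, and coherence averages,
+ # over samples, the weighted fraction of other samples with a matching residual,
+ # so an all-zero (or all-one) residual scores 1.0.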
+ np.mod(output_xor, M, output_xor) + base_score = coherence(output_xor, distances) + + scores.fill(0) + unique_candidates = {} + for j in range(0, num_candidates): + create_candidate(probabilities, candidates[j]) + unique_candidates[candidate_str(candidates[j])] = j + + for i in range(0, sample_size): + for _, j in unique_candidates.items(): + candidate = candidates[j] + outputs[j][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch) + for _, j in unique_candidates.items(): + candidate = candidates[j] + np.subtract(outputs[j], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + scores[j] = score + return base_score + + +def compute_uplift(candidate, layers, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch): + global M, N + + for i in range(0, sample_size): + outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch) + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + base_score = coherence(output_xor, distances) + + for i in range(0, sample_size): + outputs[0][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch) + + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + return (base_score, score) + +@timeit +def update_probabilities(probabilities, candidates, inputs, base_score, scores, scale): + global N + num_candidates = len(candidates) + + probabilities.offset_coherences.fill(-1) + for p in range(0, num_candidates): + candidate = candidates[p] + if scores[p] == 0: + continue + # score = max(scores[p], base_score) + score = scores[p] + for j in range(0, probabilities.node_count): + for k in range(0, N + 1): + i = candidate.offsets[j][k] + for m in range(0, probabilities.node_count): + for n in range(0, N + 1): + l = candidate.offsets[m][n] + probabilities.offset_coherences[i][j][k][l][m][n] = max(score, probabilities.offset_coherences[i][j][k][l][m][n]) + + p_offsets_next = np.zeros((probabilities.node_count, N + 1)) + inertia = 0 + for j in range(0, probabilities.node_count): + for k in range(0, N + 1): + delta = 0 + count = 0 + for m in range(0, probabilities.node_count): + for n in range(0, N + 1): + # if j == m and k == n: + # continue + p_j1_if_m0 = probabilities.offset_coherences[1][j][k][0][m][n] + p_j0_if_m0 = probabilities.offset_coherences[0][j][k][0][m][n] + p_j1_if_m1 = probabilities.offset_coherences[1][j][k][1][m][n] + p_j0_if_m1 = probabilities.offset_coherences[0][j][k][1][m][n] + if p_j1_if_m0 >= 0 and p_j0_if_m0 >= 0: + # delta_if_m0 = (p_j1_if_m0 - base_score) - (p_j0_if_m0 - base_score) + delta_if_m0 = p_j1_if_m0 - p_j0_if_m0 + delta += delta_if_m0 * (1.0 - probabilities.p_offsets[m][n]) * scale + count += 1 + if p_j1_if_m1 >= 0 and p_j0_if_m1 >= 0: + # delta_if_m1 = (p_j1_if_m1 - base_score) - (p_j0_if_m1 - base_score) + delta_if_m1 = p_j1_if_m1 - p_j0_if_m1 + delta += delta_if_m1 * probabilities.p_offsets[m][n] * scale + count += 1 + if count > 0: + delta /= count + p_offsets_next[j][k] = clamp(probabilities.p_offsets[j][k] + delta, 0, 1) + inertia += abs(p_offsets_next[j][k] - probabilities.p_offsets[j][k]) + + for j in range(0, probabilities.node_count): + for k in range(0, N + 1): + p_offset_next = 0.9 * probabilities.p_offsets[j][k] + 0.1 * p_offsets_next[j][k] + # if p_offset_next <= 0.05: + # p_offset_next = 0.0 + # elif p_offset_next >= 0.95: + # p_offset_next = 1.0 + probabilities.p_offsets[j][k] = p_offset_next + + return 
inertia + +def create_candidate(probabilities, candidate): + global N + for i in range(0, probabilities.node_count): + for j in range(0, N + 1): + candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0 + +def copy_candidate(src, dest): + global N + for i in range(0, src.node_count): + for j in range(0, N + 1): + dest.offsets[i][j] = src.offsets[i][j] + +def p(x): + return math.ceil(x * 100) / 100 + +def p_a(x): + return [p(z) for z in x] + +def print_probabilities(probabilities): + print('=====================') + for i in range(0, probabilities.node_count): + print(i, p_a(probabilities.p_offsets[i])) + print('=====================') + +def candidate_str(candidate): + global N + build_str = '' + for i in range(0, candidate.node_count): + for j in range(0, N + 1): + build_str += str(candidate.offsets[i][j]) + return build_str + +def main(): + global N, M + sample_size = 64 + num_candidates = 100 + num_survivors = 1 + uplift_sample_size = 100 + output_xor = np.zeros(sample_size,) + scratch = np.zeros((N + 1,)) + int_scratch = np.zeros((N + 1,)).astype(np.int32) + g = test_fn + expected_outputs = np.zeros((sample_size,)) + inputs = random_sample(sample_size, N) + distances = np.zeros((sample_size, sample_size)) + populate_distances(inputs, distances, scratch) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + outputs = np.zeros((num_candidates + num_survivors, sample_size,)).astype(np.int32) + scores = np.zeros((num_candidates + num_survivors,)) + + layers = [] + layer = 1 + + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + + while score < 1: + probabilities = Probabilities(layer) + candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)] + inertia = 1 + epoch = 1 + while inertia > 0.001 and epoch < 1000 and not probabilities.has_converged(): + base_score = compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch) + round_inertia = update_probabilities(probabilities, candidates, inputs, base_score, scores, 1 + 0.01 * epoch) + inertia = 0.9 * inertia + 0.1 * round_inertia + + print_probabilities(probabilities) + for candidate in layers: + print(candidate.offsets) + max_score = np.max(scores) + print(base_score, max_score,round_inertia, inertia) + + top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:] + + for i in range(0, num_survivors): + src_index = top_n[i] + dest_index = num_candidates + i + if src_index == dest_index: + continue + src = candidates[src_index] + dest = candidates[dest_index] + candidates[dest_index] = src + candidates[src_index] = dest + + inputs = random_sample(sample_size, N) + populate_distances(inputs, distances, scratch) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + epoch += 1 + + candidate = probabilities.flatten() + print(candidate.offsets) + for j in range(0, sample_size): + outputs[0][j] = evaluate(layers, candidate, inputs[j], int_scratch) + np.subtract(outputs[0], expected_outputs, output_xor) + np.mod(output_xor, M, output_xor) + score = coherence(output_xor, distances) + + average_base_score = 0 + average_score = 0 + for i in range(0, uplift_sample_size): + inputs = random_sample(sample_size, N) + populate_distances(inputs, distances, scratch) + for i in range(0, sample_size): + expected_outputs[i] = g(inputs[i]) + (base_score, score) = 
compute_uplift(candidate, layers, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch) + average_base_score += base_score + average_score += score + average_base_score /= uplift_sample_size + average_score /= uplift_sample_size + uplift = average_score - average_base_score + print(uplift) + + if uplift <= 0: + layer += 1 + continue + + layers.insert(0, candidate) + if layer == 1: + layer += 1 + + for candidate in layers: + print(candidate.offsets) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations_cuda.py b/mutations_cuda.py new file mode 100644 index 0000000..a396d73 --- /dev/null +++ b/mutations_cuda.py @@ -0,0 +1,269 @@ +# Sample source code from the Tutorial Introduction in the documentation. + +import hashlib +import numpy as np +import math +import pycuda.driver as cuda +from pycuda.driver import Stream +import pycuda.autoinit +from pycuda.compiler import SourceModule +import pycuda.gpuarray as gpuarray +import random + +''' +a = numpy.random.randn(4,4) + +a = a.astype(numpy.float32) + +a_gpu = cuda.mem_alloc(a.size * a.dtype.itemsize) + +cuda.memcpy_htod(a_gpu, a) + +mod = SourceModule(""" + __global__ void doublify(float *a) + { + int idx = threadIdx.x + threadIdx.y*4; + a[idx] *= 2; + } + """) + +func = mod.get_function("doublify") +func(a_gpu, block=(4,4,1)) + +a_doubled = numpy.empty_like(a) +cuda.memcpy_dtoh(a_doubled, a_gpu) +print("original array:") +print(a) +print("doubled with kernel:") +print(a_doubled) + +# alternate kernel invocation ------------------------------------------------- + +func(cuda.InOut(a), block=(4, 4, 1)) +print("doubled with InOut:") +print(a) + +# part 2 ---------------------------------------------------------------------- + +a_gpu = gpuarray.to_gpu(numpy.random.randn(4,4).astype(numpy.float32)) +a_doubled = (2*a_gpu).get() + +print("original array:") +print(a_gpu) +print("doubled with gpuarray:") +print(a_doubled) +''' + +N = 8 +M = 2 +sample_size = 64 + +def encode(v, offset): + byte_values = [] + for i in range(0, math.ceil(N / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + if offset + index >= len(v): + break + x <<= 1 + x |= int(v[offset + index]) + byte_values.append(x) + return bytearray(x) + +def sha(v, offset): + global M + x = encode(v, offset) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] % M + +def create_program_r(model, output_var): + global N, M + (constant, scalars, child) = model + program = 'int ' + output_var + ' = ' + str(constant) + ';\n' + scalars_part = ' + '.join([str(scalars[i]) + ' * x[gid * ' + str(N) + ' + ' + str(i) + ']' for i in range(0, len(scalars)) if scalars[i] > 0]) + if len(scalars_part) > 0: + program += output_var + ' += ' + scalars_part + ';\n' + if not child is None: + left_output = output_var + '0' + right_output = output_var + '1' + (left, right) = child + program += create_program_r(left, left_output) + program += create_program_r(right, right_output) + program += output_var + ' += ' + left_output + ' * ' + right_output + ';\n' + program += output_var + ' %= ' + str(M) + ';\n' + return program + +def create_program(model, name, offset): + output_var = 'output' + program = '__global__ void ' + name + '(const int *x, int *out) {\n' + program += 'int gid = threadIdx.x + blockIdx.x * blockDim.x;\n' + program += create_program_r(model, output_var) + program += 'out[' + str(offset) + ' + gid] = ' + output_var + ';\n' + program += '}\n' + return program + +def distances_program(): + global N, 
sample_size + program = "__global__ void p(const int *x, double *distances) {\n" + program += " int gid = threadIdx.x + blockIdx.x * blockDim.x;\n" + program += " int i = gid / " + str(sample_size) + ";\n" + program += " int j = gid % " + str(sample_size) + ";\n" + program += " if (i == j) {\n" + program += " distances[gid] = 0;\n" + program += " return;\n" + program += " }\n" + program += " int distance = 0;\n" + program += " for (int k = 0; k < " + str(N) + "; k++) {\n" + program += " distance += x[i * " + str(N) + " + k] ^ x[j * " + str(N) + " + k];\n" + program += " }\n" + program += " distances[gid] = pow((double)2.0, (double)-distance);\n" + program += "}\n" + return program + +def coherence_program(): + global sample_size + program = "__global__ void p(const int *y, const int *z, const double *distances, double *coherences) {\n" + program += " int gid = threadIdx.x + blockIdx.x * blockDim.x;\n" + program += " double numerator = 0;\n" + program += " double denominator = 0;\n" + program += " for (int i = 0; i < " + str(sample_size) + "; i++) {\n" + program += " int p = z[i] ^ y[gid * " + str(sample_size) + " + i];\n" + program += " for (int j = 0; j < " + str(sample_size) + "; j++) {\n" + program += " int q = z[j] ^ y[gid * " + str(sample_size) + " + j];\n" + program += " double distance = distances[i * " + str(sample_size) + " + j];\n" + program += " denominator += distance;\n" + program += " if (p == q) {\n" + program += " numerator += distance;\n" + program += " }\n" + program += " }\n" + program += " }\n" + program += " coherences[gid] = numerator / denominator;\n" + program += "}\n" + return program + +def random_sample(): + global N, sample_size + x = np.zeros((N * sample_size,)).astype(np.int32) + for i in range(0, len(x)): + x[i] = random.randint(0, 1) + return x + +def clone_model(model, p_mutation): + global N, M + + p_constant = p_mutation * random.random() + p_flip = p_mutation * random.random() + p_add_child = p_mutation * random.random() + p_drop_child = p_mutation * random.random() + + (constant, xors, child) = model + if random.random() < p_constant: + constant += random.randint(0, M - 1) + constant %= M + clone_xors = np.zeros((N,)) + np.copyto(clone_xors, xors) + for i in range(0, N): + if random.random() < p_flip: + offset = 1 if M == 2 else random.randint(1, M - 1) + clone_xors[i] += offset + clone_xors[i] %= M + if child is None: + if random.random() < p_add_child: + left = random_child(p_mutation) + right = random_child(p_mutation) + return (constant, clone_xors, (left, right)) + return (constant, clone_xors, None) + if random.random() < p_drop_child: + return (constant, clone_xors, None) + (left, right) = child + clone_left = clone_model(left, p_mutation) + clone_right = clone_model(right, p_mutation) + return (constant, clone_xors, (clone_left, clone_right)) + +def random_child(p_mutation): + global N, M + constant = random.randint(0, M - 1) + xors = np.zeros((N,)) + + p_flip = p_mutation * random.random() + p_child = p_mutation * random.random() + + index = random.randint(0, N - 1) + xors[index] = 1 if M == 2 else random.randint(1, M - 1) + for i in range(0, N): + if i != index and random.random() < p_flip: + xors[i] = 1 if M == 2 else random.randint(1, M - 1) + if random.random() < p_child: + left = random_child(p_mutation * random.random()) + right = random_child(p_mutation * random.random()) + return (constant, xors, (left, right)) + return (constant, xors, None) + +def null_candidate(): + global N + return (0, np.zeros((N,)), None) + +def main(): + global 
N, M, sample_size + epochs = 1000 + num_survivors = 100 + num_offspring = 10 + num_candidates = num_survivors + num_survivors * num_offspring + block_size = 1 + + x = random_sample() + z = np.zeros((sample_size,)).astype(np.int32) + coherences = np.zeros((num_candidates,)).astype(np.float64) + candidates = [null_candidate() for _ in range(0, num_candidates)] + + for i in range(0, sample_size): + z[i] = sha(x, N * i) + # print(z) + + x_gpu = cuda.mem_alloc(4 * N * sample_size) + cuda.memcpy_htod(x_gpu, x) + z_gpu = cuda.mem_alloc(4 * sample_size) + cuda.memcpy_htod(z_gpu, z) + distances_gpu = cuda.mem_alloc(8 * sample_size * sample_size) + coherences_gpu = cuda.mem_alloc(8 * num_candidates) + outputs_gpu = cuda.mem_alloc(4 * sample_size * num_candidates) + + distances_kernel = SourceModule(distances_program()).get_function('p') + coherence_kernel = SourceModule(coherence_program()).get_function('p') + + distances_kernel(x_gpu, distances_gpu, block=(block_size, 1, 1), grid=(int(sample_size * sample_size / block_size), 1, 1)) + # distances = np.zeros((sample_size,sample_size)).astype(np.double) + # cuda.memcpy_dtoh(distances, distances_gpu) + # print(distances) + + for epoch in range(0, epochs): + mod = SourceModule('\n'.join([create_program(candidates[i], 'k' + str(i), i * sample_size) for i in range(0, num_candidates)])) + stream = Stream() + for i in range(0, num_candidates): + f = mod.get_function('k' + str(i)) + f(x_gpu, outputs_gpu, stream=stream, block=(block_size, 1, 1), grid=(int(sample_size / block_size), 1, 1)) + stream.synchronize() + + # outputs = np.zeros((sample_size * num_candidates,)).astype(np.int32) + # cuda.memcpy_dtoh(outputs, outputs_gpu) + # print(outputs) + + coherence_kernel(outputs_gpu, z_gpu, distances_gpu, coherences_gpu, block=(block_size, 1, 1), grid=(int(num_candidates / block_size), 1, 1)) + cuda.memcpy_dtoh(coherences, coherences_gpu) + + top_n = sorted(range(len(coherences)), key=lambda i: coherences[i])[-num_survivors:] + survivors = [candidates[index] for index in top_n] + print(epoch, coherences[top_n[-1]]) + + for i in range(0, num_survivors): + candidate = survivors[i] + candidates[i] = candidate + for j in range(0, num_offspring): + index = num_survivors + j * num_survivors + i + candidates[index] = clone_model(candidate, random.random()) + +if __name__ == "__main__": + main() diff --git a/mutations_gpu.py b/mutations_gpu.py new file mode 100644 index 0000000..7b586df --- /dev/null +++ b/mutations_gpu.py @@ -0,0 +1,207 @@ +import hashlib +import numpy as np +import math +import pyopencl as cl +import random + +N = 8 +M = 2 +sample_size = 64 + +def encode(v, offset): + byte_values = [] + for i in range(0, math.ceil(N / 8)): + x = 0 + for j in range(0, 8): + index = i * 8 + j + x <<= 1 + x |= int(v[offset + index]) + byte_values.append(x) + return bytearray(x) + +def sha(v, offset): + global M + x = encode(v, offset) + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] % M + +def create_program_r(model, output_var): + global N, M + (constant, scalars, child) = model + program = 'int ' + output_var + ' = ' + str(constant) + ';\n' + scalars_part = ' + '.join([str(scalars[i]) + ' * x[gid * ' + str(N) + ' + ' + str(i) + ']' for i in range(0, len(scalars)) if scalars[i] > 0]) + if len(scalars_part) > 0: + program += output_var + ' += ' + scalars_part + ';\n' + if not child is None: + left_output = output_var + '0' + right_output = output_var + '1' + (left, right) = child + program += create_program_r(left, left_output) + program += 
create_program_r(right, right_output) + program += output_var + ' += ' + left_output + ' * ' + right_output + ';\n' + program += output_var + ' %= ' + str(M) + ';\n' + return program + +def create_program(model, name, offset): + output_var = 'output' + program = '__kernel void ' + name + '(__global const int *x, __global int *out) {\n' + program += 'int gid = get_global_id(0);\n' + program += create_program_r(model, output_var) + program += 'out[' + str(offset) + ' + gid] = ' + output_var + ';\n' + program += '}\n' + return program + +def distances_program(): + global N, sample_size + program = "__kernel void p(__global const int *x, __global float *distances) {\n" + program += " int gid = get_global_id(0);\n" + program += " int i = gid / " + str(sample_size) + ";\n" + program += " int j = gid % " + str(sample_size) + ";\n" + program += " float distance = 0;\n" + program += " if (i == j) {\n" + program += " distances[gid] = distance;\n" + program += " return;\n" + program += " }\n" + program += " for (int k = 0; k < " + str(N) + "; k++) {\n" + program += " distance += x[i * " + str(N) + " + k] ^ x[j * " + str(N) + " + k];\n" + program += " }\n" + program += " distances[gid] = pow(2, -distance);\n" + program += "}\n" + return program + +def coherence_program(): + global sample_size + program = "__kernel void p(__global const int *y, __global const int *z, __global const float *distances, __global float *coherences) {\n" + program += " int gid = get_global_id(0);\n" + program += " float numerator = 0;\n" + program += " float denominator = 0;\n" + program += " for (int i = 0; i < " + str(sample_size) + "; i++) {\n" + program += " int p = z[i] ^ y[gid * " + str(sample_size) + " + i];\n" + program += " for (int j = 0; j < " + str(sample_size) + "; j++) {\n" + program += " int q = z[j] ^ y[gid * " + str(sample_size) + " + j];\n" + program += " float distance = distances[i * " + str(sample_size) + " + j];\n" + program += " denominator += distance;\n" + program += " if (p == q) {\n" + program += " numerator += distance;\n" + program += " }\n" + program += " }\n" + program += " }\n" + program += " coherences[gid] = numerator / denominator;\n" + program += "}\n" + return program + +def random_sample(): + global N, sample_size + x = np.zeros((N * sample_size,)).astype(np.int32) + for i in range(0, len(x)): + x[i] = random.randint(0, 1) + return x + +def clone_model(model, p_mutation): + global N, M + + p_constant = p_mutation * random.random() + p_flip = p_mutation * random.random() + p_add_child = p_mutation * random.random() + p_drop_child = p_mutation * random.random() + + (constant, xors, child) = model + if random.random() < p_constant: + constant += random.randint(0, M - 1) + constant %= M + clone_xors = np.zeros((N,)) + np.copyto(clone_xors, xors) + for i in range(0, N): + if random.random() < p_flip: + offset = 1 if M == 2 else random.randint(1, M - 1) + clone_xors[i] += offset + clone_xors[i] %= M + if child is None: + if random.random() < p_add_child: + left = random_child(p_mutation) + right = random_child(p_mutation) + return (constant, clone_xors, (left, right)) + return (constant, clone_xors, None) + if random.random() < p_drop_child: + return (constant, clone_xors, None) + (left, right) = child + clone_left = clone_model(left, p_mutation) + clone_right = clone_model(right, p_mutation) + return (constant, clone_xors, (clone_left, clone_right)) + +def random_child(p_mutation): + global N, M + constant = random.randint(0, M - 1) + xors = np.zeros((N,)) + + p_flip = p_mutation * 
random.random() + p_child = p_mutation * random.random() + + index = random.randint(0, N - 1) + xors[index] = 1 if M == 2 else random.randint(1, M - 1) + for i in range(0, N): + if i != index and random.random() < p_flip: + xors[i] = 1 if M == 2 else random.randint(1, M - 1) + if random.random() < p_child: + left = random_child(p_mutation * random.random()) + right = random_child(p_mutation * random.random()) + return (constant, xors, (left, right)) + return (constant, xors, None) + +def null_candidate(): + global N + return (0, np.zeros((N,)), None) + +def main(): + global N, M, sample_size + epochs = 1000 + num_survivors = 100 + num_offspring = 10 + num_candidates = num_survivors + num_survivors * num_offspring + local_work_size = (512,) + + x = random_sample() + z = np.zeros((sample_size,)).astype(np.int32) + coherences = np.zeros((num_candidates,)).astype(np.float32) + ctx = cl.create_some_context() + queue = cl.CommandQueue(ctx) + mf = cl.mem_flags + candidates = [null_candidate() for _ in range(0, num_candidates)] + + for i in range(0, sample_size): + z[i] = sha(x, N * i) + + x_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=x) + z_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=z) + distances_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, 4 * sample_size * sample_size) + coherences_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, 4 * num_candidates) + outputs_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, 4 * sample_size * num_candidates) + + distances_kernel = cl.Program(ctx, distances_program()).build().p + coherence_kernel = cl.Program(ctx, coherence_program()).build().p + + distances_kernel(queue, (sample_size * sample_size,), local_work_size, x_gpu, distances_gpu) + + for epoch in range(0, epochs): + program = cl.Program(ctx, '\n'.join([create_program(candidates[i], 'k' + '{:0>9}'.format(i), i * sample_size) for i in range(0, num_candidates)])).build() + for knl in program.all_kernels(): + knl(queue, (sample_size,), local_work_size, x_gpu, outputs_gpu) + + coherence_kernel(queue, (num_candidates,), local_work_size, outputs_gpu, z_gpu, distances_gpu, coherences_gpu) + cl.enqueue_copy(queue, coherences, coherences_gpu) + + top_n = sorted(range(len(coherences)), key=lambda i: coherences[i])[-num_survivors:] + survivors = [candidates[index] for index in top_n] + print(epoch, coherences[top_n[-1]]) + + for i in range(0, num_survivors): + candidate = survivors[i] + candidates[i] = candidate + for j in range(0, num_offspring): + index = num_survivors + j * num_survivors + i + candidates[index] = clone_model(candidate, random.random()) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mutations_opencl.py b/mutations_opencl.py new file mode 100644 index 0000000..68eeedd --- /dev/null +++ b/mutations_opencl.py @@ -0,0 +1,5 @@ +def main(): + print('test') + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/shifts.py b/shifts.py new file mode 100644 index 0000000..b68d9df --- /dev/null +++ b/shifts.py @@ -0,0 +1,29 @@ +def remove_bit(i, n): + return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1) + +def main(): + N = 65 + mappings = {} + for i in range(0, N): + n = 0 + g = remove_bit(i, n) + paths_set = set() + while g < i: + paths_set.add(g) + n += 1 + g = remove_bit(i, n) + paths = sorted(list(paths_set)) + mappings[i] = paths + + visited_set = set() + stack = [paths[:]] + while len(stack) > 0: + for h in stack.pop(): + if not h in visited_set: + visited_set.add(h) + stack.append(mappings[h]) + visited = 
sorted(list(visited_set)) + print(i, len(visited)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/space_analysis.py b/space_analysis.py new file mode 100644 index 0000000..384e5e3 --- /dev/null +++ b/space_analysis.py @@ -0,0 +1,142 @@ +import numpy as np + +def decode(x, N): + index = 0 + output = np.zeros((N)) + while x > 0 and index < N: + output[index] = x & 0b1 + x >>= 1 + index += 1 + return output + +def hamming_distance(a, b): + return np.sum(np.logical_xor(a, b)) + +def xor(x, bits): + return np.sum(x[:bits]) % 2 + +# 2 +# 4, 4, +# 6, 8, 6 +# 8, 12, 12, 8 +# 10, 16, 18, 16, 10 +# 12, 20, 24, 24, 20, 12 +# 14, 24, 30, 32, 30, 24, 14 +# 16, 28, 36, 40, 40, 36, 28, 16 + +# 1 +# 2, 2 +# 3, 4, 3 +# 4, 6, 6, 4 +# 5, 8, 9, 8, 5 +# 6, 10, 12, 12, 10, 6 +# 7, 12, 15, 16, 15, 12, 7 + +# 6, 0, 6 +# 24, 12, 12, 24 +# 60, 48, 36, 48, 60 +# 120, 120, 96, 96, 120, 120 +# 210, 240, 210, 192, 210, 240, 210 +# 336, 420, 396, 360, 360, 396, 420, 336 +# 504, 672, 672, 624, 600, 624, 672, 672, 504 + + +# 1, 0, 1 +# 4, 2, 2, 4 +# 10, 8, 6, 8, 10 +# 20, 20, 16, 16, 20, 20 +# 35, 40, 35, 32, 35, 40, 35 +# 56, 70, 66, 60, 60, 66, 70, 56 +# 84, 112, 112, 104, 100, 104, 112, 112, 84 + +# +# 20, 0, 20, 0, 20, +# 120, 40, 80, 80, 40, 120 +# 420, 240, 260, 320, 260, 240, 420 +# 1120, 840, 760, 880, 880, 760, 840, 1120 + +# 1, 0, 1, 0, 1 +# 6, 2, 4, 4, 2, 6 +# 21, 12, 13, 16, 13, 12, 21 +# 56, 42, 38, 44, 44, 38, 42, 56 + +# 70, 0, 70, 0, 70, 0, 70 +# 560, 140, 420, 280, 280, 420, 140, 560 + +# 252, 0, 252, 0, 252, 0, 252, 0, 252 +# 2520, 504, 2016, 1008, 1512, 1512, 1008, 2016, 504, 2520 + +# 1, 2, 3, 4, +# 1, 3, 6, 10 +# 1, 4, 10, 20 +# 1, 5, 15, 35 +# 1, 6, + +# 1, 2, 1 +# 1, 3, 3, 1 +# 1, 4, 6, 4, 1 +# 1, 5, 10, 10, 5, 1 +# 1, 6, 15, 20, 15, 6, 1 + +# 2, 6, 12, 20, 30, 42, 56 +# 6, 30, 90, 210, 420 +# 20, 140, 560, +# 70 + +# 1, 3, 6, 10, 15, 21, 28 +# 1, 5, 15, 35 + +def main(): + N = 8 + points = [] + for i in range(0, 2 ** N): + points.append(decode(i, N)) + + bands = [[[] for _ in range(0, N + 1)] for _ in range(0, len(points))] + for i in range(0, len(points)): + a = points[i] + for j in range(0, len(points)): + if i == j: + continue + b = points[j] + distance = hamming_distance(a, b) + bands[i][distance].append(b) + + incoherent_distances = np.zeros((N + 1, N + 1)) + for k in range(0, N + 1): + print(k, '================================') + for t in range(0, 1): + x_a = points[t] + y_a = xor(x_a, k) + incoherent_bands = np.zeros((N + 1, N + 1)).astype(np.int32) + total_bands = np.zeros((N + 1, N + 1)).astype(np.int32) + for distance in range(0, N + 1): + band = bands[0][distance] + for x_b in band: + y_b = xor(x_b, k) + if y_a != y_b: + incoherent_distances[k][distance] += 1 + + if len(band) < 2: + continue + for band_origin in range(0, len(band)): + x_p = band[band_origin] + y_p = xor(x_p, k) + for i in range(0, len(band)): + if i == band_origin: + continue + x_q = band[i] + y_q = xor(x_q, k) + band_distance = hamming_distance(x_p, x_q) + total_bands[distance][band_distance] += 1 + if y_p != y_q: + incoherent_bands[distance][band_distance] += 1 + print(incoherent_bands) + print(total_bands) + # print(distance, hamming_distance(x_p, x_q), y_p, y_q) + + print(incoherent_distances) + # print(bands) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/space_analysis2.py b/space_analysis2.py new file mode 100644 index 0000000..7b88e14 --- /dev/null +++ b/space_analysis2.py @@ -0,0 +1,255 @@ +import math +import numpy as np + +def decode(x, N): + 
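# unpack the integer x into an N-element 0/1 vector, least-significant bit
+ # first; hamming_distance and xor below operate on these vectors.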
index = 0 + output = np.zeros((N)) + while x > 0 and index < N: + output[index] = x & 0b1 + x >>= 1 + index += 1 + return output + +def hamming_distance(a, b): + return np.sum(np.logical_xor(a, b)) + +def xor(x, bits): + return np.sum(x[:bits]) % 2 + +def compute_pyramids(N): + num_orders = max(int(N / 2), 1) + pyramids = np.zeros((num_orders, N, N)).astype(np.int32) + for i in range(2, N): + for j in range(1, i): + pyramids[0][i][j] = j + for order in range(1, num_orders): + # build out the first column + acc = 0 + for i in range(order * 2 + 2, N): + acc += pyramids[order - 1][i - 2][1] + pyramids[order][i][1] = acc + # accumulate the first column and place it on the diagonal(s) + for k in range(0, int(order / 2) + 1): + acc = 0 + for i in range(order * 2 + 2, N): + acc += pyramids[order][i][1] + pyramids[order][i][i - 1 - 2 * k] = acc + # for odd, copy the first column to the first diagonal + if order % 2 == 1: + k += 1 + for i in range(order * 2 + 2, N): + pyramids[order][i][i - 1 - 2 * k] = pyramids[order][i][1] + # integrate under the diagonal + inset = 1 + for j in reversed(range(2, N - 2 * k - 2)): + acc = pyramids[order][N - inset - 1][j] + for i in range(N - inset, N): + acc += pyramids[order - 1][i - 2][j] + pyramids[order][i][j] = acc + if order * 2 + 2 < N - inset: + inset += 1 + return pyramids + +def compute_pyramids_full(N): + num_orders = max(int(N / 2), 1) + pyramids = np.zeros((num_orders, N, N)).astype(np.int32) + # 1st order can be filled in as multiplication and forms the base case + for i in range(0, N): + for j in range(0, i + 1): + pyramids[0][i][j] = (i - j + 1) * (j + 1) + for order in range(1, num_orders): + offset = order * 2 + + # fill in the LHS and diagonal + for i in range(0, N - offset): + value = math.comb(2 * (order + 1) + i - 1, i) + pyramids[order][i + offset][0] = value + # mirror + pyramids[order][i + offset][i + offset] = value + + # accumulate along the diagonals + for i in range(1, N): + value = pyramids[order][i][0] + acc = value + for j in range(1, N - i): + value += acc + pyramids[order][i + j][j] = value + acc += pyramids[order - 1][i + j - 1][j - 1] + + return pyramids + +def get_total_band_count_2(distance, band_distance, N): + if band_distance % 2 == 1: + return 0 + order = int(band_distance / 2) - 1 + if order < 0: + return 0 + if distance < order + 1: + return 0 + if distance > N - order - 1: + return 0 + order_root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2 + scale = math.comb(N - (order + 1) * 2, distance - order - 1) + value = math.comb(2 * (order + 1) + N - 2 * (order + 1), N - 2 * (order + 1)) + return order_root * scale * value + +def get_incoherent_band_count_2(pyramids, distance, band_distance, k, N): + if k == 0 or k == N or band_distance % 2 == 1: + return 0 + order = int(band_distance / 2) - 1 + if order < 0: + return 0 + if distance < order + 1: + return 0 + if distance > N - order - 1: + return 0 + order_root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2 + scale = math.comb(N - (order + 1) * 2, distance - order - 1) + value = pyramids[order][N - 2][k - 1] + return order_root * scale * value + + # pyramid = pyramids[order] + # offset = (N - 1 - order) - distance + # multiplier = pyramid[2 * order + offset][2 * order + 1 + offset] + # row = N - offset + # column = k + # value = pyramid[row][column] + # return multiplier * value + +def get_incoherent_band_count(pyramids, distance, band_distance, k, N): + if k == 0 or k == N or band_distance % 2 == 1: + return 0 + order = 
int(band_distance / 2) - 1 + if order < 0: + return 0 + if distance < order + 1: + return 0 + if distance > N - order - 1: + return 0 + if distance < k: + distance = N - distance + k = N - k + pyramid = pyramids[order] + offset = (N - 1 - order) - distance + multiplier = pyramid[2 * order + 2 + offset][2 * order + 1 + offset] + row = N - offset + column = k + value = pyramid[row][column] + return multiplier * value + +def get_total_band_count(pyramids, distance, band_distance, N): + if band_distance % 2 == 1: + return 0 + order = int(band_distance / 2) - 1 + if order < 0: + return 0 + if distance < order + 1: + return 0 + if distance > N - order - 1: + return 0 + pyramid = pyramids[order] + offset = (N - 1 - order) - distance + length = N + 1 - 2 * (order + 1) + a = pyramid[2 * order + 2 + offset][2 * order + 1 + offset] + b = pyramid[2 * order + 2 + (length - offset - 1)][2 * order + 1 + (length - offset - 1)] + return a * b + +# def compute_band_distances(pyramids, N): +# num_orders = max(int(N / 2), 1) +# incoherent_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32) +# for order in range(0, num_orders): +# band_distance = (order + 1) * 2 +# for k in range(1, N): + + +# for k in range(0, N + 1): +# for distance in range() + +def main(): + # N = 8 + # print(compute_pyramids_full(N)) + # total_distances = np.zeros((N + 1, N + 1)).astype(np.int32) + # for i in range(0, N + 1): + # for j in range(0, N + 1): + # total_distances[i][j] = get_total_band_count_2(i, j, N) + # print(total_distances) + # return + + max_N = 8 + orders = [np.zeros((max_N + 1, max_N + 1)).astype(np.int32) for _ in range(0, max_N)] + + print('Attempting discrete solution...') + pyramids = compute_pyramids_full(max_N + 1) + + for N in range(max_N, max_N + 1): + # for N in range(2, max_N + 1): + print('=============================') + print('N@', N) + print('Generating points...') + points = [] + for i in range(0, 2 ** N): + points.append(decode(i, N)) + + print('Computing bands...') + bands = [[] for _ in range(0, N + 1)] + for i in range(1, len(points)): + distance = hamming_distance(points[0], points[i]) + bands[distance].append(points[i]) + + print('Computing band distances...') + incoherent_distances = np.zeros((N + 1, N + 1)) + for k in range(0, N + 1): + print('k@', k) + # print(k, '================================') + x_a = points[0] + y_a = xor(x_a, k) + incoherent_bands = np.zeros((N + 1, N + 1)).astype(np.int32) + precomputed_incoherent_bands = np.zeros((N + 1, N + 1)).astype(np.int32) + total_bands = np.zeros((N + 1, N + 1)).astype(np.int32) + precomputed_total_bands = np.zeros((N + 1, N + 1)).astype(np.int32) + for distance in range(0, N + 1): + band = bands[distance] + for x_b in band: + y_b = xor(x_b, k) + if y_a != y_b: + incoherent_distances[k][distance] += 1 + + if len(band) < 2: + continue + for band_origin in range(0, len(band)): + x_p = band[band_origin] + # print(x_p) + y_p = xor(x_p, k) + for i in range(0, len(band)): + if i == band_origin: + continue + x_q = band[i] + y_q = xor(x_q, k) + band_distance = hamming_distance(x_p, x_q) + total_bands[distance][band_distance] += 1 + if y_p != y_q: + incoherent_bands[distance][band_distance] += 1 + for band_distance in range(0, N + 1): + precomputed_incoherent_bands[distance][band_distance] = get_incoherent_band_count_2(pyramids, distance, band_distance, k, N) + precomputed_total_bands[distance][band_distance] = get_total_band_count_2(distance, band_distance, N) + # print(incoherent_bands) + for order in range(0, int(N / 2)): + root = 
math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2 + index = order * 2 + 2 + orders[order][N][k] = incoherent_bands[-2 - order][index] / root + + print(incoherent_bands) + print(precomputed_incoherent_bands) + print(total_bands) + print(precomputed_total_bands) + # print(total_bands) + # print(distance, hamming_distance(x_p, x_q), y_p, y_q) + # for i in range(0, len(orders)): + # print(orders[i]) + # # print(pyramids[i]) + # print('========================================') + # print(incoherent_distances) + # print(bands) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/space_analysis3.py b/space_analysis3.py new file mode 100644 index 0000000..90f3436 --- /dev/null +++ b/space_analysis3.py @@ -0,0 +1,385 @@ +import math +import numpy as np +import sys + +np.set_printoptions(threshold=sys.maxsize) + +cache = {} +def p_bernoulli(n, k, m, j): + key = (n, k, m, j) + if key in cache: + return cache[key] + probabilities = np.zeros((n + 1, n + 1)) + probabilities.fill(-1) + stack = [(0,0)] + while len(stack) > 0: + (a, b) = stack.pop() + if a + b == n: + probabilities[a][b] = 1 if a == k else 0 + elif a > j: + probabilities[a][b] = 0 + elif b > (m - j): + probabilities[a][b] = 0 + else: + p_left = probabilities[a + 1][b] + p_right = probabilities[a][b + 1] + if p_left >= 0 and p_right >= 0: + p = (j - a) / (m - a - b) + probabilities[a][b] = p_left * p + p_right * (1 - p) + else: + stack.append((a, b)) + if p_left < 0: + stack.append((a + 1, b)) + if p_right < 0: + stack.append((a, b + 1)) + # if len(cache) % 100 == 0: + # print('Cache size: ', len(cache), math.floor(10000 * hits / (hits + misses)) / 100, '%') + cache[key] = probabilities[0][0] + return probabilities[0][0] + +def decode(x, N): + index = 0 + output = np.zeros((N)) + while x > 0 and index < N: + output[index] = x & 0b1 + x >>= 1 + index += 1 + return output + +def hamming_distance(a, b): + return np.sum(np.logical_xor(a, b)) + +def xor(x, bits): + return np.sum(x[:bits]) % 2 + +def compute_pseudopascal(N): + dist = np.zeros((N, N)) + for j in range(0, N): + dist[0][j] = math.comb(N - 1, j) + dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2)) + for i in range(1, N): + for j in range(0, i + 1): + dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2)) + for k in range(i + 1, N): + for j in reversed(range(0, k)): + dist[i][j+1] = dist[i][j] + dist[i][j+1] + return dist + +def compute_pyramids(N): + num_orders = max(int(N / 2), 1) + pyramids = np.zeros((num_orders, N, N)).astype(np.int32) + # 1st order can be filled in as multiplication and forms the base case + for i in range(0, N): + for j in range(0, i + 1): + pyramids[0][i][j] = (i - j + 1) * (j + 1) + for order in range(1, num_orders): + offset = order * 2 + + # fill in the LHS and diagonal + for i in range(0, N - offset): + value = math.comb(2 * (order + 1) + i - 1, i) + pyramids[order][i + offset][0] = value + # mirror + pyramids[order][i + offset][i + offset] = value + + # accumulate along the diagonals + for i in range(1, N): + value = pyramids[order][i][0] + acc = value + for j in range(1, N - i): + value += acc + pyramids[order][i + j][j] = value + acc += pyramids[order - 1][i + j - 1][j - 1] + + return pyramids + +def compute_string_key(key): + return ','.join([str(x) for x in key]) + +def generate_bands(points): + all_bands = [{} for _ in range(0, len(points))] + for origin_index in range(0, len(points)): + bands = all_bands[origin_index] + key = [] + group = [index for index in range(0, len(points)) if index != origin_index] + 
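# depth-first refinement: each stack entry carries an origin point, the chain of
+ # distances taken so far (key) and the points still in play; the group is split
+ # into the sub-band at the first point's distance from the origin and the rest,
+ # and counts are accumulated under the stringified distance chain.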
stack = [(origin_index, key, group)] + while len(stack) > 0: + (origin_index, key, group) = stack.pop() + distance = hamming_distance(points[origin_index], points[group[0]]) + in_band = [] + out_of_band = [] + for index in group: + if distance == hamming_distance(points[origin_index], points[index]): + in_band.append(index) + else: + out_of_band.append(index) + if len(out_of_band) > 0: + stack.append((origin_index, key, out_of_band)) + key = key[:] + key.append(distance) + string_key = compute_string_key(key) + if string_key not in bands: + bands[string_key] = 0 + bands[string_key] += len(in_band) + if len(in_band) < 2: + continue + for origin_index in in_band: + group = [index for index in in_band if index != origin_index] + stack.append((origin_index, key, group)) + return all_bands + +def test(): + N = 8 + points = [decode(x, N) for x in range(0, 2 ** N)] + print(generate_bands(points)[0]) + + +# 2 +# 4, 4, +# 6, 8, 6 +# 8, 12, 12, 8 +# 10, 16, 18, 16, 10 +# 12, 20, 24, 24, 20, 12 +# 14, 24, 30, 32, 30, 24, 14 +# 16, 28, 36, 40, 40, 36, 28, 16 + +# 1 +# 2, 2 +# 3, 4, 3 +# 4, 6, 6, 4 +# 5, 8, 9, 8, 5 +# 6, 10, 12, 12, 10, 6 +# 7, 12, 15, 16, 15, 12, 7 + +# 6, 0, 6 +# 24, 12, 12, 24 +# 60, 48, 36, 48, 60 +# 120, 120, 96, 96, 120, 120 +# 210, 240, 210, 192, 210, 240, 210 +# 336, 420, 396, 360, 360, 396, 420, 336 +# 504, 672, 672, 624, 600, 624, 672, 672, 504 + + +# 1, 0, 1 +# 4, 2, 2, 4 +# 10, 8, 6, 8, 10 +# 20, 20, 16, 16, 20, 20 +# 35, 40, 35, 32, 35, 40, 35 +# 56, 70, 66, 60, 60, 66, 70, 56 +# 84, 112, 112, 104, 100, 104, 112, 112, 84 + +# +# 20, 0, 20, 0, 20, +# 120, 40, 80, 80, 40, 120 +# 420, 240, 260, 320, 260, 240, 420 +# 1120, 840, 760, 880, 880, 760, 840, 1120 + +# 1, 0, 1, 0, 1 +# 6, 2, 4, 4, 2, 6 +# 21, 12, 13, 16, 13, 12, 21 +# 56, 42, 38, 44, 44, 38, 42, 56 + +# 70, 0, 70, 0, 70, 0, 70 +# 560, 140, 420, 280, 280, 420, 140, 560 + +# 252, 0, 252, 0, 252, 0, 252, 0, 252 +# 2520, 504, 2016, 1008, 1512, 1512, 1008, 2016, 504, 2520 + +# 1, 2, 3, 4, +# 1, 3, 6, 10 +# 1, 4, 10, 20 +# 1, 5, 15, 35 +# 1, 6, + +# 1, 2, 1 +# 1, 3, 3, 1 +# 1, 4, 6, 4, 1 +# 1, 5, 10, 10, 5, 1 +# 1, 6, 15, 20, 15, 6, 1 + +# 2, 6, 12, 20, 30, 42, 56 +# 6, 30, 90, 210, 420 +# 20, 140, 560, +# 70 + +# 1, 3, 6, 10, 15, 21, 28 +# 1, 5, 15, 35 + +def main(): + test() + return + + N = 5 + + # print(compute_pseudopascal(10)) + # print(compute_pyramids(10)) + + points = [] + for i in range(0, 2 ** N): + points.append(decode(i, N)) + + bands = [[[] for _ in range(0, N + 1)] for _ in range(0, len(points))] + for i in range(0, len(points)): + a = points[i] + for j in range(0, len(points)): + if i == j: + continue + b = points[j] + distance = hamming_distance(a, b) + bands[i][distance].append(b) + + golden_incoherent_distances = None + golden_total_distances = None + golden_incoherent_bands = None + golden_total_bands = None + golden_incoherent_sub_bands = None + golden_total_sub_bands = None + # for t in range(0, len(points)): + for t in range(0, 1): + incoherent_distances = np.zeros((N + 1, N + 1)).astype(np.int32) + total_distances = np.zeros((N + 1)).astype(np.int32) + if t == 0: + golden_incoherent_distances = incoherent_distances + golden_total_distances = total_distances + incoherent_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32) + total_bands = np.zeros((N + 1, N + 1)).astype(np.int32) + if t == 0: + golden_incoherent_bands = incoherent_bands + golden_total_bands = total_bands + incoherent_sub_bands = np.zeros((N + 1, N + 1, N + 1, N + 1)).astype(np.int32) + total_sub_bands = np.zeros((N + 1, N + 1, N 
+ 1)).astype(np.int32) + if t == 0: + golden_incoherent_sub_bands = incoherent_sub_bands + golden_total_sub_bands = total_sub_bands + # print(t) + for k in range(1, N + 1): + # print(k, '================================') + x_a = points[t] + y_a = xor(x_a, k) + for distance in range(0, N + 1): + # print('distance', distance) + band = bands[t][distance] + for x_b in band: + y_b = xor(x_b, k) + if k == 1: + total_distances[distance] += 1 + if y_a != y_b: + incoherent_distances[k][distance] += 1 + + if len(band) < 2: + continue + for band_origin in range(0, len(band)): + x_p = band[band_origin] + y_p = xor(x_p, k) + sub_bands = [[] for _ in range(0, N + 1)] + for i in range(0, len(band)): + if i == band_origin: + continue + x_q = band[i] + y_q = xor(x_q, k) + band_distance = hamming_distance(x_p, x_q) + if k == 1: + total_bands[distance][band_distance] += 1 + if y_p != y_q: + incoherent_bands[k][distance][band_distance] += 1 + sub_bands[band_distance].append(x_q) + + # incoherent_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32) + # total_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32) + for band_distance in range(0, N + 1): + sub_band = sub_bands[band_distance] + if len(sub_band) < 2: + continue + for sub_band_origin in range(0, len(sub_band)): + x_u = sub_band[sub_band_origin] + y_u = xor(x_u, k) + for i in range(0, len(sub_band)): + if i == sub_band_origin: + continue + x_v = sub_band[i] + y_v = xor(x_v, k) + sub_band_distance = hamming_distance(x_v, x_u) + if k == 1: + total_sub_bands[band_distance][sub_band_distance] += 1 + if y_u != y_v: + incoherent_sub_bands[k][distance][band_distance][sub_band_distance] += 1 + # print(incoherent_sub_bands) + # print(total_sub_bands) + # print('==========================') + if t != 0: + if not np.array_equal(golden_incoherent_sub_bands, incoherent_sub_bands): + print(golden_incoherent_sub_bands) + print(incoherent_sub_bands) + raise Exception('Not symmetric') + + if not np.array_equal(golden_incoherent_bands, incoherent_bands): + print(golden_incoherent_bands) + print(incoherent_bands) + raise Exception('Not symmetric') + # print(incoherent_bands) + # print(total_bands) + # print(distance, hamming_distance(x_p, x_q), y_p, y_q) + if not np.array_equal(golden_incoherent_distances, incoherent_distances): + print(golden_incoherent_distances) + print(incoherent_distances) + raise Exception('Not symmetric') + + # print(golden_total_distances) + # print(golden_incoherent_distances) + + # print(golden_total_bands) + # print(golden_incoherent_bands) + # print(golden_total_bands) + + p = np.ones((2 ** N, N + 1)) + for sample_size in range(0, 2 ** N): + for k in range(0, N + 1): + for d1 in range(0, N + 1): + if golden_total_distances[d1] == 0: + continue + m = golden_total_distances[d1] + j = golden_incoherent_distances[k][d1] + n = min(sample_size, m) + l = int(n * j / m) + p[sample_size][k] *= p_bernoulli(n, l, m, j) + print(np.around(p, 2)) + + p = np.ones((4 ** N, N + 1)) + for sample_size in range(0, 4 ** N): + for k in range(0, N + 1): + for d1 in range(0, N + 1): + for d2 in range(0, N + 1): + if golden_total_bands[d1][d2] == 0: + continue + m = golden_total_bands[d1][d2] + j = golden_incoherent_bands[k][d1][d2] + n = min(sample_size, m) + l = int(n * j / m) + p[sample_size][k] *= p_bernoulli(n, l, m, j) + print(np.around(p, 3)) + + # p = np.ones((N + 1)) + # for k in range(0, N + 1): + # for d1 in range(0, N + 1): + # for d2 in range(0, N + 1): + # if golden_total_bands[d1][d2] == 0: + # continue + # partial = 
golden_incoherent_bands[k][d1][d2] / golden_total_bands[d1][d2] + # p[k] *= max(partial, 1 - partial) + # print(p) + + # p = np.ones((N + 1)) + # for k in range(0, N + 1): + # for d1 in range(0, N + 1): + # for d2 in range(0, N + 1): + # for d3 in range(0, N + 1): + # if golden_total_sub_bands[d1][d2][d3] == 0: + # continue + # partial = golden_incoherent_sub_bands[k][d1][d2][d3] / golden_total_sub_bands[d1][d2][d3] + # p[k] *= max(partial, 1 - partial) + # print(p) + + # print(bands) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/space_analysis4.py b/space_analysis4.py new file mode 100644 index 0000000..146244e --- /dev/null +++ b/space_analysis4.py @@ -0,0 +1,229 @@ +import math +import numpy as np +import sys + +np.set_printoptions(threshold=sys.maxsize) + +def decode(x, N): + index = 0 + output = np.zeros((N)) + while x > 0 and index < N: + output[index] = x & 0b1 + x >>= 1 + index += 1 + return output + +def hamming_distance(a, b): + return np.sum(np.logical_xor(a, b)) + +def xor(x, bits): + return np.sum(x[:bits]) % 2 + +def compute_pseudopascal(N): + dist = np.zeros((N, N)) + for j in range(0, N): + dist[0][j] = math.comb(N - 1, j) + dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2)) + for i in range(1, N): + for j in range(0, i + 1): + dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2)) + for k in range(i + 1, N): + for j in reversed(range(0, k)): + dist[i][j+1] = dist[i][j] + dist[i][j+1] + return dist + +def compute_pyramids(N): + num_orders = max(int(N / 2), 1) + pyramids = np.zeros((num_orders, N, N)).astype(np.int32) + # 1st order can be filled in as multiplication and forms the base case + for i in range(0, N): + for j in range(0, i + 1): + pyramids[0][i][j] = (i - j + 1) * (j + 1) + for order in range(1, num_orders): + offset = order * 2 + + # fill in the LHS and diagonal + for i in range(0, N - offset): + value = math.comb(2 * (order + 1) + i - 1, i) + pyramids[order][i + offset][0] = value + # mirror + pyramids[order][i + offset][i + offset] = value + + # accumulate along the diagonals + for i in range(1, N): + value = pyramids[order][i][0] + acc = value + for j in range(1, N - i): + value += acc + pyramids[order][i + j][j] = value + acc += pyramids[order - 1][i + j - 1][j - 1] + + return pyramids + +# 2 +# 4, 4, +# 6, 8, 6 +# 8, 12, 12, 8 +# 10, 16, 18, 16, 10 +# 12, 20, 24, 24, 20, 12 +# 14, 24, 30, 32, 30, 24, 14 +# 16, 28, 36, 40, 40, 36, 28, 16 + +# 1 +# 2, 2 +# 3, 4, 3 +# 4, 6, 6, 4 +# 5, 8, 9, 8, 5 +# 6, 10, 12, 12, 10, 6 +# 7, 12, 15, 16, 15, 12, 7 + +# 6, 0, 6 +# 24, 12, 12, 24 +# 60, 48, 36, 48, 60 +# 120, 120, 96, 96, 120, 120 +# 210, 240, 210, 192, 210, 240, 210 +# 336, 420, 396, 360, 360, 396, 420, 336 +# 504, 672, 672, 624, 600, 624, 672, 672, 504 + + +# 1, 0, 1 +# 4, 2, 2, 4 +# 10, 8, 6, 8, 10 +# 20, 20, 16, 16, 20, 20 +# 35, 40, 35, 32, 35, 40, 35 +# 56, 70, 66, 60, 60, 66, 70, 56 +# 84, 112, 112, 104, 100, 104, 112, 112, 84 + +# +# 20, 0, 20, 0, 20, +# 120, 40, 80, 80, 40, 120 +# 420, 240, 260, 320, 260, 240, 420 +# 1120, 840, 760, 880, 880, 760, 840, 1120 + +# 1, 0, 1, 0, 1 +# 6, 2, 4, 4, 2, 6 +# 21, 12, 13, 16, 13, 12, 21 +# 56, 42, 38, 44, 44, 38, 42, 56 + +# 70, 0, 70, 0, 70, 0, 70 +# 560, 140, 420, 280, 280, 420, 140, 560 + +# 252, 0, 252, 0, 252, 0, 252, 0, 252 +# 2520, 504, 2016, 1008, 1512, 1512, 1008, 2016, 504, 2520 + +# 1, 2, 3, 4, +# 1, 3, 6, 10 +# 1, 4, 10, 20 +# 1, 5, 15, 35 +# 1, 6, + +# 1, 2, 1 +# 1, 3, 3, 1 +# 1, 4, 6, 4, 1 +# 1, 5, 10, 10, 5, 1 +# 1, 6, 15, 20, 15, 6, 1 + +# 2, 6, 12, 20, 30, 42, 56 +# 6, 
30, 90, 210, 420 +# 20, 140, 560, +# 70 + +# 1, 3, 6, 10, 15, 21, 28 +# 1, 5, 15, 35 + +def main(): + last_incoherent_distances = None + last_incoherent_bands = None + last_incoherent_sub_bands = None + for N in range(4, 5): + # print(compute_pseudopascal(10)) + # print(compute_pyramids(10)) + + points = [] + for i in range(0, 2 ** N): + points.append(decode(i, N)) + + bands = [[[] for _ in range(0, N + 1)] for _ in range(0, len(points))] + for i in range(0, len(points)): + a = points[i] + for j in range(0, len(points)): + if i == j: + continue + b = points[j] + distance = hamming_distance(a, b) + bands[i][distance].append(b) + + # for t in range(0, len(points)): + for t in range(0, 1): + incoherent_distances = np.zeros((N + 1, N + 1)) + incoherent_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32) + incoherent_sub_bands = np.zeros((N + 1, N + 1, N + 1, N + 1)).astype(np.int32) + for k in range(1, N + 1): + # print(k, '================================') + x_a = points[t] + y_a = xor(x_a, k) + total_bands = np.zeros((N + 1, N + 1)).astype(np.int32) + for distance in range(0, N + 1): + # print('distance', distance) + band = bands[t][distance] + for x_b in band: + y_b = xor(x_b, k) + if y_a != y_b: + incoherent_distances[k][distance] += 1 + + if len(band) < 2: + continue + for band_origin in range(0, len(band)): + x_p = band[band_origin] + y_p = xor(x_p, k) + sub_bands = [[] for _ in range(0, N + 1)] + for i in range(0, len(band)): + if i == band_origin: + continue + x_q = band[i] + y_q = xor(x_q, k) + band_distance = hamming_distance(x_p, x_q) + total_bands[distance][band_distance] += 1 + if y_p != y_q: + incoherent_bands[k][distance][band_distance] += 1 + sub_bands[band_distance].append(x_q) + + # incoherent_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32) + # total_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32) + for band_distance in range(0, N + 1): + sub_band = sub_bands[band_distance] + if len(sub_band) < 2: + continue + for sub_band_origin in range(0, len(sub_band)): + x_u = sub_band[sub_band_origin] + y_u = xor(x_u, k) + for i in range(0, len(sub_band)): + if i == sub_band_origin: + continue + x_v = sub_band[i] + y_v = xor(x_v, k) + sub_band_distance = hamming_distance(x_v, x_u) + # total_sub_bands[band_distance][sub_band_distance] += 1 + if y_u != y_v: + incoherent_sub_bands[k][distance][band_distance][sub_band_distance] += 1 + # print(incoherent_sub_bands) + # print(total_sub_bands) + # print('==========================') + + if last_incoherent_sub_bands is not None: + for distance in range(1, int(N / 2) + 1): + for band_distance in range(0, N + 1): + for sub_band_distance in range (0, N + 1): + if band_distance >= N or sub_band_distance >= N or last_incoherent_sub_bands[1][distance][band_distance][sub_band_distance] == 0: + value = incoherent_sub_bands[1][distance][band_distance][sub_band_distance] + if value > 0: + print(N, value, (distance, band_distance, sub_band_distance)) + + last_incoherent_distances = incoherent_distances + last_incoherent_bands = incoherent_bands + last_incoherent_sub_bands = incoherent_sub_bands + + # print(bands) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/train_generator.py b/train_generator.py new file mode 100644 index 0000000..f4e790a --- /dev/null +++ b/train_generator.py @@ -0,0 +1,164 @@ +import hashlib +import secrets +from struct import pack, pack_into, unpack_from + +def sha(x): + m = hashlib.sha256() + m.update(x) + result = m.digest() + return result[0] & 0b1 + +def bit_at_index(buffer, 
index): + offset = (index >> 3) % len(buffer) + return buffer[offset] & (1 << (index & 0b111)) != 0 + +def evaluate(f, x): + stack = [] + offset = 0 + value = 0 + while offset < len(f): + opcode = f[offset] + offset += 1 + if opcode == 0 or opcode == 1: + stack.append((opcode, value)) + value = 0 + elif opcode == 2: + if len(stack) == 0: + return (value, offset) + (last_opcode, _) = stack[-1] + if last_opcode > 0: + stack.append((0, value)) + value = 0 + continue + right = value + (_, left) = stack.pop() + (opcode, value) = stack.pop() + value ^= ((left & right) ^ (opcode & 0b1)) + else: + try: + index = unpack_from('I', f, offset)[0] + offset += 4 + if bit_at_index(x, index): + value ^= 1 + except: + break + + while len(stack) > 0: + (opcode, other_value) = stack.pop() + if opcode == 0: + right = other_value + (opcode, left) = stack.pop() + value ^= ((left & right) ^ (opcode & 0b1)) + value ^= other_value ^ (opcode & 0b1) + return (value, offset) + +def random_generator(): + return secrets.token_bytes(256) + +def random_input(): + return secrets.token_bytes(4) + +def generate(generator, sample): + f_size = 1024 + f = bytearray(f_size) + x = bytearray(4) + sample + for i in range(0, f_size): + build_value = 0 + for j in range(0, 8): + step = i * 8 + j + pack_into('H', x, 0, step) + (value, _) = evaluate(generator, x) + build_value <<= 1 + build_value |= value + f[i] = build_value + return f + +def sample(N): + inputs = [random_input() for i in range(0, N)] + outputs = [sha(x) for x in inputs] + return (inputs, outputs) + +def augment_inputs(inputs, layers): + augmented_inputs = [] + for x in inputs: + x_n = bytearray(1) + x + for layer in layers: + build_value = 0 + for candidate in layer: + (value, _) = evaluate(candidate, x_n) + build_value <<= 1 + build_value |= value + x_n[0] = build_value + augmented_inputs.append(x_n) + return augmented_inputs + +def pack_sample(inputs, outputs): + sample = bytearray() + for i in range(0, len(inputs)): + sample += inputs[i] + sample += bytearray([outputs[i]]) + return sample + +def compute_score(f, inputs, outputs): + correct = 0.0 + for i in range(0, len(inputs)): + (value, _) = evaluate(f, inputs[i]) + if value == outputs[i]: + correct += 1 + return correct / len(outputs) + +def evaluate_generator(g): + num_candidates = 8 + num_train_samples = 64 + num_test_samples = 1000 + num_epochs = 10 + threshold = 0 + + layers = [] + for epoch in range(0, num_epochs): + difficulty = 0 + layer = [] + candidate = 0 + scores = [] + while candidate < num_candidates: + (x, y) = sample(num_train_samples) + x_n = augment_inputs(x, layers) + f = generate(g, pack_sample(x_n, y)) + print(f) + + (x, y) = sample(num_test_samples) + x_n = augment_inputs(x, layers) + score = compute_score(f, x_n, y) + + if score < threshold - difficulty * 0.0001: + difficulty += 1 + continue + + print(epoch, score, difficulty) + + layer.append(f) + scores.append(score) + difficulty = 0 + candidate += 1 + threshold = sum(scores) / len(scores) + layers.append(layer) + return threshold + +def main(): + num_random_candidates = 1000 + + g = None + score = 0 + + for i in range(0, num_random_candidates): + g_n = random_generator() + print(g_n) + score_n = evaluate_generator(g_n) + print(i, score_n) + if score > score_n: + score = score_n + g = g_n + + +if __name__ == "__main__": + main() \ No newline at end of file
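
A minimal usage sketch for the byte-encoded programs consumed by evaluate() in train_generator.py. This is illustrative only: it assumes the file added in this patch is importable as the module train_generator, and the program f built below (one non-structural opcode followed by a 4-byte little-endian bit index) is a hypothetical example, not something the patch itself constructs.

    from struct import pack
    from train_generator import evaluate, sample, compute_score

    # Any opcode other than 0, 1 or 2 makes evaluate() read a 4-byte index
    # and XOR the addressed bit of the input buffer into the running value.
    f = bytes([3]) + pack('I', 5)       # "output bit 5 of the input"

    x = bytes([0b00100000, 0, 0, 0])    # 4-byte input with bit 5 set
    print(evaluate(f, x))               # -> (1, 5): value 1, all 5 bytes of f consumed

    # Scoring against the sha() target used throughout: a program that copies a
    # single input bit should sit near 0.5, since one input bit should not
    # predict the low bit of the input's SHA-256 digest.
    inputs, outputs = sample(256)
    print(compute_score(f, inputs, outputs))

Because the index read is wrapped in a try/except, a truncated 4-byte operand at the end of f simply stops evaluation early, and evaluate() still returns the value accumulated so far together with the number of bytes it consumed.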