Add probabilities work to git
Commit fd2045dfca

2_point_plot.py (executable file, 77 lines added)
@@ -0,0 +1,77 @@
import numpy as np
import matplotlib.pyplot as plt


def flip(n, index):
    return n ^ (1 << index)


def distance(i, j):
    # Hamming distance between vertex indices i and j
    return bin(i ^ j).count('1')


def matrix_system_with_two_knowns(p, q, N):
    # Linear system over the 2^N hypercube vertices: vertex p is pinned to 1,
    # vertex q to 0, and every other vertex must equal the mean of its N neighbours.
    S = 2 ** N
    mat = np.zeros((S, S))
    val = np.zeros(S)
    for i in range(0, S):
        if i == p:
            mat[i][i] = 1.0
            val[i] = 1.0
        elif i == q:
            mat[i][i] = 1.0
        else:
            mat[i][i] = -1.0
            for j in range(0, N):
                mat[i][flip(i, j)] = 1.0 / N
    return (mat, val)


def main():
    final_values = []
    final_x = []
    final_y = []

    for N in range(11, 12):
        print(N)
        S = 2 ** N
        distances = np.zeros((S, S))
        for i in range(0, S):
            for j in range(0, S):
                distances[i][j] = distance(i, j)

        # final_values = []
        # final_basis = []
        visited_distances = set()
        for p in range(0, S):
            for q in range(p + 1, S):
                pq_distance = distances[p, q]
                if pq_distance in visited_distances:
                    continue
                visited_distances.add(pq_distance)
                (mat, val) = matrix_system_with_two_knowns(p, q, N)
                solution = np.linalg.inv(mat).dot(val)
                for i in range(0, len(solution)):
                    final_x.append(distances[i, p] / N)
                    final_y.append(distances[i, q] / N)
                    final_values.append(solution[i])

                # values = list(set(solution))
                # values.sort()
                # if len(values) <= 1:
                #     continue
                # basis = [1.0 * i / (len(values) - 1) for i in range(len(values))]

                # final_values.extend(values)
                # final_basis.extend(basis)

    # fig, ax = plt.subplots()
    # ax.scatter(final_values, final_basis)

    # print(np.linalg.lstsq((final_x, final_y), final_values))

    # Plot each vertex's solved value against its normalized distances to p and q.
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.scatter(final_x, final_y, final_values)

    ax.grid(True)
    plt.show()


if __name__ == "__main__":
    main()
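For reference, here is a tiny worked instance of the system that matrix_system_with_two_knowns builds: every unpinned vertex of the N-cube is constrained to equal the mean of its N neighbours, while p is pinned to 1 and q to 0. The sketch below writes out the N=2, p=0, q=3 case explicitly and solves it with np.linalg.solve (equivalent to the inv(...).dot(...) used above, and a little more numerically stable); it is an illustration, not part of the committed script.

import numpy as np

# N = 2, p = 0 (pinned to 1), q = 3 (pinned to 0); the rows for vertices 1 and 2
# say "my value minus the mean of my two neighbours is zero".
mat = np.array([
    [ 1.0,  0.0,  0.0,  0.0],   # p0 = 1
    [ 0.5, -1.0,  0.0,  0.5],   # p1 = (p0 + p3) / 2
    [ 0.5,  0.0, -1.0,  0.5],   # p2 = (p0 + p3) / 2
    [ 0.0,  0.0,  0.0,  1.0],   # p3 = 0
])
val = np.array([1.0, 0.0, 0.0, 0.0])
print(np.linalg.solve(mat, val))  # [1.  0.5 0.5 0. ] -- vertices 1 and 2 sit midway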

Dockerfile (new file, 6 lines added)
@@ -0,0 +1,6 @@
FROM nvidia/cuda:11.6.0-devel-ubuntu20.04
RUN apt-get update && apt-get install -y python3 python3-pip
RUN pip install numpy pycuda
WORKDIR /app
COPY mutations_cuda.py /app/mutations_cuda.py
CMD ["python3", "-u", "mutations_cuda.py"]

README.md (new file, 43 lines added)
@@ -0,0 +1,43 @@
Terminology:

The sample space 'S' has 'n' bits, and there is a function 'f' that maps 'x' (an n-bit vector) in 'S' to 'y':

f(x) = y

We can use PCA to generate candidates for some sub-sample 'P' of 'S'. Candidates that exhibit generalization
properties (they score higher than the previous generation on a sub-sample 'Q' they have not seen before) can be
cascaded into the input for training the next generation of candidates.

This candidate generation process is 'G'. 'G' is considered to perform well if the candidates it
generates exhibit generalization properties.

To bootstrap, we can use PCA for 'G' and store the state machine instructions 'S_G' that create the highest-performing
candidates on a particular problem 'f'; 'S_G' then serves as a sample space for training a new generator 'G_n'.

Use 'G' to generate candidates for 'G_n'. Training samples come from 'S_G', but candidates should be evaluated
by how well the candidates they generate perform on 'f'.

So we need to be able to score a particular g ∈ G_n. We can evaluate for a fixed number of epochs and use some combination
of the average difficulty and the evaluation score.

A generator G is a state machine with input

G(|j-bit step|m * n-bit inputs|) = y

where y is a bit in an instruction.

'a' is an address in 'A', |log2(n)| bits wide.

|opcode 2-bit|
|00 - xor|
|01 - end|
|10 - and|
|11 - nand|

xor is followed by an address 'a' for an input bit.

This process can be repeated indefinitely, replacing 'G' with 'G_n', to create new generators that outperform the previous
generation at solving 'f'.

A candidate is a state machine with input

f(|n-bit input|) = y
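A minimal sketch of how a candidate's instruction stream could be decoded under the opcode table above. The operand layout for 'and'/'nand' is not spelled out in this README, so the sketch only implements the 'xor'-followed-by-address and 'end' cases; the bit-packing, the function name, and the example program are assumptions for illustration, not part of the repository.

# Hypothetical decoder for the 2-bit-opcode format described above.
# Assumes instructions are a flat list of bits, each address is |log2(n)| bits,
# and the program is "xor a" repeated until "end" -- i.e. the candidate
# computes the XOR of a chosen subset of its input bits.
def evaluate_candidate(bits, x, n):
    addr_width = max(1, (n - 1).bit_length())   # |log2(n)| bits per address
    value = 0
    pc = 0
    while pc + 2 <= len(bits):
        opcode = (bits[pc] << 1) | bits[pc + 1]
        pc += 2
        if opcode == 0b01:            # end
            break
        if opcode == 0b00:            # xor, followed by an address 'a'
            a = 0
            for b in bits[pc:pc + addr_width]:
                a = (a << 1) | b
            pc += addr_width
            value ^= (x >> a) & 1     # xor in the addressed input bit
        else:
            break                     # and/nand operand layout left unspecified
    return value

# Example: program "xor 0; xor 2; end" on a 4-bit input.
program = [0, 0, 0, 0,   0, 0, 1, 0,   0, 1]
print(evaluate_candidate(program, 0b0101, 4))   # -> 0 (bit0 ^ bit2 = 1 ^ 1)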

model_probabilities.py (executable file, 171 lines added)
@@ -0,0 +1,171 @@
import math
from statistics import median


def count_one_bits(n):
    return bin(n).count("1")


def compute_distance(a, b):
    # Inverse-exponential proximity weight: closer points (smaller Hamming
    # distance) contribute more.
    distance = count_one_bits(a ^ b)
    # return 1 / (8 ** distance)
    return 1 / (2 ** distance)


def xor_n(n):
    return count_one_bits(n) % 2


def compute_distances(N):
    return [[compute_distance(i, j) for j in range(N)] for i in range(N)]


def compute_nn_probabilities(i, knowns, distances):
    # Distance-weighted vote of the known labels for vertex i:
    # returns (P(value == 0), P(value == 1)).
    total = 0.0
    total_zero = 0.0
    total_one = 0.0
    for known in knowns:
        j = known[0]
        distance = distances[i][j]
        total += distance
        if known[1] == 0:
            total_zero += distance
        else:
            total_one += distance
    p_zero = total_zero / total
    p_one = total_one / total
    return (p_zero, p_one)


def compute_est_coherence(i, knowns, coherences, distances):
    total = 0.0
    coherence = 0.0
    for known in knowns:
        j = known[0]
        distance = distances[i][j]
        total += distance
        coherence += distance * coherences[j]
    return coherence / total


def compute_est_coherences(N, knowns, distances):
    nn_probabilities = [None for i in range(N)]
    est_coherences = [None for i in range(N)]

    # for known in knowns:
    #     i = known[0]
    #     nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances)
    for known in knowns:
        i = known[0]
        nn_probabilities[i] = (1.0 - known[1], 1.0 * known[1])

    for i in range(len(nn_probabilities)):
        if not nn_probabilities[i] is None:
            continue
        nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances)

    print(nn_probabilities)

    # Coherence of vertex i: distance-weighted probability that i agrees with
    # the other vertices' estimated values.
    for i in range(len(nn_probabilities)):
        total = 0.0
        coherence = 0.0
        p_i = nn_probabilities[i]
        for j in range(len(nn_probabilities)):
            if i == j:
                continue
            p_j = nn_probabilities[j]
            distance = distances[i][j]
            total += distance
            coherence += (p_i[0] * p_j[0] + p_i[1] * p_j[1]) * distance
        # print(coherence, total)
        est_coherences[i] = coherence / total

    # for known in knowns:
    #     i = known[0]
    #     est_coherences[i] = nn_probabilities[i][known[1]]

    # for i in range(len(est_coherences)):
    #     if not est_coherences[i] is None:
    #         continue
    #     est_coherences[i] = compute_est_coherence(i, knowns, est_coherences, distances)

    # print(est_coherences)

    return est_coherences


def score(coherences):
    # while len(coherences) > 1:
    #     coherences = [(coherences[i] + coherences[i + 1]) / 2 for i in range(0, len(coherences), 2)]
    # return coherences[0]

    # return median(coherences)
    return sum(coherences) / len(coherences)


def xor_by_index(knowns, index):
    # Absorb input bit `index` into the labels: flip the label of every known
    # whose key has that bit set.
    mask = 1 << index
    knowns = knowns[:]
    for i in range(len(knowns)):
        known = knowns[i]
        if known[0] & mask:
            knowns[i] = (known[0], known[1] ^ 1)
    return knowns


def main():
    n = 3
    N = 2 ** n
    distances = compute_distances(N)

    knowns = [(i, xor_n(i)) for i in [
        0, 3, 4, 5, 7
        # 3, 5, 6, 10, 12, 14
        # 1, 3, 7, 10, 14, 15
        # 0, 3, 5, 6, 10, 11, 12
        # 0, 3, 5, 6, 10, 11, 12, 24, 30
    ]]
    print(knowns)
    print()

    # knowns = [
    #     (1, 1),
    #     (3, 0),
    #     (7, 1),
    #     (10, 0),
    #     (14, 1),
    #     (15, 0)
    # ]

    # knowns = [
    #     (0, 0),
    #     (3, 0),
    #     (4, 1),
    #     (5, 0),
    #     (7, 1)
    # ]

    # knowns = [
    #     (0, 0),
    #     (1, 1),
    #     (2, 1),
    #     (3, 0),
    #     (4, 1),
    #     (5, 0),
    #     (6, 0),
    #     (7, 1)
    # ]

    coherences = compute_est_coherences(N, knowns, distances)
    best_coherence = score(coherences)
    print(best_coherence)

    # Greedy hill-climb: keep absorbing the bit whose flip most improves the
    # average coherence until no flip helps.
    while best_coherence < 1.0:
        print()
        # print(knowns)
        # print()
        best_index = -1
        for i in range(0, n):
            coherences = compute_est_coherences(N, xor_by_index(knowns, i), distances)
            coherence = score(coherences)
            print(coherence)
            if coherence > best_coherence:
                best_coherence = coherence
                best_index = i
        if best_index < 0:
            break
        knowns = xor_by_index(knowns, best_index)


if __name__ == "__main__":
    main()
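The greedy loop in main() relies on xor_by_index absorbing one input bit into the labels: if the bit at `index` is set in a known's key, that known's label is flipped, which for the parity target amounts to dividing out x_index. A standalone illustration (the helper is re-stated compactly here so the snippet runs on its own; the keys are an arbitrary toy choice):

# Absorbing input bits into the labels of a parity problem.
def xor_by_index(knowns, index):
    mask = 1 << index
    return [(k, v ^ 1) if k & mask else (k, v) for (k, v) in knowns]

knowns = [(0b011, 0), (0b100, 1), (0b111, 1)]   # (key, parity(key))
for bit in (2, 1, 0):
    knowns = xor_by_index(knowns, bit)
    print(bit, knowns)
# After absorbing bits 2, 1 and 0 every label is 0: the residual function is
# constant, which is the "coherence == 1.0" stopping condition of the loop above.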

model_probabilities2.py (executable file, 260 lines added)
@@ -0,0 +1,260 @@
import math
|
||||
from statistics import median, stdev
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def compute_distance(a, b):
|
||||
distance = count_one_bits(a ^ b)
|
||||
# return 1 / (8 ** distance)
|
||||
if distance == 0:
|
||||
return 0
|
||||
# return 1 / (64 ** (distance - 1))
|
||||
return distance
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def compute_distances(N):
|
||||
return [[compute_distance(i, j) for j in range(N)] for i in range(N)]
|
||||
|
||||
def compute_nn_probabilities(i, knowns, distances):
|
||||
total = 0.0
|
||||
total_zero = 0.0
|
||||
total_one = 0.0
|
||||
for known in knowns:
|
||||
j = known[0]
|
||||
if i == j:
|
||||
continue
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
if known[1] == 0:
|
||||
total_zero += distance
|
||||
else:
|
||||
total_one += distance
|
||||
p_zero = total_zero / total
|
||||
p_one = total_one / total
|
||||
return (p_zero, p_one)
|
||||
|
||||
def interpolate_probabilities(i, knowns, distances, probabilities, dim):
|
||||
total = 0.0
|
||||
total_dim = [0.0] * dim
|
||||
for known in knowns:
|
||||
j = known[0]
|
||||
if i == j:
|
||||
continue
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
probability = probabilities[j]
|
||||
for index in range(dim):
|
||||
total_dim[index] += distance * probability[index]
|
||||
for index in range(dim):
|
||||
total_dim[index] /= total
|
||||
return total_dim
|
||||
|
||||
def compute_est_coherence(i, knowns, coherences, distances):
|
||||
total = 0.0
|
||||
coherence = 0.0
|
||||
for known in knowns:
|
||||
j = known[0]
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
coherence += distance * coherences[j]
|
||||
return coherence / total
|
||||
|
||||
def compute_est_coherences(N, knowns, distances):
|
||||
nn_probabilities = [None for i in range(N)]
|
||||
nn_correct_probabilities = [None for i in range(N)]
|
||||
coherences = []
|
||||
|
||||
for known in knowns:
|
||||
i = known[0]
|
||||
nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances)
|
||||
|
||||
# for i in range(len(nn_probabilities)):
|
||||
# if not nn_probabilities[i] is None:
|
||||
# continue
|
||||
# nn_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_probabilities, 2)
|
||||
|
||||
for known in knowns:
|
||||
i = known[0]
|
||||
nn_correct_probabilities[i] = [nn_probabilities[i][known[1]]]
|
||||
|
||||
# for i in range(len(nn_correct_probabilities)):
|
||||
# if not nn_correct_probabilities[i] is None:
|
||||
# continue
|
||||
# nn_correct_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_correct_probabilities, 1)
|
||||
|
||||
coherences_0 = []
|
||||
coherences_1 = []
|
||||
for known_i in knowns:
|
||||
i = known_i[0]
|
||||
coherence = 0.0
|
||||
total = 0.0
|
||||
for known_j in knowns:
|
||||
j = known_j[0]
|
||||
if i == j:
|
||||
continue
|
||||
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
|
||||
nn_p_i_0 = nn_probabilities[i][0]
|
||||
nn_p_i_1 = nn_probabilities[i][1]
|
||||
nn_c_p_i = nn_correct_probabilities[i][0]
|
||||
|
||||
nn_p_j_0 = nn_probabilities[j][0]
|
||||
nn_p_j_1 = nn_probabilities[j][1]
|
||||
nn_c_p_j = nn_correct_probabilities[j][0]
|
||||
|
||||
p_i_0 = nn_p_i_0 * nn_c_p_i + nn_p_i_1 * (1 - nn_c_p_i)
|
||||
p_i_1 = nn_p_i_1 * nn_c_p_i + nn_p_i_0 * (1 - nn_c_p_i)
|
||||
|
||||
p_j_0 = nn_p_j_0 * nn_c_p_j + nn_p_j_1 * (1 - nn_c_p_j)
|
||||
p_j_1 = nn_p_j_1 * nn_c_p_j + nn_p_j_0 * (1 - nn_c_p_j)
|
||||
|
||||
coherence += distance * (p_i_0 * p_j_0 + p_i_1 * p_j_1)
|
||||
coherences.append(coherence / total)
|
||||
if known_i[1] == 0:
|
||||
coherences_0.append(coherence / total)
|
||||
else:
|
||||
coherences_1.append(coherence / total)
|
||||
|
||||
return coherences
|
||||
|
||||
def score(coherences, knowns, distances):
|
||||
# while len(coherences) > 1:
|
||||
# coherences = [(coherences[i] + coherences[i + 1]) / 2 for i in range(0, len(coherences), 2)]
|
||||
# return coherences[0]
|
||||
|
||||
# return median(coherences)
|
||||
# return sum(coherences) / len(coherences)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
numerator_0 = 0.0
|
||||
denominator_0 = 0.0
|
||||
numerator_1 = 0.0
|
||||
denominator_1 = 0.0
|
||||
count_0 = 0.0
|
||||
count_1 = 0.0
|
||||
for i in range(len(knowns)):
|
||||
weight = 0
|
||||
for j in range(len(knowns)):
|
||||
weight += distances[knowns[i][0]][knowns[j][0]]
|
||||
print(weight, end=' ')
|
||||
if knowns[i][1] == 0:
|
||||
denominator_0 += weight
|
||||
numerator_0 += weight * coherences[i]
|
||||
count_0 += 1
|
||||
else:
|
||||
denominator_1 += weight
|
||||
numerator_1 += weight * coherences[i]
|
||||
count_1 += 1
|
||||
# print()
|
||||
if count_0 == 0 or count_1 == 0:
|
||||
return 1.0
|
||||
|
||||
# return ((sum(coherences[0]) / len(coherences[0])) + (sum(coherences[1]) / len(coherences[1]))) / 2.0
|
||||
# return (sum(coherences[0]) + sum(coherences[1])) / (len(coherences[0]) + len(coherences[1]))
|
||||
# div_0 = (numerator_0 / denominator_0 if denominator_0 > 0 else 1.0) * 0.5
|
||||
# div_1 = (numerator_1 / denominator_1 if denominator_1 > 0 else 1.0) * 0.5
|
||||
# return div_0 + div_1
|
||||
# aligned = 1.0 - abs(0.5 - max(count_0 / (count_0 + count_1), count_1 / (count_0 + count_1)))
|
||||
# return ((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) * (aligned ** 0.1)
|
||||
# return (((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) + 0.12 * aligned) * (1.0 / 1.12)
|
||||
return (numerator_0 + numerator_1) / (denominator_0 + denominator_1)
|
||||
|
||||
def xor_by_index(knowns, index):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
known = knowns[i]
|
||||
if known[0] & mask:
|
||||
knowns[i] = (known[0], known[1] ^ 1)
|
||||
return knowns
|
||||
|
||||
def main():
|
||||
n = 8
|
||||
N = 2 ** n
|
||||
distances = compute_distances(N)
|
||||
|
||||
knowns = [(i, xor_n(i)) for i in [
|
||||
# 0, 3, 4, 5, 7
|
||||
# 3, 5, 6, 10, 12, 14
|
||||
# 1, 3, 7, 10, 14, 15
|
||||
# 0, 3, 5, 6, 10, 11, 12
|
||||
0, 3, 5, 6, 10, 11, 12, 24, 30
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
|
||||
# 128, 131, 248, 0, 7, 13, 17, 19
|
||||
]]
|
||||
|
||||
for known_i in knowns:
|
||||
i = known_i[0]
|
||||
for known_j in knowns:
|
||||
j = known_j[0]
|
||||
print(distances[i][j], end=' ')
|
||||
print()
|
||||
|
||||
print(knowns)
|
||||
print()
|
||||
|
||||
# knowns = [
|
||||
# (1, 1),
|
||||
# (3, 0),
|
||||
# (7, 1),
|
||||
# (10, 0),
|
||||
# (14, 1),
|
||||
# (15, 0)
|
||||
# ]
|
||||
|
||||
# knowns = [
|
||||
# (0, 0),
|
||||
# (3, 0),
|
||||
# (4, 1),
|
||||
# (5, 0),
|
||||
# (7, 1)
|
||||
# ]
|
||||
|
||||
# knowns = [
|
||||
# (0, 0),
|
||||
# (1, 1),
|
||||
# (2, 1),
|
||||
# (3, 0),
|
||||
# (4, 1),
|
||||
# (5, 0),
|
||||
# (6, 0),
|
||||
# (7, 1)
|
||||
# ]
|
||||
|
||||
coherences = compute_est_coherences(N, knowns, distances)
|
||||
best_coherence = score(coherences, knowns, distances)
|
||||
print(best_coherence)
|
||||
|
||||
flipped = []
|
||||
while best_coherence < 1.0:
|
||||
print()
|
||||
# print(knowns)
|
||||
# print()
|
||||
best_index = -1
|
||||
# best_coherence = 0
|
||||
for i in range(0, n):
|
||||
if i in flipped:
|
||||
continue
|
||||
mutated_knowns = xor_by_index(knowns, i)
|
||||
coherences = compute_est_coherences(N, mutated_knowns, distances)
|
||||
coherence = score(coherences, mutated_knowns, distances)
|
||||
# print(coherence)
|
||||
print(coherence, end=' ')
|
||||
print(mutated_knowns)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index)
|
||||
# flipped.append(best_index)
|
||||
print(knowns)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
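The score used in this version is a distance-weighted mean of the per-known coherences: each known's weight is the sum of its Hamming distances to the other knowns, and the weighted sums for the 0-labelled and 1-labelled groups are combined into one ratio (returning 1.0 when either class is empty, since a single-class sample is already "solved"). A minimal standalone sketch of that combination, with invented weights and coherence values purely for illustration:

# Hypothetical numbers: three knowns, their distance weights, and per-known
# coherences (in the script these come from compute_est_coherences).
weights    = [3.0, 2.0, 5.0]      # sum of distances to the other knowns
coherences = [0.9, 0.6, 0.8]

num = sum(w * c for w, c in zip(weights, coherences))
den = sum(weights)
print(num / den)   # 0.79 -- the value the greedy loop tries to push towards 1.0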

model_probabilities3.py (executable file, 463 lines added)
@@ -0,0 +1,463 @@
import hashlib
|
||||
import math
|
||||
from statistics import median, stdev
|
||||
import numpy as np
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def compute_distance(a, b):
|
||||
distance = count_one_bits(a ^ b)
|
||||
# return 1 / (8 ** distance)
|
||||
if distance == 0:
|
||||
return 0
|
||||
# return 1 / (64 ** (distance - 1))
|
||||
return distance
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha_n(n):
|
||||
m = hashlib.sha256()
|
||||
m.update(str(n).encode("utf-8"))
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def compute_distances(N):
|
||||
return [[compute_distance(i, j) for j in range(N)] for i in range(N)]
|
||||
|
||||
def compute_nn_probabilities(i, knowns, distances):
|
||||
total = 0.0
|
||||
total_zero = 0.0
|
||||
total_one = 0.0
|
||||
for known in knowns:
|
||||
j = known[0]
|
||||
if i == j:
|
||||
continue
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
if known[1] == 0:
|
||||
total_zero += distance
|
||||
else:
|
||||
total_one += distance
|
||||
p_zero = total_zero / total
|
||||
p_one = total_one / total
|
||||
return (p_zero, p_one)
|
||||
|
||||
def interpolate_probabilities(i, knowns, distances, probabilities, dim):
|
||||
total = 0.0
|
||||
total_dim = [0.0] * dim
|
||||
for known in knowns:
|
||||
j = known[0]
|
||||
if i == j:
|
||||
continue
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
probability = probabilities[j]
|
||||
for index in range(dim):
|
||||
total_dim[index] += distance * probability[index]
|
||||
for index in range(dim):
|
||||
total_dim[index] /= total
|
||||
return total_dim
|
||||
|
||||
def compute_est_coherence(i, knowns, coherences, distances):
|
||||
total = 0.0
|
||||
coherence = 0.0
|
||||
for known in knowns:
|
||||
j = known[0]
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
coherence += distance * coherences[j]
|
||||
return coherence / total
|
||||
|
||||
def compute_est_coherences(N, knowns, distances):
|
||||
nn_probabilities = [None for i in range(N)]
|
||||
nn_correct_probabilities = [None for i in range(N)]
|
||||
coherences = []
|
||||
|
||||
for known in knowns:
|
||||
i = known[0]
|
||||
nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances)
|
||||
|
||||
# for i in range(len(nn_probabilities)):
|
||||
# if not nn_probabilities[i] is None:
|
||||
# continue
|
||||
# nn_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_probabilities, 2)
|
||||
|
||||
for known in knowns:
|
||||
i = known[0]
|
||||
nn_correct_probabilities[i] = [nn_probabilities[i][known[1]]]
|
||||
|
||||
# for i in range(len(nn_correct_probabilities)):
|
||||
# if not nn_correct_probabilities[i] is None:
|
||||
# continue
|
||||
# nn_correct_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_correct_probabilities, 1)
|
||||
|
||||
coherences_0 = []
|
||||
coherences_1 = []
|
||||
for known_i in knowns:
|
||||
i = known_i[0]
|
||||
coherence = 0.0
|
||||
total = 0.0
|
||||
for known_j in knowns:
|
||||
j = known_j[0]
|
||||
if i == j:
|
||||
continue
|
||||
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
|
||||
nn_p_i_0 = nn_probabilities[i][0]
|
||||
nn_p_i_1 = nn_probabilities[i][1]
|
||||
nn_c_p_i = nn_correct_probabilities[i][0]
|
||||
|
||||
nn_p_j_0 = nn_probabilities[j][0]
|
||||
nn_p_j_1 = nn_probabilities[j][1]
|
||||
nn_c_p_j = nn_correct_probabilities[j][0]
|
||||
|
||||
p_i_0 = nn_p_i_0 * nn_c_p_i + nn_p_i_1 * (1 - nn_c_p_i)
|
||||
p_i_1 = nn_p_i_1 * nn_c_p_i + nn_p_i_0 * (1 - nn_c_p_i)
|
||||
|
||||
p_j_0 = nn_p_j_0 * nn_c_p_j + nn_p_j_1 * (1 - nn_c_p_j)
|
||||
p_j_1 = nn_p_j_1 * nn_c_p_j + nn_p_j_0 * (1 - nn_c_p_j)
|
||||
|
||||
coherence += distance * (p_i_0 * p_j_0 + p_i_1 * p_j_1)
|
||||
coherences.append(coherence / total)
|
||||
if known_i[1] == 0:
|
||||
coherences_0.append(coherence / total)
|
||||
else:
|
||||
coherences_1.append(coherence / total)
|
||||
|
||||
return coherences
|
||||
|
||||
def score(coherences, knowns, distances):
|
||||
# while len(coherences) > 1:
|
||||
# coherences = [(coherences[i] + coherences[i + 1]) / 2 for i in range(0, len(coherences), 2)]
|
||||
# return coherences[0]
|
||||
|
||||
# return median(coherences)
|
||||
# return sum(coherences) / len(coherences)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
numerator_0 = 0.0
|
||||
denominator_0 = 0.0
|
||||
numerator_1 = 0.0
|
||||
denominator_1 = 0.0
|
||||
count_0 = 0.0
|
||||
count_1 = 0.0
|
||||
for i in range(len(knowns)):
|
||||
weight = 0
|
||||
for j in range(len(knowns)):
|
||||
weight += distances[knowns[i][0]][knowns[j][0]]
|
||||
print(weight, end=' ')
|
||||
if knowns[i][1] == 0:
|
||||
denominator_0 += weight
|
||||
numerator_0 += weight * coherences[i]
|
||||
count_0 += 1
|
||||
else:
|
||||
denominator_1 += weight
|
||||
numerator_1 += weight * coherences[i]
|
||||
count_1 += 1
|
||||
# print()
|
||||
if count_0 == 0 or count_1 == 0:
|
||||
return 1.0
|
||||
|
||||
# return ((sum(coherences[0]) / len(coherences[0])) + (sum(coherences[1]) / len(coherences[1]))) / 2.0
|
||||
# return (sum(coherences[0]) + sum(coherences[1])) / (len(coherences[0]) + len(coherences[1]))
|
||||
# div_0 = (numerator_0 / denominator_0 if denominator_0 > 0 else 1.0) * 0.5
|
||||
# div_1 = (numerator_1 / denominator_1 if denominator_1 > 0 else 1.0) * 0.5
|
||||
# return div_0 + div_1
|
||||
# aligned = 1.0 - abs(0.5 - max(count_0 / (count_0 + count_1), count_1 / (count_0 + count_1)))
|
||||
# return ((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) * (aligned ** 0.1)
|
||||
# return (((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) + 0.12 * aligned) * (1.0 / 1.12)
|
||||
return (numerator_0 + numerator_1) / (denominator_0 + denominator_1)
|
||||
|
||||
def xor_by_index(knowns, index, reverse=False):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
known = knowns[i]
|
||||
if known[0] & mask or (not (known[0] & mask) and reverse):
|
||||
knowns[i] = (known[0], known[1] ^ 1)
|
||||
return knowns
|
||||
|
||||
def flip(n, index):
|
||||
return n ^ (1 << index)
|
||||
|
||||
def matrix_from_knowns(knowns, N):
|
||||
S = 2 ** N
|
||||
mat = np.zeros((S, S))
|
||||
val = np.zeros(S)
|
||||
unknowns = set([i for i in range(0, S)])
|
||||
for (i, value) in knowns:
|
||||
mat[i][i] = 1.0
|
||||
val[i] = value
|
||||
unknowns.remove(i)
|
||||
for i in unknowns:
|
||||
mat[i][i] = -1.0
|
||||
for j in range(0, N):
|
||||
mat[i][flip(i,j)] = 1.0 / N
|
||||
return (mat, val)
|
||||
|
||||
def compute_splits(knowns, N):
|
||||
splits = []
|
||||
for i in range(0, N):
|
||||
mask = 1 << i
|
||||
left_0 = 0
|
||||
left_1 = 0
|
||||
right_0 = 0
|
||||
right_1 = 0
|
||||
for (j, value) in knowns:
|
||||
if j & mask == 0:
|
||||
if value == 0:
|
||||
left_0 += 1
|
||||
else:
|
||||
left_1 += 1
|
||||
else:
|
||||
if value == 0:
|
||||
right_0 += 1
|
||||
else:
|
||||
right_1 += 1
|
||||
print((left_0, left_1), (right_0, right_1))
|
||||
left_ratio = min(left_0, left_1) / (left_0 + left_1)
|
||||
right_ratio = min(right_0, right_1) / (right_0 + right_1)
|
||||
# print(left_ratio, right_ratio)
|
||||
splits.append((left_ratio + right_ratio) / 2)
|
||||
return splits
|
||||
|
||||
def compute_coherence(knowns, N):
|
||||
S = 2 ** N
|
||||
# (mat, val) = matrix_from_knowns(knowns, N)
|
||||
# solution = np.linalg.inv(mat).dot(val)
|
||||
# for it in range(0, 1000):
|
||||
# next = np.zeros(len(solution))
|
||||
# for i in range(0, len(solution)):
|
||||
# sum = 0.0
|
||||
# for j in range(0, N):
|
||||
# sum += solution[flip(i,j)]
|
||||
# next[i] = sum / N
|
||||
# solution = next
|
||||
# return 0.0
|
||||
|
||||
# coherence_0 = 0.0
|
||||
# coherence_1 = 0.0
|
||||
# zeros = 0.0
|
||||
# ones = 0.0
|
||||
# lowest = 1.0
|
||||
# print()
|
||||
(mat, val) = matrix_from_knowns(knowns, N)
|
||||
A = np.linalg.inv(mat).dot(val)
|
||||
knowns_nn = []
|
||||
for known_index in range(0, len(knowns)):
|
||||
(mat, val) = matrix_from_knowns(knowns[:known_index] + knowns[known_index + 1:], N)
|
||||
solution = np.linalg.inv(mat).dot(val)
|
||||
(i, value) = knowns[known_index]
|
||||
value_nn = solution[i]
|
||||
knowns_nn.append((i, value_nn))
|
||||
(mat, val) = matrix_from_knowns(knowns_nn, N)
|
||||
B = np.linalg.inv(mat).dot(val)
|
||||
return 1.0 - (sum(abs(A - B)) / len(A))
|
||||
# # print(A)
|
||||
# # print(B)
|
||||
# A_sub_B = A - B
|
||||
# print(A)
|
||||
# print(B)
|
||||
# print(A)
|
||||
# print(B)
|
||||
# print(np.dot(A, B) / len(A))
|
||||
# return 1.0 - (np.dot(A_sub_B, A_sub_B) / len(A))
|
||||
# print(i, value, value_nn, partial)
|
||||
# coherence += ((value * value_nn) + ((1 - value) * (1 - value_nn))) / len(knowns)
|
||||
# if value == 0:
|
||||
# coherence_0 += partial
|
||||
# zeros += 1
|
||||
# else:
|
||||
# coherence_1 += partial
|
||||
# ones += 1
|
||||
# if zeros == 0 or ones == 0:
|
||||
# return 1.0
|
||||
# return 0.5 * coherence_0 / zeros + 0.5 * coherence_1 / ones
|
||||
|
||||
# coherences = np.zeros(S)
|
||||
# (mat, val) = matrix_from_knowns(knowns, N)
|
||||
# solution = np.linalg.inv(mat).dot(val)
|
||||
# print(solution)
|
||||
# for i in range(0, S):
|
||||
# p = solution[i]
|
||||
# coherence = 0.0
|
||||
# for j in range(0, N):
|
||||
# q = solution[flip(i,j)]
|
||||
# coherence += ((p * q) + ((1 - p) * (1 - q))) / N
|
||||
# coherences[i] = coherence
|
||||
# print(coherences)
|
||||
# return sum(coherences) / len(coherences)
|
||||
|
||||
def compute_split_knowns(knowns, N):
|
||||
sum = 0
|
||||
splits = []
|
||||
for i in range(0, N):
|
||||
mask = 1 << i
|
||||
left = []
|
||||
right = []
|
||||
for (j, value) in knowns:
|
||||
k = (j & ((1 << i) - 1)) | ((j & ~((1 << (i + 1)) - 1)) >> 1)
|
||||
masked_known = (k, value)
|
||||
if j & mask == 0:
|
||||
left.append(masked_known)
|
||||
else:
|
||||
right.append(masked_known)
|
||||
left_coherence = compute_coherence(left, N - 1)
|
||||
right_coherence = compute_coherence(right, N - 1)
|
||||
splits.append((left_coherence, right_coherence))
|
||||
sum += min(left_coherence, right_coherence) * (1.0 - abs(left_coherence - right_coherence))
|
||||
# print()
|
||||
# print(splits)
|
||||
# print()
|
||||
return sum / N
|
||||
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
|
||||
|
||||
def compute_split_knowns_r(knowns, N):
|
||||
if len(knowns) == 0:
|
||||
raise ValueError('This should never happen')
|
||||
|
||||
hist = np.zeros(N)
|
||||
for i in range(0, N):
|
||||
mask = 1 << i
|
||||
for (j, value) in knowns:
|
||||
if j & mask == 0:
|
||||
hist[i] += 1
|
||||
|
||||
constant_bits = []
|
||||
for i in range(0, N):
|
||||
if hist[i] == 0 or hist[i] == len(knowns):
|
||||
constant_bits.append(i)
|
||||
|
||||
if len(constant_bits) > 0:
|
||||
constant_bits.reverse()
|
||||
for n in constant_bits:
|
||||
reduced_knowns = []
|
||||
for (j, value) in knowns:
|
||||
reduced_knowns.append((remove_bit(j, n), value))
|
||||
knowns = reduced_knowns
|
||||
return compute_split_knowns_r(knowns, N - len(constant_bits))
|
||||
|
||||
if len(knowns) == 1:
|
||||
return 1.0
|
||||
if len(knowns) == 2:
|
||||
if knowns[0][1] == knowns[1][1]:
|
||||
return 1.0
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
sum = 0
|
||||
for i in range(0, N):
|
||||
mask = 1 << i
|
||||
left = []
|
||||
right = []
|
||||
for (j, value) in knowns:
|
||||
k = remove_bit(j, i)
|
||||
masked_known = (k, value)
|
||||
if j & mask == 0:
|
||||
left.append(masked_known)
|
||||
else:
|
||||
right.append(masked_known)
|
||||
|
||||
# left_correctness = max(left_0_count, left_1_count) / (left_0_count + left_1_count) if left_0_count > 0 and left_1_count > 0 else 1.0
|
||||
# right_correctness = max(right_0_count, right_1_count) / (right_0_count + right_1_count) if right_0_count > 0 and right_1_count > 0 else 1.0
|
||||
left_coherence = compute_split_knowns_r(left, N - 1)
|
||||
right_coherence = compute_split_knowns_r(right, N - 1)
|
||||
evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0
|
||||
# sum += min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
# delta = 1.0 - ((left_coherence - right_coherence) * (left_coherence - right_coherence))
|
||||
sum += 0.7 * min(left_coherence, right_coherence) + 0.3 * evenness ** 2
|
||||
# sum += min(left_coherence, right_coherence) * (1.0 - abs(left_coherence - right_coherence))
|
||||
return sum / N
|
||||
|
||||
def main():
|
||||
N = 8
|
||||
S = 2 ** N
|
||||
distances = compute_distances(S)
|
||||
|
||||
knowns = [(i, sha_n(i)) for i in [
|
||||
0, 1, 2, 3, 4, 5, 6, 7
|
||||
# 0, 3, 4, 5, 7
|
||||
# 3, 5, 6, 10, 12, 14
|
||||
# 1, 3, 7, 10, 14, 15
|
||||
# 0, 3, 5, 6, 10, 11, 12
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
|
||||
# 128, 131, 248, 0, 7, 13, 17, 19
|
||||
]]
|
||||
|
||||
# best_coherence = compute_coherence(knowns, N)
|
||||
best_coherence = compute_split_knowns_r(knowns, N)
|
||||
print(best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
while best_coherence < 1.0:
|
||||
best_index = -1
|
||||
best_reverse = False
|
||||
# best_coherence = 0
|
||||
for i in range(0, N):
|
||||
for reverse in [False, True]:
|
||||
mutated_knowns = xor_by_index(knowns, i, reverse)
|
||||
# coherence = compute_coherence(mutated_knowns, N)
|
||||
coherence = compute_split_knowns_r(mutated_knowns, N)
|
||||
print(i, reverse, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_reverse = reverse
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index, best_reverse)
|
||||
print()
|
||||
print(best_index, best_reverse, best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
print(knowns)
|
||||
|
||||
# for known_i in knowns:
|
||||
# i = known_i[0]
|
||||
# for known_j in knowns:
|
||||
# j = known_j[0]
|
||||
# print(distances[i][j], end=' ')
|
||||
# print()
|
||||
|
||||
# print(knowns)
|
||||
# print()
|
||||
|
||||
# coherences = compute_est_coherences(N, knowns, distances)
|
||||
# best_coherence = score(coherences, knowns, distances)
|
||||
# print(best_coherence)
|
||||
|
||||
# flipped = []
|
||||
# while best_coherence < 1.0:
|
||||
# print()
|
||||
# # print(knowns)
|
||||
# # print()
|
||||
# best_index = -1
|
||||
# # best_coherence = 0
|
||||
# for i in range(0, n):
|
||||
# if i in flipped:
|
||||
# continue
|
||||
# mutated_knowns = xor_by_index(knowns, i)
|
||||
# coherences = compute_est_coherences(N, mutated_knowns, distances)
|
||||
# coherence = score(coherences, mutated_knowns, distances)
|
||||
# # print(coherence)
|
||||
# print(coherence, end=' ')
|
||||
# print(mutated_knowns)
|
||||
# if coherence > best_coherence:
|
||||
# best_coherence = coherence
|
||||
# best_index = i
|
||||
# if best_index < 0:
|
||||
# break
|
||||
# knowns = xor_by_index(knowns, best_index)
|
||||
# # flipped.append(best_index)
|
||||
# print(knowns)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
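remove_bit is the workhorse of the recursive splitting in this file: it deletes bit n from an index by keeping the bits below n and shifting the bits above n down by one position. A quick standalone check of the bit arithmetic (the helper is copied here so the snippet runs on its own):

def remove_bit(i, n):
    # keep bits below position n, shift bits above position n down by one
    return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)

print(bin(remove_bit(0b1101, 1)))  # 0b111  (the 0 at position 1 is dropped)
print(bin(remove_bit(0b1101, 3)))  # 0b101  (the leading 1 is dropped)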

model_probabilities4.py (executable file, 208 lines added)
@@ -0,0 +1,208 @@
import hashlib
|
||||
import math
|
||||
from statistics import median, stdev
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha_n(n):
|
||||
m = hashlib.sha256()
|
||||
m.update(str(n).encode("utf-8"))
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor_by_index(knowns, index, reverse=False):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
known = knowns[i]
|
||||
if known[0] & mask or (not (known[0] & mask) and reverse):
|
||||
knowns[i] = (known[0], known[1] ^ 1)
|
||||
return knowns
|
||||
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
|
||||
|
||||
def split_at(knowns, N, i):
|
||||
mask = 1 << i
|
||||
left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0]
|
||||
right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def factor_at(knowns, N, i, identity_value=1):
|
||||
mask = 1 << i
|
||||
left = [(j, value) for (j, value) in knowns if value == identity_value or (j & mask) == 0]
|
||||
right = [(j, value) for (j, value) in knowns if value == identity_value or not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def compute_coherence(pair, N, depth = 0):
|
||||
(left, right) = pair
|
||||
(left_depth, left_coherence) = compute_split_knowns_r(left, N, depth)
|
||||
(right_depth, right_coherence) = compute_split_knowns_r(right, N, depth)
|
||||
ratio = min(len(left), len(right)) / max(len(left), len(right))
|
||||
# evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0
|
||||
evenness = left_coherence - right_coherence
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * evenness ** 2
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * evenness ** 2
|
||||
coherence = left_coherence if left_depth > right_depth else right_coherence if right_depth > left_depth else (left_coherence + right_coherence) / 2.0
|
||||
depth = max(left_depth, right_depth)
|
||||
return (depth, 0.9 * coherence + 0.1 * (1.0 - (evenness ** 2)))
|
||||
|
||||
def compute_split_knowns_r(knowns, N, depth = 0):
|
||||
if len(knowns) == 0:
|
||||
return (depth, 1.0)
|
||||
|
||||
hist = np.zeros(N)
|
||||
for i in range(0, N):
|
||||
mask = 1 << i
|
||||
for (j, value) in knowns:
|
||||
if j & mask == 0:
|
||||
hist[i] += 1
|
||||
|
||||
constant_bits = [i for i in range(0, N) if hist[i] == 0 or hist[i] == len(knowns)]
|
||||
if len(constant_bits) > 0:
|
||||
constant_bits.reverse()
|
||||
for n in constant_bits:
|
||||
knowns = [(remove_bit(j, n), value) for (j, value) in knowns]
|
||||
return compute_split_knowns_r(knowns, N - len(constant_bits), depth)
|
||||
|
||||
if len(knowns) == 1:
|
||||
return (depth, 1.0)
|
||||
if len(knowns) == 2:
|
||||
if knowns[0][1] == knowns[1][1]:
|
||||
return (depth, 1.0)
|
||||
else:
|
||||
return (depth, 0.0)
|
||||
|
||||
sum = 0
|
||||
denominator = 0
|
||||
for i in range(0, N):
|
||||
(left, right) = split_at(knowns, N, i)
|
||||
(depth, partial) = compute_coherence((left, right), N, depth + 1)
|
||||
sum += depth * partial
|
||||
denominator += depth
|
||||
return (depth, sum / denominator)
|
||||
|
||||
def invert(knowns):
|
||||
inverted_knowns = []
|
||||
for (i, value) in knowns:
|
||||
inverted_knowns.append((i, 1 - value))
|
||||
return inverted_knowns
|
||||
|
||||
def reduce(knowns, N):
|
||||
flips = []
|
||||
(depth, best_coherence) = compute_split_knowns_r(knowns, N)
|
||||
print(best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
while best_coherence < 1.0:
|
||||
best_index = -1
|
||||
best_reverse = False
|
||||
# best_coherence = 0
|
||||
for i in range(0, N):
|
||||
for reverse in [False, True]:
|
||||
mutated_knowns = xor_by_index(knowns, i, reverse)
|
||||
# coherence = compute_coherence(mutated_knowns, N)
|
||||
(depth, coherence) = compute_split_knowns_r(mutated_knowns, N)
|
||||
print(i, reverse, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_reverse = reverse
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index, best_reverse)
|
||||
flips.append((best_index, best_reverse))
|
||||
print()
|
||||
print(best_index, best_reverse, best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
return (knowns, best_coherence, flips)
|
||||
|
||||
def solve(knowns, N):
|
||||
(knowns, coherence, flips) = reduce(knowns, N)
|
||||
if coherence == 1.0:
|
||||
inverted = knowns[0][1]
|
||||
return (inverted, flips, None)
|
||||
|
||||
raise Exception('Stop')
|
||||
|
||||
best_coherence = 0
|
||||
best_index = -1
|
||||
best_identity_value = False
|
||||
print()
|
||||
for i in range(0, N):
|
||||
for identity_value in [0, 1]:
|
||||
coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N)
|
||||
print(i, identity_value, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_identity_value = identity_value
|
||||
print()
|
||||
(left, right) = factor_at(knowns, N, best_index, best_identity_value)
|
||||
return (0, flips, (best_identity_value, solve(left, N), solve(right, N)))
|
||||
|
||||
def evaluate(model, n, value = 0):
|
||||
(inverted, flips, child) = model
|
||||
for (i, invert) in flips:
|
||||
mask = (1 << i)
|
||||
masked_n = n & mask
|
||||
if (masked_n > 0 and not invert) or (masked_n == 0 and invert):
|
||||
value = 1 - value
|
||||
if not child is None:
|
||||
(identity, left_child, right_child) = child
|
||||
left = evaluate(left_child, n, 1 - identity)
|
||||
right = evaluate(right_child, n, 1 - identity)
|
||||
if left and right:
|
||||
value = 1 - value
|
||||
if identity == 0:
|
||||
value = 1 - value
|
||||
if inverted:
|
||||
value = 1 - value
|
||||
return value
|
||||
|
||||
def main():
|
||||
N = 8
|
||||
S = 2 ** N
|
||||
train_size = 16
|
||||
test_size = 100
|
||||
f = xor_n
|
||||
|
||||
knowns = [(i, f(i)) for i in [
|
||||
# 0, 1, 2, 3, 4, 5, 6, 7
|
||||
# 0, 3, 4, 5, 7
|
||||
# 3, 5, 6, 10, 12, 14
|
||||
# 1, 3, 7, 10, 14, 15
|
||||
# 0, 3, 5, 6, 10, 11, 12
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
|
||||
# 128, 131, 248, 0, 7, 13, 17, 19
|
||||
23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255
|
||||
]]
|
||||
|
||||
# f = xor_n
|
||||
# knowns = []
|
||||
# train_samples = set()
|
||||
# for i in range(0, train_size):
|
||||
# k = random.randint(0, S)
|
||||
# while k in train_samples:
|
||||
# k = random.randint(0, S)
|
||||
# knowns.append((k, f(i)))
|
||||
# train_samples.add(k)
|
||||
|
||||
model = solve(knowns, N)
|
||||
# print(model)
|
||||
correct = 0
|
||||
for i in range(0, test_size):
|
||||
if f(i) == evaluate(model, i):
|
||||
correct += 1
|
||||
print(str(correct) + "/" + str(test_size))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
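The model that solve() returns is a tuple (inverted, flips, child): flips is the list of (bit, reverse) absorptions found by reduce(), child is an optional factored sub-model, and inverted records the residual constant. For a pure parity target the child is None and evaluation reduces to toggling the output once per set bit named in flips. A hand-built example of that case (the flip list is made up to match 3-bit parity, not taken from an actual run):

# Hand-built model equivalent to xor of bits 0..2: no child, residual constant 0.
model = (0, [(0, False), (1, False), (2, False)], None)

def evaluate_flips_only(model, n):
    inverted, flips, child = model          # child assumed None here
    value = 0
    for (i, invert) in flips:
        bit_set = (n >> i) & 1
        if (bit_set and not invert) or (not bit_set and invert):
            value = 1 - value
    return 1 - value if inverted else value

print([evaluate_flips_only(model, n) for n in range(8)])
# [0, 1, 1, 0, 1, 0, 0, 1]  == parity of the 3-bit input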

model_probabilities5.py (executable file, 219 lines added)
@@ -0,0 +1,219 @@
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha_n(n):
|
||||
m = hashlib.sha256()
|
||||
m.update(str(n).encode("utf-8"))
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor_by_index(knowns, index, reverse=False):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
known = knowns[i]
|
||||
if known[0] & mask or (not (known[0] & mask) and reverse):
|
||||
knowns[i] = (known[0], known[1] ^ 1)
|
||||
return knowns
|
||||
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
|
||||
|
||||
def split_at(knowns, N, i):
|
||||
mask = 1 << i
|
||||
left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0]
|
||||
right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def factor_at(knowns, N, i, identity_value=1):
|
||||
mask = 1 << i
|
||||
left = [(j, value) for (j, value) in knowns if value == identity_value or (j & mask) == 0]
|
||||
right = [(j, value) for (j, value) in knowns if value == identity_value or not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def compute_coherence(pair, N):
|
||||
(left, right) = pair
|
||||
left_coherence = compute_split_knowns_r(left, N)
|
||||
right_coherence = compute_split_knowns_r(right, N)
|
||||
ratio = min(len(left), len(right)) / max(len(left), len(right))
|
||||
# evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0
|
||||
# evenness = left_coherence - right_coherence
|
||||
evenness = (1.0 - ((1.0 - left_coherence) - (1.0 - right_coherence)) ** 2)
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * evenness ** 2
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * evenness ** 2
|
||||
# coherence = left_coherence if left_depth > right_depth else right_coherence if right_depth > left_depth else (left_coherence + right_coherence) / 2.0
|
||||
# depth = max(left_depth, right_depth)
|
||||
# return (depth, 0.9 * coherence + 0.1 * (1.0 - (evenness ** 2)))
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * (1.0 - (evenness ** 2))
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * (evenness ** 2)
|
||||
# return ((left_coherence * len(left) + right_coherence * len(right)) / (len(left) +len(right))) * min(left_coherence, right_coherence) * evenness
|
||||
# return min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
coherence = ((len(left) / (len(left) + len(right))) * left_coherence + (len(right) / (len(left) + len(right))) * right_coherence)
|
||||
return min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
|
||||
def compute_split_knowns_r(knowns, N):
|
||||
# if len(knowns) == 0:
|
||||
# return 1.0
|
||||
|
||||
# hist = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# mask = 1 << i
|
||||
# for (j, value) in knowns:
|
||||
# if j & mask == 0:
|
||||
# hist[i] += 1
|
||||
|
||||
# constant_bits = [i for i in range(0, N) if hist[i] == 0 or hist[i] == len(knowns)]
|
||||
# if len(constant_bits) > 0:
|
||||
# constant_bits.reverse()
|
||||
# for n in constant_bits:
|
||||
# knowns = [(remove_bit(j, n), value) for (j, value) in knowns]
|
||||
# return compute_split_knowns_r(knowns, N - len(constant_bits), depth)
|
||||
|
||||
if len(knowns) == 1:
|
||||
return 1.0
|
||||
if len(knowns) == 2:
|
||||
if knowns[0][1] == knowns[1][1]:
|
||||
return 1.0
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
sum = 0
|
||||
denominator = 0
|
||||
for i in range(0, N):
|
||||
(left, right) = split_at(knowns, N, i)
|
||||
weight = min(len(left), len(right)) / max(len(left), len(right))
|
||||
# weight = 1.0 - (abs(len(left) - len(right)) / (len(left) + len(right)))
|
||||
if weight == 0:
|
||||
continue
|
||||
partial = compute_coherence((left, right), N - 1)
|
||||
sum += weight * partial
|
||||
denominator += weight
|
||||
return sum / denominator
|
||||
|
||||
def invert(knowns):
|
||||
inverted_knowns = []
|
||||
for (i, value) in knowns:
|
||||
inverted_knowns.append((i, 1 - value))
|
||||
return inverted_knowns
|
||||
|
||||
def reduce(knowns, N):
|
||||
flips = []
|
||||
best_coherence = compute_split_knowns_r(knowns, N)
|
||||
print(best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
while best_coherence < 1.0:
|
||||
best_index = -1
|
||||
best_reverse = False
|
||||
# best_coherence = 0
|
||||
for i in range(0, N):
|
||||
for reverse in [False, True]:
|
||||
mutated_knowns = xor_by_index(knowns, i, reverse)
|
||||
# coherence = compute_coherence(mutated_knowns, N)
|
||||
coherence = compute_split_knowns_r(mutated_knowns, N)
|
||||
print(i, reverse, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_reverse = reverse
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index, best_reverse)
|
||||
flips.append((best_index, best_reverse))
|
||||
print()
|
||||
print(best_index, best_reverse, best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
return (knowns, best_coherence, flips)
|
||||
|
||||
def solve(knowns, N):
|
||||
(knowns, coherence, flips) = reduce(knowns, N)
|
||||
if coherence == 1.0:
|
||||
inverted = knowns[0][1]
|
||||
return (inverted, flips, None)
|
||||
|
||||
raise Exception('Stop')
|
||||
|
||||
best_coherence = 0
|
||||
best_index = -1
|
||||
best_identity_value = False
|
||||
print()
|
||||
for i in range(0, N):
|
||||
for identity_value in [0, 1]:
|
||||
coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N)
|
||||
print(i, identity_value, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_identity_value = identity_value
|
||||
print()
|
||||
(left, right) = factor_at(knowns, N, best_index, best_identity_value)
|
||||
return (0, flips, (best_identity_value, solve(left, N), solve(right, N)))
|
||||
|
||||
def evaluate(model, n, value = 0):
|
||||
(inverted, flips, child) = model
|
||||
for (i, invert) in flips:
|
||||
mask = (1 << i)
|
||||
masked_n = n & mask
|
||||
if (masked_n > 0 and not invert) or (masked_n == 0 and invert):
|
||||
value = 1 - value
|
||||
if not child is None:
|
||||
(identity, left_child, right_child) = child
|
||||
left = evaluate(left_child, n, 1 - identity)
|
||||
right = evaluate(right_child, n, 1 - identity)
|
||||
if left and right:
|
||||
value = 1 - value
|
||||
if identity == 0:
|
||||
value = 1 - value
|
||||
if inverted:
|
||||
value = 1 - value
|
||||
return value
|
||||
|
||||
def main():
|
||||
N = 8
|
||||
S = 2 ** N
|
||||
train_size = 128
|
||||
test_size = 100
|
||||
f = xor_n
|
||||
|
||||
knowns = [(i, f(i)) for i in [
|
||||
# 0, 1, 2, 3, 4, 5, 6, 7
|
||||
# 0, 3, 4, 5, 7
|
||||
# 3, 5, 6, 10, 12, 14
|
||||
# 1, 3, 7, 10, 14, 15
|
||||
# 0, 3, 5, 6, 10, 11, 12
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
|
||||
128, 131, 248, 0, 7, 13, 17, 19
|
||||
# 23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255
|
||||
]]
|
||||
|
||||
# knowns = []
|
||||
# train_samples = set()
|
||||
# for i in range(0, train_size):
|
||||
# k = random.randint(0, S)
|
||||
# while k in train_samples:
|
||||
# k = random.randint(0, S)
|
||||
# knowns.append((k, f(i)))
|
||||
# train_samples.add(k)
|
||||
|
||||
model = solve(knowns, N)
|
||||
print(model)
|
||||
# print(model)
|
||||
correct = 0
|
||||
for i in range(0, test_size):
|
||||
k = random.randint(0, S - 1)
|
||||
if f(k) == evaluate(model, k):
|
||||
correct += 1
|
||||
print(str(correct) + "/" + str(test_size))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
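The per-split score in this version penalizes disagreement between the two halves twice: it takes the worse of the two sub-coherences and additionally multiplies by an evenness term 1 - (left - right)^2, squared. A sketch of just that combination step, mirroring the final line of compute_coherence above (the sub-coherence numbers are invented for illustration):

def combine(left_coherence, right_coherence):
    # mirrors the final line of compute_coherence above
    evenness = 1.0 - ((1.0 - left_coherence) - (1.0 - right_coherence)) ** 2
    return min(left_coherence, right_coherence) * (evenness ** 2)

print(combine(1.0, 1.0))   # 1.0     -- both halves fully coherent
print(combine(1.0, 0.5))   # 0.28125 -- uneven halves are penalized hard
print(combine(0.5, 0.5))   # 0.5     -- even but mediocre halves keep their score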

model_probabilities6.py (executable file, 201 lines added)
@@ -0,0 +1,201 @@
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha_n(n):
|
||||
m = hashlib.sha256()
|
||||
m.update(str(n).encode("utf-8"))
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor_by_index(knowns, index, reverse=False):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
(g, j, value) = knowns[i]
|
||||
if j & mask or (not (j & mask) and reverse):
|
||||
knowns[i] = (g, j, value ^ 1)
|
||||
return knowns
|
||||
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
|
||||
|
||||
def split_at(knowns, N, i):
|
||||
mask = 1 << i
|
||||
left = [(g, remove_bit(j, i), value) for (g, j, value) in knowns if (j & mask) == 0]
|
||||
right = [(g, remove_bit(j, i), value) for (g, j, value) in knowns if not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def factor_at(knowns, N, i, identity_value=1):
|
||||
mask = 1 << i
|
||||
left = [(g, j, value) for (g, j, value) in knowns if value == identity_value or (j & mask) == 0]
|
||||
right = [(g, j, value) for (g, j, value) in knowns if value == identity_value or not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def key_for_knowns(knowns):
|
||||
return tuple([g for (g, _, _) in knowns])
|
||||
|
||||
primes = [1, 2, 3, 5, 7, 11, 13, 17, 19, 23]
|
||||
|
||||
def compute_split_knowns_r(knowns, N):
|
||||
stack = [(knowns, N)]
|
||||
numerator = 0.0
|
||||
denominator = 0.0
|
||||
|
||||
while len(stack) > 0:
|
||||
(s, n) = stack.pop()
|
||||
depth = (N - n)
|
||||
weight = depth ** 64
|
||||
|
||||
if len(s) == 1:
|
||||
# numerator += weight
|
||||
# denominator += weight
|
||||
numerator += weight
|
||||
denominator += weight
|
||||
continue
|
||||
if len(s) == 2:
|
||||
(_, a, left_value) = s[0]
|
||||
(_, b, right_value) = s[1]
|
||||
distance = count_one_bits(a ^ b)
|
||||
weight /= (2 ** distance)
|
||||
if left_value == right_value:
|
||||
numerator += weight
|
||||
denominator += weight
|
||||
else:
|
||||
denominator += weight
|
||||
continue
|
||||
|
||||
for i in range(0, n):
|
||||
(left, right) = split_at(s, n, i)
|
||||
if len(left) == 0 or len(right) == 0:
|
||||
continue
|
||||
stack.append((left, n - 1))
|
||||
stack.append((right, n - 1))
|
||||
|
||||
return numerator / denominator
|
||||
|
||||
def invert(knowns):
|
||||
inverted_knowns = []
|
||||
for (i, value) in knowns:
|
||||
inverted_knowns.append((i, 1 - value))
|
||||
return inverted_knowns
|
||||
|
||||
def reduce(knowns, N):
|
||||
flips = []
|
||||
best_coherence = compute_split_knowns_r(knowns, N)
|
||||
print(best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
while best_coherence < 1.0:
|
||||
best_index = -1
|
||||
best_reverse = False
|
||||
# best_coherence = 0
|
||||
for i in range(0, N):
|
||||
for reverse in [False, True]:
|
||||
mutated_knowns = xor_by_index(knowns, i, reverse)
|
||||
# coherence = compute_coherence(mutated_knowns, N)
|
||||
coherence = compute_split_knowns_r(mutated_knowns, N)
|
||||
print(i, reverse, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_reverse = reverse
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index, best_reverse)
|
||||
flips.append((best_index, best_reverse))
|
||||
print()
|
||||
print(best_index, best_reverse, best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
return (knowns, best_coherence, flips)
|
||||
|
||||
def solve(knowns, N):
|
||||
(knowns, coherence, flips) = reduce(knowns, N)
|
||||
if coherence == 1.0:
|
||||
(_, _, inverted) = knowns[0]
|
||||
return (inverted, flips, None)
|
||||
|
||||
raise Exception('Stop')
|
||||
|
||||
best_coherence = 0
|
||||
best_index = -1
|
||||
best_identity_value = False
|
||||
print()
|
||||
for i in range(0, N):
|
||||
for identity_value in [0, 1]:
|
||||
coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N)
|
||||
print(i, identity_value, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_identity_value = identity_value
|
||||
print()
|
||||
(left, right) = factor_at(knowns, N, best_index, best_identity_value)
|
||||
return (0, flips, (best_identity_value, solve(left, N), solve(right, N)))
|
||||
|
||||
def evaluate(model, n, value = 0):
|
||||
(inverted, flips, child) = model
|
||||
for (i, invert) in flips:
|
||||
mask = (1 << i)
|
||||
masked_n = n & mask
|
||||
if (masked_n > 0 and not invert) or (masked_n == 0 and invert):
|
||||
value = 1 - value
|
||||
if not child is None:
|
||||
(identity, left_child, right_child) = child
|
||||
left = evaluate(left_child, n, 1 - identity)
|
||||
right = evaluate(right_child, n, 1 - identity)
|
||||
if left and right:
|
||||
value = 1 - value
|
||||
if identity == 0:
|
||||
value = 1 - value
|
||||
if inverted:
|
||||
value = 1 - value
|
||||
return value
|
||||
|
||||
def main():
|
||||
N = 8
|
||||
S = 2 ** N
|
||||
train_size = 128
|
||||
test_size = 100
|
||||
f = xor_n
|
||||
|
||||
knowns = [(i, i, f(i)) for i in [
|
||||
# 0, 1, 2, 3, 4, 5, 6, 7
|
||||
# 0, 3, 4, 5, 7
|
||||
# 3, 5, 6, 10, 12, 14
|
||||
# 1, 3, 7, 10, 14, 15
|
||||
# 0, 3, 5, 6, 10, 11, 12
|
||||
0, 3, 5, 6, 10, 11, 12, 24, 30
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
|
||||
# 128, 131, 248, 0, 7, 13, 17, 19
|
||||
# 23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255
|
||||
]]
|
||||
|
||||
# knowns = []
|
||||
# train_samples = set()
|
||||
# for i in range(0, train_size):
|
||||
# k = random.randint(0, S)
|
||||
# while k in train_samples:
|
||||
# k = random.randint(0, S)
|
||||
# knowns.append((k, f(i)))
|
||||
# train_samples.add(k)
|
||||
|
||||
model = solve(knowns, N)
|
||||
# print(model)
|
||||
correct = 0
|
||||
for i in range(0, test_size):
|
||||
k = random.randint(0, S - 1)
|
||||
if f(k) == evaluate(model, k):
|
||||
correct += 1
|
||||
print(str(correct) + "/" + str(test_size))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
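In this iterative, stack-based variant every leaf of the split tree casts a weighted vote: a two-element leaf counts as "agree" or "disagree" with weight proportional to depth**64 / 2**hamming(a, b), so disagreements between nearby keys are far more damaging than disagreements between distant ones. A toy illustration of that leaf rule only (depth fixed at 1; keys and labels invented):

def leaf_vote(a, value_a, b, value_b, depth=1):
    weight = (depth ** 64) / (2 ** bin(a ^ b).count("1"))
    return (weight if value_a == value_b else 0.0), weight  # (numerator, denominator)

print(leaf_vote(0b0001, 0, 0b0011, 1))  # (0.0, 0.5)    close disagreement, heavy penalty
print(leaf_vote(0b0001, 0, 0b1110, 1))  # (0.0, 0.0625) distant disagreement, mild penalty
print(leaf_vote(0b0001, 0, 0b0011, 0))  # (0.5, 0.5)    close agreement, strong support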

model_probabilities7.py (executable file, 249 lines added)
@@ -0,0 +1,249 @@
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha_n(n):
|
||||
m = hashlib.sha256()
|
||||
m.update(str(n).encode("utf-8"))
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor_by_index(knowns, index, reverse=False):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
known = knowns[i]
|
||||
if known[0] & mask or (not (known[0] & mask) and reverse):
|
||||
knowns[i] = (known[0], known[1] ^ 1)
|
||||
return knowns
|
||||
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
|
||||
|
||||
def split_at(knowns, N, i):
|
||||
mask = 1 << i
|
||||
left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0]
|
||||
right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def factor_at(knowns, N, i, identity_value=1):
|
||||
mask = 1 << i
|
||||
left = [(j, value) for (j, value) in knowns if value == identity_value or (j & mask) == 0]
|
||||
right = [(j, value) for (j, value) in knowns if value == identity_value or not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def span(s, N):
|
||||
lower_bound = (1 << N) - 1
|
||||
upper_bound = 0
|
||||
for (x, _) in s:
|
||||
upper_bound |= x
|
||||
lower_bound &= x
|
||||
return 2 ** count_one_bits(lower_bound ^ upper_bound)
|
||||
|
||||
def compute_coherence(pair, N):
|
||||
(left, right) = pair
|
||||
left_coherence = compute_split_knowns_r(left, N)
|
||||
right_coherence = compute_split_knowns_r(right, N)
|
||||
|
||||
ratio = min(len(left), len(right)) / max(len(left), len(right))
|
||||
# evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0
|
||||
# evenness = left_coherence - right_coherence
|
||||
evenness = (1.0 - ((1.0 - left_coherence) - (1.0 - right_coherence)) ** 2)
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * evenness ** 2
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * evenness ** 2
|
||||
# coherence = left_coherence if left_depth > right_depth else right_coherence if right_depth > left_depth else (left_coherence + right_coherence) / 2.0
|
||||
# depth = max(left_depth, right_depth)
|
||||
# return (depth, 0.9 * coherence + 0.1 * (1.0 - (evenness ** 2)))
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * (1.0 - (evenness ** 2))
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * (evenness ** 2)
|
||||
# return ((left_coherence * len(left) + right_coherence * len(right)) / (len(left) +len(right))) * min(left_coherence, right_coherence) * evenness
|
||||
# return min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
# coherence = ((len(left) / (len(left) + len(right))) * left_coherence + (len(right) / (len(left) + len(right))) * right_coherence)
|
||||
# return min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
span_left = span(left, N)
|
||||
span_right = span(right, N)
|
||||
weighted_left_coherence = span_left * left_coherence / (span_left + span_right)
|
||||
weighted_right_coherence = span_right * right_coherence / (span_left + span_right)
|
||||
return (weighted_left_coherence + weighted_right_coherence) * (evenness ** 2)
|
||||
|
||||
def compute_split_knowns_r(knowns, N):
|
||||
# if len(knowns) == 0:
|
||||
# return 1.0
|
||||
|
||||
# hist = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# mask = 1 << i
|
||||
# for (j, value) in knowns:
|
||||
# if j & mask == 0:
|
||||
# hist[i] += 1
|
||||
|
||||
# constant_bits = [i for i in range(0, N) if hist[i] == 0 or hist[i] == len(knowns)]
|
||||
# if len(constant_bits) > 0:
|
||||
# constant_bits.reverse()
|
||||
# for n in constant_bits:
|
||||
# knowns = [(remove_bit(j, n), value) for (j, value) in knowns]
|
||||
# return compute_split_knowns_r(knowns, N - len(constant_bits), depth)
|
||||
|
||||
if len(knowns) == 1:
|
||||
return 1.0
|
||||
if len(knowns) == 2:
|
||||
if knowns[0][1] == knowns[1][1]:
|
||||
return 1.0
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
sum = 0
|
||||
denominator = 0
|
||||
for i in range(0, N):
|
||||
(left, right) = split_at(knowns, N, i)
|
||||
if len(left) == 0 or len(right) == 0:
|
||||
continue
|
||||
weight = min(span(left, N), span(right, N))
|
||||
# weight = max(span(left, N), span(right, N)) / min(span(left, N), span(right, N))
|
||||
# weight = 1.0 - (abs(len(left) - len(right)) / (len(left) + len(right)))
|
||||
if weight == 0:
|
||||
continue
|
||||
partial = compute_coherence((left, right), N - 1)
|
||||
sum += weight * partial
|
||||
denominator += weight
|
||||
return sum / denominator
|
||||
|
||||
def invert(knowns):
|
||||
inverted_knowns = []
|
||||
for (i, value) in knowns:
|
||||
inverted_knowns.append((i, 1 - value))
|
||||
return inverted_knowns
|
||||
|
||||
def reduce(knowns, N):
|
||||
flips = []
|
||||
best_coherence = compute_split_knowns_r(knowns, N)
|
||||
print(best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
while best_coherence < 1.0:
|
||||
best_index = -1
|
||||
best_reverse = False
|
||||
# best_coherence = 0
|
||||
for i in range(0, N):
|
||||
for reverse in [False, True]:
|
||||
mutated_knowns = xor_by_index(knowns, i, reverse)
|
||||
# coherence = compute_coherence(mutated_knowns, N)
|
||||
coherence = compute_split_knowns_r(mutated_knowns, N)
|
||||
print(i, reverse, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_reverse = reverse
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index, best_reverse)
|
||||
flips.append((best_index, best_reverse))
|
||||
print()
|
||||
print(best_index, best_reverse, best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
return (knowns, best_coherence, flips)
|
||||
|
||||
def solve(knowns, N):
|
||||
(knowns, coherence, flips) = reduce(knowns, N)
|
||||
if coherence == 1.0:
|
||||
inverted = knowns[0][1]
|
||||
return (inverted, flips, None)
|
||||
|
||||
raise Exception('Stop')
|
||||
|
||||
best_coherence = 0
|
||||
best_index = -1
|
||||
best_identity_value = False
|
||||
print()
|
||||
for i in range(0, N):
|
||||
for identity_value in [0, 1]:
|
||||
coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N)
|
||||
print(i, identity_value, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_identity_value = identity_value
|
||||
print()
|
||||
(left, right) = factor_at(knowns, N, best_index, best_identity_value)
|
||||
return (0, flips, (best_identity_value, solve(left, N), solve(right, N)))
|
||||
|
||||
def evaluate(model, n, value = 0):
|
||||
(inverted, flips, child) = model
|
||||
for (i, invert) in flips:
|
||||
mask = (1 << i)
|
||||
masked_n = n & mask
|
||||
if (masked_n > 0 and not invert) or (masked_n == 0 and invert):
|
||||
value = 1 - value
|
||||
if not child is None:
|
||||
(identity, left_child, right_child) = child
|
||||
left = evaluate(left_child, n, 1 - identity)
|
||||
right = evaluate(right_child, n, 1 - identity)
|
||||
if left and right:
|
||||
value = 1 - value
|
||||
if identity == 0:
|
||||
value = 1 - value
|
||||
if inverted:
|
||||
value = 1 - value
|
||||
return value
|
||||
|
||||
def run_for_input(input):
|
||||
N = 8
|
||||
S = 2 ** N
|
||||
train_size = 128
|
||||
test_size = 100
|
||||
f = xor_n
|
||||
|
||||
knowns = [(i, f(i)) for i in input]
|
||||
|
||||
# knowns = []
|
||||
# train_samples = set()
|
||||
# for i in range(0, train_size):
|
||||
# k = random.randint(0, S)
|
||||
# while k in train_samples:
|
||||
# k = random.randint(0, S)
|
||||
# knowns.append((k, f(i)))
|
||||
# train_samples.add(k)
|
||||
|
||||
model = solve(knowns, N)
|
||||
print(model)
|
||||
# print(model)
|
||||
correct = 0
|
||||
for i in range(0, test_size):
|
||||
k = random.randint(0, S - 1)
|
||||
if f(k) == evaluate(model, k):
|
||||
correct += 1
|
||||
print(str(correct) + "/" + str(test_size))
|
||||
|
||||
def run():
|
||||
inputs = [
|
||||
# [0, 1, 2, 3, 4, 5, 6, 7],
|
||||
# [0, 3, 4, 5, 7],
|
||||
# [3, 5, 6, 10, 12, 14],
|
||||
# [1, 3, 7, 10, 14, 15],
|
||||
# [0, 3, 5, 6, 10, 11, 12],
|
||||
# [0, 3, 5, 6, 10, 11, 12, 24, 30],
|
||||
[0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127],
|
||||
# [128, 131, 248, 0, 7, 13, 17, 19],
|
||||
# [23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255]
|
||||
]
|
||||
results = []
|
||||
for i, input in enumerate(inputs):
|
||||
success = False
|
||||
try:
|
||||
run_for_input(input)
|
||||
success = True
|
||||
except:
|
||||
pass
|
||||
results.append(success)
|
||||
print(results)
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
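A note on the splitting step used throughout model_probabilities7.py: split_at partitions the knowns on one input bit and remove_bit drops that bit from the surviving index, so the recursion in compute_split_knowns_r always works over an (N-1)-bit space. A small sketch that duplicates those two helpers and shows the partition of 3-bit parity data on bit 0:

def remove_bit(i, n):
    # Drop bit n from i, shifting the higher bits down by one position.
    return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)

def split_at(knowns, N, i):
    mask = 1 << i
    left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0]
    right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0]
    return (left, right)

# 3-bit parity samples split on bit 0: even indices go left, odd indices go
# right, and the two remaining bits are re-packed into the range 0..3.
knowns = [(i, bin(i).count("1") % 2) for i in range(8)]
print(split_at(knowns, 3, 0))
# ([(0, 0), (1, 1), (2, 1), (3, 0)], [(0, 1), (1, 0), (2, 0), (3, 1)])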
219
model_probabilities8.py
Executable file
@ -0,0 +1,219 @@
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha_n(n):
|
||||
m = hashlib.sha256()
|
||||
m.update(str(n).encode("utf-8"))
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor_by_index(knowns, index, reverse=False):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
known = knowns[i]
|
||||
if known[0] & mask or (not (known[0] & mask) and reverse):
|
||||
knowns[i] = (known[0], known[1] ^ 1)
|
||||
return knowns
|
||||
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
|
||||
|
||||
def split_at(knowns, N, i):
|
||||
mask = 1 << i
|
||||
left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0]
|
||||
right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def factor_at(knowns, N, i, identity_value=1):
|
||||
mask = 1 << i
|
||||
left = [(j, value) for (j, value) in knowns if value == identity_value or (j & mask) == 0]
|
||||
right = [(j, value) for (j, value) in knowns if value == identity_value or not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def compute_coherence(pair, N):
|
||||
(left, right) = pair
|
||||
left_coherence = compute_split_knowns_r(left, N)
|
||||
right_coherence = compute_split_knowns_r(right, N)
|
||||
ratio = min(len(left), len(right)) / max(len(left), len(right))
|
||||
# evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0
|
||||
# evenness = left_coherence - right_coherence
|
||||
evenness = (1.0 - ((1.0 - left_coherence) - (1.0 - right_coherence)) ** 2)
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * evenness ** 2
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * evenness ** 2
|
||||
# coherence = left_coherence if left_depth > right_depth else right_coherence if right_depth > left_depth else (left_coherence + right_coherence) / 2.0
|
||||
# depth = max(left_depth, right_depth)
|
||||
# return (depth, 0.9 * coherence + 0.1 * (1.0 - (evenness ** 2)))
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * (1.0 - (evenness ** 2))
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * (evenness ** 2)
|
||||
# return ((left_coherence * len(left) + right_coherence * len(right)) / (len(left) +len(right))) * min(left_coherence, right_coherence) * evenness
|
||||
# return min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
coherence = ((len(left) / (len(left) + len(right))) * left_coherence + (len(right) / (len(left) + len(right))) * right_coherence)
|
||||
return min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
|
||||
def compute_split_knowns_r(knowns, N):
|
||||
# if len(knowns) == 0:
|
||||
# return 1.0
|
||||
|
||||
# hist = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# mask = 1 << i
|
||||
# for (j, value) in knowns:
|
||||
# if j & mask == 0:
|
||||
# hist[i] += 1
|
||||
|
||||
# constant_bits = [i for i in range(0, N) if hist[i] == 0 or hist[i] == len(knowns)]
|
||||
# if len(constant_bits) > 0:
|
||||
# constant_bits.reverse()
|
||||
# for n in constant_bits:
|
||||
# knowns = [(remove_bit(j, n), value) for (j, value) in knowns]
|
||||
# return compute_split_knowns_r(knowns, N - len(constant_bits), depth)
|
||||
|
||||
if len(knowns) == 1:
|
||||
return 1.0
|
||||
if len(knowns) == 2:
|
||||
if knowns[0][1] == knowns[1][1]:
|
||||
return 1.0
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
sum = 0
|
||||
denominator = 0
|
||||
for i in range(0, N):
|
||||
(left, right) = split_at(knowns, N, i)
|
||||
weight = min(len(left), len(right)) / max(len(left), len(right))
|
||||
# weight = 1.0 - (abs(len(left) - len(right)) / (len(left) + len(right)))
|
||||
if weight == 0:
|
||||
continue
|
||||
partial = compute_coherence((left, right), N - 1)
|
||||
sum += weight * partial
|
||||
denominator += weight
|
||||
return sum / denominator
|
||||
|
||||
def invert(knowns):
|
||||
inverted_knowns = []
|
||||
for (i, value) in knowns:
|
||||
inverted_knowns.append((i, 1 - value))
|
||||
return inverted_knowns
|
||||
|
||||
def reduce(knowns, N):
|
||||
flips = []
|
||||
best_coherence = compute_split_knowns_r(knowns, N)
|
||||
print(best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
while best_coherence < 1.0:
|
||||
best_index = -1
|
||||
best_reverse = False
|
||||
# best_coherence = 0
|
||||
for i in range(0, N):
|
||||
for reverse in [False, True]:
|
||||
mutated_knowns = xor_by_index(knowns, i, reverse)
|
||||
# coherence = compute_coherence(mutated_knowns, N)
|
||||
coherence = compute_split_knowns_r(mutated_knowns, N)
|
||||
print(i, reverse, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_reverse = reverse
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index, best_reverse)
|
||||
flips.append((best_index, best_reverse))
|
||||
print()
|
||||
print(best_index, best_reverse, best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
return (knowns, best_coherence, flips)
|
||||
|
||||
def solve(knowns, N):
|
||||
(knowns, coherence, flips) = reduce(knowns, N)
|
||||
if coherence == 1.0:
|
||||
inverted = knowns[0][1]
|
||||
return (inverted, flips, None)
|
||||
|
||||
raise Exception('Stop')
|
||||
|
||||
best_coherence = 0
|
||||
best_index = -1
|
||||
best_identity_value = False
|
||||
print()
|
||||
for i in range(0, N):
|
||||
for identity_value in [0, 1]:
|
||||
coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N)
|
||||
print(i, identity_value, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_identity_value = identity_value
|
||||
print()
|
||||
(left, right) = factor_at(knowns, N, best_index, best_identity_value)
|
||||
return (0, flips, (best_identity_value, solve(left, N), solve(right, N)))
|
||||
|
||||
def evaluate(model, n, value = 0):
|
||||
(inverted, flips, child) = model
|
||||
for (i, invert) in flips:
|
||||
mask = (1 << i)
|
||||
masked_n = n & mask
|
||||
if (masked_n > 0 and not invert) or (masked_n == 0 and invert):
|
||||
value = 1 - value
|
||||
if not child is None:
|
||||
(identity, left_child, right_child) = child
|
||||
left = evaluate(left_child, n, 1 - identity)
|
||||
right = evaluate(right_child, n, 1 - identity)
|
||||
if left and right:
|
||||
value = 1 - value
|
||||
if identity == 0:
|
||||
value = 1 - value
|
||||
if inverted:
|
||||
value = 1 - value
|
||||
return value
|
||||
|
||||
def main():
|
||||
N = 8
|
||||
S = 2 ** N
|
||||
train_size = 128
|
||||
test_size = 100
|
||||
f = xor_n
|
||||
|
||||
knowns = [(i, f(i)) for i in [
|
||||
# 0, 1, 2, 3, 4, 5, 6, 7
|
||||
# 0, 3, 4, 5, 7
|
||||
# 3, 5, 6, 10, 12, 14
|
||||
# 1, 3, 7, 10, 14, 15
|
||||
# 0, 3, 5, 6, 10, 11, 12
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
|
||||
# 128, 131, 248, 0, 7, 13, 17, 19
|
||||
23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255
|
||||
]]
|
||||
|
||||
# knowns = []
|
||||
# train_samples = set()
|
||||
# for i in range(0, train_size):
|
||||
# k = random.randint(0, S)
|
||||
# while k in train_samples:
|
||||
# k = random.randint(0, S)
|
||||
# knowns.append((k, f(i)))
|
||||
# train_samples.add(k)
|
||||
|
||||
model = solve(knowns, N)
|
||||
print(model)
|
||||
# print(model)
|
||||
correct = 0
|
||||
for i in range(0, test_size):
|
||||
k = random.randint(0, S - 1)
|
||||
if f(k) == evaluate(model, k):
|
||||
correct += 1
|
||||
print(str(correct) + "/" + str(test_size))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
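The reduce() loop in model_probabilities8.py works because parity-like targets collapse under label flips: xor_by_index flips the label of every sample whose chosen bit is set, which cancels that bit's contribution, and once every contributing bit has been flipped away the labels are constant and coherence reaches 1.0. A short sketch of that effect on 2-bit parity, duplicating xor_by_index as defined above:

def xor_by_index(knowns, index, reverse=False):
    mask = 1 << index
    knowns = knowns[:]
    for i in range(len(knowns)):
        known = knowns[i]
        if known[0] & mask or (not (known[0] & mask) and reverse):
            knowns[i] = (known[0], known[1] ^ 1)
    return knowns

knowns = [(i, bin(i).count("1") % 2) for i in range(4)]  # 2-bit parity
print(knowns)                   # [(0, 0), (1, 1), (2, 1), (3, 0)]
step1 = xor_by_index(knowns, 0)
print(step1)                    # [(0, 0), (1, 0), (2, 1), (3, 1)]
print(xor_by_index(step1, 1))   # [(0, 0), (1, 0), (2, 0), (3, 0)] -- constant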
310
model_probabilities9.py
Executable file
@ -0,0 +1,310 @@
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import secrets
|
||||
from struct import pack, pack_into, unpack_from
|
||||
|
||||
def bit_at_index(buffer, index):
|
||||
offset = (index >> 3) % len(buffer)
|
||||
return buffer[offset] & (1 << (index & 0b111)) != 0
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def hamming_distance(a, b):
|
||||
distance = 0
|
||||
for i in range(0, len(a)):
|
||||
distance += count_one_bits(a[i] ^ b[i])
|
||||
return distance
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha(x):
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def apply_flips(samples, inputs, flips):
|
||||
samples = samples[:]
|
||||
for i in range(len(samples)):
|
||||
(key, old_value) = samples[i]
|
||||
new_value = old_value
|
||||
for index in flips:
|
||||
if bit_at_index(inputs[key], index):
|
||||
new_value = new_value ^ 1
|
||||
if not new_value == old_value:
|
||||
samples[i] = (key, new_value)
|
||||
return samples
|
||||
|
||||
def coherence_for_knowns(knowns, distances, N):
|
||||
if len(knowns) == 1:
|
||||
return 1.0
|
||||
coherences = []
|
||||
for i in range(0, len(knowns)):
|
||||
(a_key, a_value) = knowns[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(knowns)):
|
||||
if i == j:
|
||||
continue
|
||||
(b_key, b_value) = knowns[j]
|
||||
distance = distances[a_key][b_key]
|
||||
weight = 1.0 / (2 ** distance)
|
||||
denominator += weight
|
||||
if a_value == b_value:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def iterate_indices(indices, N):
|
||||
carry_index = -1
|
||||
for i in range(0, len(indices)):
|
||||
j = len(indices) - i - 1
|
||||
if indices[j] + 1 + i < N:
|
||||
carry_index = j
|
||||
break
|
||||
if carry_index < 0:
|
||||
return None
|
||||
base_value = indices[carry_index]
|
||||
for i in range(0, len(indices) - carry_index):
|
||||
new_value = base_value + i + 1
|
||||
if new_value >= N:
|
||||
return None
|
||||
indices[carry_index + i] = new_value
|
||||
return indices
|
||||
|
||||
def compute_indices(samples, inputs, N):
|
||||
zero_buckets = [False for i in range(0, N)]
|
||||
one_buckets = [False for i in range(0, N)]
|
||||
for (key, _) in samples:
|
||||
for index in range(0, N):
|
||||
if bit_at_index(inputs[key], index):
|
||||
one_buckets[index] = True
|
||||
else:
|
||||
zero_buckets[index] = True
|
||||
return [index for index in range(0, N) if zero_buckets[index] and one_buckets[index]]
|
||||
|
||||
def compute_distances(inputs, distances):
|
||||
for i in range(0, len(inputs)):
|
||||
a = inputs[i]
|
||||
for j in range(i, len(inputs)):
|
||||
b = inputs[j]
|
||||
distance = hamming_distance(a, b) if j != i else 0
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def reduce(samples, inputs, distances, N):
|
||||
available_indices = compute_indices(samples, inputs, N)
|
||||
flips = []
|
||||
best_coherence = coherence_for_knowns(samples, distances, N)
|
||||
# print(best_coherence)
|
||||
# print(knowns)
|
||||
# print()
|
||||
depth = 1
|
||||
while depth <= len(available_indices) and depth < 2:
|
||||
while best_coherence < 1.0:
|
||||
best_flip = None
|
||||
try_indices = [i for i in range(0, depth)]
|
||||
while not try_indices is None:
|
||||
try_flip = [available_indices[i] for i in try_indices]
|
||||
mutated_samples = apply_flips(samples, inputs, try_flip)
|
||||
coherence = coherence_for_knowns(mutated_samples, distances, N)
|
||||
# print(try_flip, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_flip = try_flip
|
||||
try_indices = iterate_indices(try_indices, len(available_indices))
|
||||
|
||||
if best_flip is None:
|
||||
depth += 1
|
||||
# print(depth)
|
||||
break
|
||||
samples = apply_flips(samples, inputs, best_flip)
|
||||
flips += best_flip
|
||||
available_indices = [index for index in available_indices if index not in best_flip]
|
||||
depth = 1
|
||||
# print()
|
||||
# print(best_flip, best_coherence)
|
||||
# print(knowns)
|
||||
# print()
|
||||
# print(depth)
|
||||
if len(available_indices) == 0:
|
||||
break
|
||||
if best_coherence == 1.0:
|
||||
break
|
||||
return (samples, best_coherence, flips)
|
||||
|
||||
def dominant_value(knowns, M=2):
|
||||
buckets = [0 for i in range(0, M)]
|
||||
for (_, value) in knowns:
|
||||
buckets[value] += 1
|
||||
return buckets.index(max(buckets))
|
||||
|
||||
def solve(samples, inputs, distances, N):
|
||||
(samples, coherence, flips) = reduce(samples, inputs, distances, N)
|
||||
if coherence == 1.0:
|
||||
inverted = samples[0][1]
|
||||
return (inverted, flips, None)
|
||||
|
||||
identity = dominant_value(samples)
|
||||
left = [(key, 1) for (key, value) in samples if value != identity]
|
||||
right = [(key, 1) for (key, value) in samples if value != identity]
|
||||
for (key, value) in samples:
|
||||
if value == identity:
|
||||
if random.random() > 0.5:
|
||||
left.append((key, 0))
|
||||
else:
|
||||
right.append((key, 0))
|
||||
|
||||
return (0, flips, (identity, solve(left, inputs, distances, N), solve(right, inputs, distances, N)))
|
||||
|
||||
def evaluate(model, x, value = 0):
|
||||
(inverted, flips, child) = model
|
||||
for i in flips:
|
||||
if bit_at_index(x, i) != 0:
|
||||
value ^= 1
|
||||
if not child is None:
|
||||
(identity, left_child, right_child) = child
|
||||
left = evaluate(left_child, x)
|
||||
right = evaluate(right_child, x)
|
||||
if left & right != identity:
|
||||
value ^= 1
|
||||
if inverted:
|
||||
value ^= 1
|
||||
return value
|
||||
|
||||
def transform(x, layers):
|
||||
x[0] = 0
|
||||
for layer in layers:
|
||||
prefix = 0
|
||||
for i in range(0, len(layer)):
|
||||
model = layer[i]
|
||||
value = evaluate(model, x)
|
||||
prefix <<= 1
|
||||
prefix |= value
|
||||
x[0] = prefix
|
||||
|
||||
def encode_f(f, buffer, offset=0):
|
||||
(inverted, flips, residual) = f
|
||||
pack_into('B', buffer, offset, inverted)
|
||||
offset += 1
|
||||
for index in flips:
|
||||
pack_into('B', buffer, offset, 0)
|
||||
offset += 1
|
||||
pack_into('I', buffer, offset, index)
|
||||
offset += 4
|
||||
if residual is None:
|
||||
pack_into('B', buffer, offset, 1)
|
||||
offset += 1
|
||||
return offset
|
||||
(inverted, left, right) = residual
|
||||
pack_into('B', buffer, offset, 2 if not inverted else 3)
|
||||
offset += 1
|
||||
offset = encode_f(left, buffer, offset)
|
||||
offset = encode_f(right, buffer, offset)
|
||||
return offset
|
||||
|
||||
def decode_f(buffer, offset = 0):
|
||||
[inverted] = unpack_from('B', buffer, offset)
|
||||
offset += 1
|
||||
inverted &= 0b1
|
||||
flips = []
|
||||
while offset < len(buffer):
|
||||
[opcode] = unpack_from('B', buffer, offset)
|
||||
offset += 1
|
||||
opcode &= 0b11
|
||||
if opcode == 0:
|
||||
[index] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
flips.append(index)
|
||||
elif opcode == 1:
|
||||
return (offset, (inverted, flips, None))
|
||||
else:
|
||||
(offset, left) = decode_f(buffer, offset)
|
||||
(offset, right) = decode_f(buffer, offset)
|
||||
gate_inverted = 0 if opcode == 2 else 1
|
||||
return (offset, (gate_inverted, flips, (left, right)))
|
||||
return (offset, (inverted, [], None))
|
||||
|
||||
def random_input():
|
||||
return bytearray(1) + secrets.token_bytes(3)
|
||||
|
||||
def main():
|
||||
N = 32
|
||||
S = 2 ** N
|
||||
train_size = 64
|
||||
test_size = 1000
|
||||
f = sha
|
||||
num_epochs = 4
|
||||
num_layers = 7
|
||||
layers_samples = []
|
||||
layers = []
|
||||
score = 0.5
|
||||
distances = np.zeros((train_size, train_size))
|
||||
|
||||
for epoch in range(0, num_epochs):
|
||||
layer = []
|
||||
layer_samples = []
|
||||
total_correct = 0.0
|
||||
layer_index = 0
|
||||
total_difficulty = 0
|
||||
difficulty = 0
|
||||
while layer_index < num_layers:
|
||||
inputs = []
|
||||
samples = []
|
||||
raw_samples = []
|
||||
for i in range(0, train_size):
|
||||
x = random_input()
|
||||
y = f(x)
|
||||
transform(x, layers)
|
||||
inputs.append(x)
|
||||
samples.append((i, y))
|
||||
raw_samples.append((x, y))
|
||||
|
||||
compute_distances(inputs, distances)
|
||||
model = solve(samples, inputs, distances, N)
|
||||
# print(model)
|
||||
# encoded = bytearray(1024)
|
||||
# offset = encode_f(model, encoded)
|
||||
# decoded_model = decode_f(encoded)
|
||||
# print()
|
||||
# print(decoded_model)
|
||||
|
||||
# correct = 0
|
||||
# for (x, y) in samples:
|
||||
# if evaluate(model, inputs[x]) == y:
|
||||
# correct += 1
|
||||
# print(str(correct) + "/" + str(train_size))
|
||||
|
||||
correct = 0
|
||||
for _ in range(0, test_size):
|
||||
x = random_input()
|
||||
y = f(x)
|
||||
transform(x, layers)
|
||||
if evaluate(model, x) == y:
|
||||
correct += 1
|
||||
difficulty += 1
|
||||
local_score = correct / test_size
|
||||
if local_score < score - 0.0001 * difficulty:
|
||||
continue
|
||||
# print_score = round(local_score * 10000.0) / 100.0
|
||||
# print('Layer ' + str(layer_index) + ': ' + str(candidates) + ' ' + str(print_score) + '%')
|
||||
layer_index += 1
|
||||
total_correct += correct
|
||||
total_difficulty += difficulty
|
||||
difficulty = 0
|
||||
layer.append(model)
|
||||
layer_samples.append(raw_samples)
|
||||
score = total_correct / (test_size * num_layers)
|
||||
average_difficulty = round(total_difficulty * 100.0 / num_layers) / 100.0
|
||||
print_score = round(score * 10000.0) / 100.0
|
||||
print('Epoch ' + str(epoch) + ': ' + str(average_difficulty) + ' ' + str(print_score) + '%')
|
||||
layers.append(layer)
|
||||
layers_samples.append(layer_samples)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
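coherence_for_knowns in model_probabilities9.py weights every pair of samples by 1 / 2^d, where d is the Hamming distance between their inputs, so nearby inputs that disagree on their label hurt the score far more than distant ones. A minimal sketch of the same weighting on plain integer keys (the file itself works over byte buffers and a precomputed distance matrix; weighted_coherence is an illustrative name, not part of the commit):

def weighted_coherence(knowns):
    # knowns: list of (x, label) with integer x; pairs weighted by 2**-hamming.
    coherences = []
    for i, (a_key, a_value) in enumerate(knowns):
        numerator = denominator = 0.0
        for j, (b_key, b_value) in enumerate(knowns):
            if i == j:
                continue
            weight = 2.0 ** -bin(a_key ^ b_key).count("1")
            denominator += weight
            if a_value == b_value:
                numerator += weight
        coherences.append(numerator / denominator if denominator > 0 else 0.0)
    return sum(coherences) / len(coherences)

# Parity labels look maximally incoherent (adjacent points always disagree),
# while constant labels score 1.0.
parity = [(i, bin(i).count("1") % 2) for i in range(8)]
constant = [(i, 0) for i in range(8)]
print(weighted_coherence(parity), weighted_coherence(constant))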
96
mutations.cl
Normal file
@ -0,0 +1,96 @@
|
||||
__kernel void compute_distances(__global const uchar* x, __global float* distances) {
|
||||
int i = get_global_id(0);
|
||||
int j = get_global_id(1);
|
||||
int index = i * get_global_size(1) + j;
|
||||
if (i == j) {
|
||||
distances[index] = 0;
|
||||
return;
|
||||
}
|
||||
float distance = 0;
|
||||
for (int k = 0; k < {N}; k++) {
|
||||
distance += x[i * {N} + k] ^ x[j * {N} + k];
|
||||
}
|
||||
distances[index] = pow(2, -distance);
|
||||
}
|
||||
|
||||
__kernel void evaluate(__global const uchar* program, __global const uchar* x, __global uchar* scratch, __global uchar* y) {
|
||||
int program_index = get_global_id(0) * {MAX_PROGRAM_SIZE} * (1 + {N} + 2);
|
||||
int scratch_index = get_global_id(0) * {MAX_PROGRAM_SIZE};
|
||||
int input_index = get_global_id(1) * {N};
|
||||
int output_index = get_global_id(1);
|
||||
|
||||
scratch[scratch_index] = 0;
|
||||
|
||||
for (int i = 0; i < {MAX_PROGRAM_SIZE}; i++) {
|
||||
uchar output = program[program_index++];
|
||||
|
||||
for (int j = 0; j < {N}; j++) {
|
||||
output += program[program_index++] * x[input_index + j];
|
||||
}
|
||||
int left_index = program[program_index++];
|
||||
int right_index = program[program_index++];
|
||||
|
||||
output += scratch[scratch_index + left_index] * scratch[scratch_index + right_index];
|
||||
output %= {M};
|
||||
|
||||
if (program[program_index] == 255) {
|
||||
y[output_index] = output;
|
||||
return;
|
||||
} else {
|
||||
scratch[scratch_index + i + 1] = output;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void compute_coherences(__global const uchar* y, __global const uchar* z, __global const float* distances, __global float* coherences) {
|
||||
int index = get_global_id(0);
|
||||
int sample_size = get_global_size(0);
|
||||
|
||||
float numerator = 0;
|
||||
float denominator = 0;
|
||||
for (int i = 0; i < sample_size; i++) {
|
||||
int p = z[i] ^ y[index * sample_size + i];
|
||||
for (int j = 0; j < sample_size; j++) {
|
||||
int q = z[j] ^ y[index * sample_size + j];
|
||||
float distance = distances[i * sample_size + j];
|
||||
denominator += distance;
|
||||
if (p == q) {
|
||||
numerator += distance;
|
||||
}
|
||||
}
|
||||
}
|
||||
coherences[index] = numerator / denominator;
|
||||
}
|
||||
|
||||
__kernel void initialize_sort(__global uint* indices, __global uint* offset) {
|
||||
uint index = get_global_id(0);
|
||||
indices[index] = index;
|
||||
if (index == 0) {
|
||||
*offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void increment_offset(__global uint* offset) {
|
||||
uint x = *offset;
|
||||
if (x == 0) {
|
||||
*offset = 1;
|
||||
} else {
|
||||
*offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void sort(__global const float* coherences, __global uint* indices, __global uint* offset) {
|
||||
uint index = get_global_id(0) * 2 + *offset;
|
||||
uint a = indices[index];
|
||||
uint b = indices[index + 1];
|
||||
float coherence_a = coherences[a];
|
||||
float coherence_b = coherences[b];
|
||||
if (coherence_a < coherence_b) {
|
||||
indices[index] = b;
|
||||
indices[index + 1] = a;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void evolve(__global const uchar* program, __global float* coherences) {
|
||||
int index_a = get_global_id(0);
|
||||
}
|
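The kernels above use {N}, {M} and {MAX_PROGRAM_SIZE} as textual placeholders rather than OpenCL preprocessor defines, so a host program has to substitute them before compiling. No host code for mutations.cl is part of this commit; the following is a hypothetical pyopencl sketch of how compute_distances could be specialized and launched (pyopencl, the placeholder values, and the buffer sizes are all assumptions):

import numpy as np
import pyopencl as cl

N, sample_size = 8, 32
with open('mutations.cl') as f:
    source = f.read()
# Plain string replacement avoids fighting str.format over the C braces.
source = (source.replace('{N}', str(N))
                .replace('{M}', '2')
                .replace('{MAX_PROGRAM_SIZE}', '64'))

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
program = cl.Program(ctx, source).build()

x = np.random.randint(0, 2, size=(sample_size, N)).astype(np.uint8)
distances = np.zeros((sample_size, sample_size), dtype=np.float32)

mf = cl.mem_flags
x_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=x)
d_buf = cl.Buffer(ctx, mf.WRITE_ONLY, distances.nbytes)

# One work item per (i, j) pair, matching the two-dimensional global id
# used inside compute_distances.
program.compute_distances(queue, (sample_size, sample_size), None, x_buf, d_buf)
cl.enqueue_copy(queue, distances, d_buf)
print(distances[0][:4])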
511
mutations.py
Normal file
@ -0,0 +1,511 @@
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
from struct import pack, pack_into, unpack_from
|
||||
import secrets
|
||||
|
||||
|
||||
|
||||
N = 8
|
||||
|
||||
def bit_at_index(buffer, index):
|
||||
offset = (index >> 3) % len(buffer)
|
||||
return buffer[offset] & (1 << (index & 0b111)) != 0
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def encode_f(f, buffer, offset=0):
|
||||
(inverted, flips, child) = f
|
||||
pack_into('I', buffer, offset, inverted)
|
||||
offset += 4
|
||||
for index in flips:
|
||||
pack_into('I', buffer, offset, 0)
|
||||
offset += 4
|
||||
pack_into('I', buffer, offset, index)
|
||||
offset += 4
|
||||
if child is None:
|
||||
pack_into('I', buffer, offset, 1)
|
||||
offset += 4
|
||||
return offset
|
||||
(inverted, left, right) = child
|
||||
pack_into('I', buffer, offset, 2 if not inverted else 3)
|
||||
offset += 4
|
||||
offset = encode_f(left, buffer, offset)
|
||||
offset = encode_f(right, buffer, offset)
|
||||
return offset
|
||||
|
||||
def generate_random_branch(p_mutation):
|
||||
global N
|
||||
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
inverted = random.randint(0, 1)
|
||||
indices = set()
|
||||
children = []
|
||||
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in indices]
|
||||
if len(available_indices) == 1:
|
||||
indices.add(available_indices[0])
|
||||
continue
|
||||
indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_add_children)
|
||||
right = generate_random_branch(p_add_children)
|
||||
children.append((child_inverted, left, right))
|
||||
return (inverted, indices, children)
|
||||
|
||||
def mutate_f(f, p_mutation):
|
||||
global N
|
||||
(inverted, indices, children) = f
|
||||
mutated_indices = set(indices)
|
||||
mutated_children = children[:]
|
||||
|
||||
p_invert = p_mutation * random.random()
|
||||
p_drop_indices = p_mutation * random.random()
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_drop_children = p_mutation * random.random()
|
||||
p_mutate_child = p_mutation * random.random()
|
||||
p_clone_child = p_mutation * random.random()
|
||||
p_invert_child = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
# randomly invert
|
||||
if random.random() < p_invert:
|
||||
inverted ^= 1
|
||||
# randomly drop indices
|
||||
while random.random() < p_drop_indices and len(mutated_indices) > 0:
|
||||
mutated_indices.pop()
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(mutated_indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in mutated_indices]
|
||||
if len(available_indices) == 1:
|
||||
mutated_indices.add(available_indices[0])
|
||||
continue
|
||||
mutated_indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly drop children
|
||||
while random.random() < p_drop_children and len(mutated_children) > 0:
|
||||
if len(mutated_children) == 1:
|
||||
del mutated_children[0]
|
||||
break
|
||||
del mutated_children[random.randint(0, len(mutated_children) - 1)]
|
||||
# randomly clone children
|
||||
while random.random() < p_clone_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
clone = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
mutated_children.append(clone)
|
||||
# randomly mutate children
|
||||
while random.random() < p_mutate_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
mutated_children[index] = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_mutation)
|
||||
right = generate_random_branch(p_mutation)
|
||||
mutated_children.append((child_inverted, left, right))
|
||||
return (inverted, mutated_indices, mutated_children)
|
||||
|
||||
def decode_f(buffer, mutate = False, offset = 0, skip_invert = False):
|
||||
global N
|
||||
inverted = 0
|
||||
if not skip_invert:
|
||||
[inverted] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
# random invert
|
||||
if mutate and random.random() < 0.01:
|
||||
inverted ^= 1
|
||||
inverted &= 0b1
|
||||
flips = set()
|
||||
# random add flip
|
||||
while mutate and random.random() < 0.5 and len(flips) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in flips]
|
||||
if len(available_indices) == 1:
|
||||
flips.add(available_indices[0])
|
||||
continue
|
||||
flips.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
while offset < len(buffer):
|
||||
# random create branch
|
||||
if mutate and random.random() < 0.01:
|
||||
gate_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(0.5)  # generate_random_branch requires a mutation rate; 0.5 is an arbitrary placeholder
|
||||
(offset, right) = decode_f(buffer, mutate, offset, True)
|
||||
return (offset, (inverted, flips, (gate_inverted, left, right)))
|
||||
[opcode] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
opcode &= 0b11
|
||||
if opcode == 0:
|
||||
[index] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
# random skip flip
|
||||
if mutate and random.random() < 0.01:
|
||||
continue
|
||||
if index in flips:
|
||||
flips.remove(index)
|
||||
else:
|
||||
flips.add(index)
|
||||
elif opcode == 1:
|
||||
return (offset, (inverted, flips, None))
|
||||
else:
|
||||
(offset, left) = decode_f(buffer, mutate, offset)
|
||||
(offset, right) = decode_f(buffer, mutate, offset)
|
||||
gate_inverted = 0 if opcode == 2 else 1
|
||||
# random invert
|
||||
if mutate and random.random() < 0.01:
|
||||
gate_inverted ^= 1
|
||||
# random skip branch
|
||||
if mutate and random.random() < 0.01:
|
||||
return (offset, (inverted, flips, None))
|
||||
return (offset, (inverted, flips, (gate_inverted, left, right)))
|
||||
return (offset, (inverted, [], None))
|
||||
|
||||
def generate_program(f):
|
||||
statement = ""
|
||||
(inverted, indices, children) = f
|
||||
if inverted:
|
||||
statement += "1^"
|
||||
statement += "("
|
||||
for i in indices:
|
||||
statement += "(x[" + str(i) + ">>3]&(1<<(" + str(i) + "&0b111))!=0)^"
|
||||
for child in children:
|
||||
(gate_inverted, left, right) = child
|
||||
if gate_inverted:
|
||||
statement += "1^"
|
||||
statement += "((" + generate_program(left) + ")&(" + generate_program(right) + "))^"
|
||||
statement += "0)"
|
||||
return statement
|
||||
|
||||
def compile_f(f):
|
||||
program = 'def f(x):\n\treturn ' + generate_program(f)
|
||||
scope = {}
|
||||
exec(program, scope)
|
||||
return scope['f']
|
||||
|
||||
def evaluate(model, x, value = 0):
|
||||
(inverted, indices, children) = model
|
||||
for i in indices:
|
||||
if bit_at_index(x, i) != 0:
|
||||
value ^= 1
|
||||
for child in children:
|
||||
(child_inverted, left, right) = child
|
||||
left = evaluate(left, x)
|
||||
right = evaluate(right, x)
|
||||
if left & right != child_inverted:
|
||||
value ^= 1
|
||||
if inverted:
|
||||
value ^= 1
|
||||
return value
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for n in x:
|
||||
num_one_bits += count_one_bits(n)
|
||||
return num_one_bits % 2
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n))
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
return inputs
|
||||
|
||||
def update_sample(sample, index):
|
||||
global N
|
||||
for j in range(0, N):
|
||||
sample[index][j] = random.randint(0, 1)
|
||||
|
||||
def coherence(inputs, outputs):
|
||||
coherences = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
y_b = outputs[j]
|
||||
distance = np.sum(np.logical_xor(x_a, x_b))  # direct Hamming distance; hamming_distance() would need a scratch buffer here
|
||||
weight = 1.0 / (2 ** distance)
|
||||
denominator += weight
|
||||
if y_a == y_b:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def score(f, sample, distances):
|
||||
return coherence([(x, f(x) ^ y) for (x, y) in sample], distances)
|
||||
|
||||
def compute_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
a = inputs[i]
|
||||
for j in range(i, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def update_distances(inputs, distances, i, scratch):
|
||||
a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def evaluate_sample(model, sample, output):
|
||||
stack = [model]
|
||||
(_, _, _, root_scratch, _) = model
|
||||
while len(stack) > 0:
|
||||
layer = stack.pop()
|
||||
(inverted, xors, child, scratch, touched) = layer
|
||||
if child is None:
|
||||
np.matmul(sample, xors, scratch)
|
||||
np.mod(scratch, 2, scratch)
|
||||
if inverted == 1:
|
||||
np.logical_xor(1, scratch, scratch)
|
||||
touched[0] = 1
|
||||
else:
|
||||
(child_inverted, left, right) = child
|
||||
(_, _, _, left_scratch, left_touched) = left
|
||||
(_, _, _, right_scratch, right_touched) = right
|
||||
if left_touched[0] and right_touched[0]:
|
||||
np.multiply(left_scratch, right_scratch, output)
|
||||
np.matmul(sample, xors, scratch)
|
||||
np.mod(scratch, 2, scratch)
|
||||
if inverted:
|
||||
np.logical_xor(scratch, 1, scratch)
|
||||
if child_inverted:
|
||||
np.logical_xor(output, 1, output)
|
||||
np.logical_xor(scratch, output, scratch)
|
||||
touched[0] = 1
|
||||
else:
|
||||
stack.insert(0, layer)
|
||||
stack.insert(0, left)
|
||||
stack.insert(0, right)
|
||||
np.copyto(output, root_scratch)
|
||||
reset_model(model)
|
||||
|
||||
def reset_model(model):
|
||||
stack = [model]
|
||||
while len(stack) > 0:
|
||||
layer = stack.pop()
|
||||
(_, _, child, _, touched) = layer
|
||||
touched[0] = 0
|
||||
if not child is None:
|
||||
(_, left, right) = child
|
||||
stack.append(left)
|
||||
stack.append(right)
|
||||
|
||||
def clone_model(model, p_mutation):
|
||||
global N
|
||||
|
||||
p_invert = p_mutation * random.random()
|
||||
p_invert_child = p_mutation * random.random()
|
||||
p_flip = p_mutation * random.random()
|
||||
p_add_child = p_mutation * random.random()
|
||||
# p_drop_child = p_mutation * random.random() * 0.5
|
||||
p_drop_child = 0
|
||||
|
||||
(inverted, xors, child, scratch, touched) = model
|
||||
if random.random() < p_invert:
|
||||
inverted ^= 1
|
||||
clone_xors = np.zeros((N,))
|
||||
np.copyto(clone_xors, xors)
|
||||
for i in range(0, N):
|
||||
if random.random() < p_flip:
|
||||
clone_xors[i] = int(clone_xors[i]) ^ 1
|
||||
clone_scratch = np.zeros(np.shape(scratch))
|
||||
clone_touched = np.zeros(np.shape(touched))
|
||||
if child is None:
|
||||
if random.random() < p_add_child:
|
||||
sample_size = len(scratch)
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = random_child(sample_size, p_mutation)
|
||||
right = random_child(sample_size, p_mutation)
|
||||
return (inverted, clone_xors, (child_inverted, left, right), clone_scratch, clone_touched)
|
||||
return (inverted, clone_xors, None, clone_scratch, clone_touched)
|
||||
if random.random() < p_drop_child:
|
||||
return (inverted, clone_xors, None, clone_scratch, clone_touched)
|
||||
(child_inverted, left, right) = child
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1  # p_invert_child toggles the child gate, not the parent inversion
|
||||
clone_left = clone_model(left, p_mutation)
|
||||
clone_right = clone_model(right, p_mutation)
|
||||
return (inverted, clone_xors, (child_inverted, clone_left, clone_right), clone_scratch, clone_touched)
|
||||
|
||||
def random_child(sample_size, p_mutation):
|
||||
global N
|
||||
inverted = random.randint(0, 1)
|
||||
xors = np.zeros((N,))
|
||||
scratch = np.zeros((sample_size,))
|
||||
touched = np.zeros((1,))
|
||||
|
||||
p_flip = p_mutation * random.random()
|
||||
p_child = p_mutation * random.random()
|
||||
|
||||
index = random.randint(0, N - 1)
|
||||
xors[index] = 1
|
||||
for i in range(0, N):
|
||||
if random.random() < p_flip:
|
||||
xors[i] = 1
|
||||
# if random.random() < p_child:
|
||||
# child_inverted = random.randint(0, 1)
|
||||
# left = random_child(sample_size, p_mutation * random.random())
|
||||
# right = random_child(sample_size, p_mutation * random.random())
|
||||
# return (inverted, xors, (child_inverted, left, right), scratch, touched)
|
||||
return (inverted, xors, None, scratch, touched)
|
||||
|
||||
def size(model):
|
||||
(_, xors, child, _, _) = model
|
||||
xor_size = np.sum(xors)
|
||||
if not child is None:
|
||||
(_, left, right) = child
|
||||
return xor_size + size(left) * size(right)
|
||||
return xor_size
|
||||
|
||||
def null_candidate(sample_size):
|
||||
global N
|
||||
return (0, np.zeros((N,)), None, np.zeros((sample_size,)), np.zeros((1,)))
|
||||
|
||||
def main():
|
||||
global N
|
||||
epochs = 10000
|
||||
num_survivors = 100
|
||||
num_offspring = 10
|
||||
num_candidates = num_survivors + num_survivors * num_offspring
|
||||
sample_size = 32
|
||||
eval_size = 100
|
||||
p_mutation = 0.5
|
||||
g = sha
|
||||
current_generation = [null_candidate(sample_size) for _ in range(0, num_candidates)]
|
||||
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
output_equality = np.zeros((sample_size, sample_size))
|
||||
inputs = random_sample(sample_size, N)
|
||||
scratch = np.zeros(N,)
|
||||
compute_distances(inputs, distances, scratch)
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((sample_size,))
|
||||
output_xor = np.zeros((sample_size,))
|
||||
ones = np.ones((sample_size,))
|
||||
numerators = np.zeros((sample_size,))
|
||||
denominators = np.zeros((sample_size,))
|
||||
coherences = np.zeros((sample_size,))
|
||||
np.matmul(ones, distances, denominators)
|
||||
scores = np.zeros((num_candidates,))
|
||||
max_score = 0
|
||||
last_score = 0
|
||||
streak = 0
|
||||
|
||||
for epoch in range(0, epochs):
|
||||
for i in range(0, num_candidates):
|
||||
candidate = current_generation[i]
|
||||
evaluate_sample(candidate, inputs, outputs)
|
||||
np.logical_xor(outputs, expected_outputs, output_xor)
|
||||
for p in range(0, sample_size):
|
||||
for q in range(0, sample_size):
|
||||
m = int(output_xor[p])
|
||||
n = int(output_xor[q])
|
||||
output_equality[p][q] = 1 ^ m ^ n
|
||||
np.multiply(output_equality, distances, output_equality)
|
||||
np.matmul(ones, output_equality, numerators)
|
||||
np.divide(numerators, denominators, coherences)
|
||||
score = np.average(coherences)
|
||||
scores[i] = score
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
survivors = [current_generation[index] for index in top_n]
|
||||
|
||||
# f = lambda x: evaluate(current_generation[0], x)
|
||||
# correct = 0
|
||||
# for i in range(0, eval_size):
|
||||
# x = random_input()
|
||||
# if f(x) == g(x):
|
||||
# correct += 1
|
||||
|
||||
top_score = scores[top_n[-1]]
|
||||
print(epoch, top_score, size(survivors[-1]))
|
||||
if top_score <= max_score:
|
||||
p_mutation += 0.01
|
||||
else:
|
||||
p_mutation = 0.5
|
||||
max_score = top_score
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
current_generation[i] = survivors[i]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
candidate = survivors[i]
|
||||
for j in range(0, num_offspring):
|
||||
index = num_survivors + j * num_survivors + i
|
||||
current_generation[index] = clone_model(candidate, random.random())
|
||||
|
||||
# while random.random() < 0.5:
|
||||
if last_score == top_score:
|
||||
# streak += 1
|
||||
# else:
|
||||
# streak = 0
|
||||
# if streak >= 4:
|
||||
# streak = 0
|
||||
inputs = random_sample(sample_size, N)
|
||||
compute_distances(inputs, distances, scratch)
|
||||
np.matmul(ones, distances, denominators)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
# expected_outputs = np.zeros((sample_size,))
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# index = random.randint(0, sample_size - 1)
|
||||
# update_sample(inputs, index)
|
||||
# expected_outputs[index] = g(inputs[index])
|
||||
# update_distances(inputs, distances, index, scratch)
|
||||
# np.matmul(ones, distances, denominators)
|
||||
last_score = top_score
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
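The inner loop of main() in mutations.py scores every candidate with a vectorized form of the coherence metric: the pairwise agreement of (output XOR expected) is weighted by the precomputed 2^-hamming distance matrix, reduced with matrix products, and averaged. A small numpy sketch of just that scoring step (names like equality are illustrative, not from the commit):

import numpy as np

rng = np.random.default_rng(0)
sample_size, N = 6, 8
inputs = rng.integers(0, 2, size=(sample_size, N))
output_xor = rng.integers(0, 2, size=sample_size)  # candidate output ^ expected

# distances[i][j] = 2**-hamming(inputs[i], inputs[j]), zero on the diagonal.
distances = np.zeros((sample_size, sample_size))
for i in range(sample_size):
    for j in range(sample_size):
        if i != j:
            distances[i][j] = 2.0 ** -np.sum(inputs[i] ^ inputs[j])

# equality[p][q] is 1 where the two residuals agree, matching 1 ^ m ^ n above.
equality = 1 - np.abs(output_xor[:, None] - output_xor[None, :])
coherences = (equality * distances).sum(axis=1) / distances.sum(axis=1)
print(np.average(coherences))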
425
mutations10.py
Normal file
@ -0,0 +1,425 @@
|
||||
from enum import unique
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
|
||||
def vec_to_int(x):
|
||||
z = 0
|
||||
for i in range(0, len(x)):
|
||||
z <<= 1
|
||||
z |= x[i]
|
||||
return z
|
||||
|
||||
def timeit(f):
|
||||
def timed(*args, **kw):
|
||||
ts = time.time()
|
||||
result = f(*args, **kw)
|
||||
te = time.time()
|
||||
|
||||
print('func:%r took: %2.4f sec' % (f.__name__, te-ts))
|
||||
return result
|
||||
return timed
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, layer, slots):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = layer
|
||||
self.offsets = np.zeros((self.node_count, N + 1 + slots)).astype(np.int32)
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self, layer, slots):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.slots = slots
|
||||
self.node_count = layer
|
||||
self.p_offsets = np.zeros((self.node_count, N + 1 + slots))
|
||||
self.p_offsets.fill(0.5)
|
||||
self.offset_coherences = np.zeros((2, self.node_count, N + 1 + slots, 2, self.node_count, N + 1 + slots))
|
||||
self.offset_coherences.fill(-1)
|
||||
self.deltas = np.zeros((self.node_count, N + 1 + slots, 2, self.node_count, N + 1 + slots))
|
||||
|
||||
def has_converged(self):
|
||||
for i in range(0,self.node_count):
|
||||
for j in range(0, len(self.p_offsets[i])):
|
||||
if self.p_offsets[i][j] > 0.05 and self.p_offsets[i][j] < 0.95:
|
||||
return False
|
||||
return True
|
||||
|
||||
def flatten(self):
|
||||
candidate = Candidate(self.layer, self.slots)
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, len(self.p_offsets[i])):
|
||||
candidate.offsets[i][j] = 1 if self.p_offsets[i][j] >= 0.5 else 0
|
||||
return candidate
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
# 00100111 x4
|
||||
# 00000110 x1
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
|
||||
# 0 ^ 1 ^ (2 ^ (4 * (5 ^ 0 * 7))) * (3 ^ 6 * 7)
|
||||
# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * (5 ^ 0 * 7) ^ 4 * 6 * 7 * (5 ^ 0 * 7)
|
||||
# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * 5 ^ 0 * 3 * 4 * 7 ^ 4 * 5 * 6 * 7 ^ 0 * 4 * 6 * 7
|
||||
|
||||
# 0 ^ 1 ^ 2*3 ^ 2*6*7 ^ 3*4*5 ^ 0*3*4*7 ^ 4*5*6*7 ^ 0*4*6*7
|
||||
# What about strictly SOP?
|
||||
# That is, 1-Hot of increasing complexity?
|
||||
# How would that work?
|
||||
# Candidate generation could apply some kind of softmax to filter down to one
|
||||
#
|
||||
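# Sanity-check sketch for the expansion worked out in the comments above: over
# 0/1 values AND distributes over XOR and x*x == x, so the nested form below
# (the same expression as test_fn) equals the flat XOR-of-products form on all
# 256 inputs. The helper names here are illustrative only.
def _nested(x):
    return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))

def _flat(x):
    return (x[0] ^ x[1]
            ^ x[2] * x[3]
            ^ x[2] * x[6] * x[7]
            ^ x[3] * x[4] * x[5]
            ^ x[0] * x[3] * x[4] * x[7]
            ^ x[4] * x[5] * x[6] * x[7]
            ^ x[0] * x[4] * x[6] * x[7])

assert all(_nested(x) == _flat(x)
           for x in ([(n >> i) & 1 for i in range(8)] for n in range(256)))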
def test_fn(x):
|
||||
# 0 1
|
||||
# 2 | 3
|
||||
# 4 | 5 | 6 | 7
|
||||
# | | 0 | 7 | | | |
|
||||
return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))
|
||||
|
||||
def candidate_fn(x):
|
||||
return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2])
|
||||
|
||||
def true_fn(x):
|
||||
return x[0] ^ x[1] ^ (x[3] * x[2])
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(outputs, distances):
|
||||
coherences = []
|
||||
for i in range(0, len(outputs)):
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(outputs)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = outputs[j]
|
||||
weight = distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def random_sample(m, n, layers, g, compute_scratch):
|
||||
inputs = np.zeros((m, n)).astype(np.int32)
|
||||
augmented_inputs = np.zeros((m, n + len(layers) + 1)).astype(np.int32)
|
||||
outputs = np.zeros((m,)).astype(np.int32)
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
val = random.randint(0, 1)
|
||||
inputs[i][j] = val
|
||||
augmented_inputs[i][j] = val
|
||||
outputs[i] = g(inputs[i])
|
||||
augmented_inputs[i][n] = 1
|
||||
for j in range(0, len(layers)):
|
||||
augmented_inputs[i][n + j] = evaluate_candidate(layers[j], augmented_inputs[i], compute_scratch)
|
||||
return (inputs, augmented_inputs, outputs)
|
||||
|
||||
def populate_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def evaluate(layers, candidate, x, compute_scratch):
|
||||
z = evaluate_layers(layers, x, compute_scratch)
|
||||
z ^= evaluate_candidate(candidate, x, compute_scratch)
|
||||
return z
|
||||
|
||||
def evaluate_layers(layers, x, compute_scratch):
|
||||
z = 0
|
||||
for layer in layers:
|
||||
z ^= evaluate_candidate(layer, x, compute_scratch)
|
||||
return z
|
||||
|
||||
def evaluate_candidate(candidate, x, compute_scratch):
|
||||
y = 1
|
||||
for j in range(0, candidate.node_count):
|
||||
value = 0
|
||||
compute_scratch.fill(0)
|
||||
compute_scratch[0:len(candidate.offsets[j])] = candidate.offsets[j]
|
||||
np.multiply(compute_scratch, x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
y &= value
|
||||
return y
|
||||
|
||||
@timeit
|
||||
def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch):
|
||||
global M
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch)
|
||||
for j in range(1, num_candidates):
|
||||
np.copyto(outputs[j], outputs[0])
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
base_score = coherence(output_xor, distances)
|
||||
|
||||
scores.fill(0)
|
||||
unique_candidates = {}
|
||||
for j in range(0, num_candidates):
|
||||
create_candidate(probabilities, candidates[j])
|
||||
unique_candidates[candidate_str(candidates[j])] = j
|
||||
|
||||
for i in range(0, sample_size):
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
outputs[j][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
scores[j] = score
|
||||
return base_score
|
||||
|
||||
|
||||
def compute_uplift(candidate, layers, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch):
|
||||
global M
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
base_score = coherence(output_xor, distances)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
return (base_score, score)
|
||||
|
||||
@timeit
|
||||
def update_probabilities(probabilities, candidates, inputs, base_score, scores, scale):
|
||||
num_candidates = len(candidates)
|
||||
|
||||
probabilities.offset_coherences.fill(-1)
|
||||
for p in range(0, num_candidates):
|
||||
candidate = candidates[p]
|
||||
if scores[p] == 0:
|
||||
continue
|
||||
# score = max(scores[p], base_score)
|
||||
score = scores[p]
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, len(candidate.offsets[j])):
|
||||
i = candidate.offsets[j][k]
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, len(candidate.offsets[m])):
|
||||
l = candidate.offsets[m][n]
|
||||
probabilities.offset_coherences[i][j][k][l][m][n] = max(score, probabilities.offset_coherences[i][j][k][l][m][n])
|
||||
|
||||
p_offsets_next = np.empty_like(probabilities.p_offsets)
|
||||
inertia = 0
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, len(p_offsets_next[j])):
|
||||
delta = 0
|
||||
count = 0
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, len(p_offsets_next[m])):
|
||||
# if j == m and k == n:
|
||||
# continue
|
||||
p_j1_if_m0 = probabilities.offset_coherences[1][j][k][0][m][n]
|
||||
p_j0_if_m0 = probabilities.offset_coherences[0][j][k][0][m][n]
|
||||
p_j1_if_m1 = probabilities.offset_coherences[1][j][k][1][m][n]
|
||||
p_j0_if_m1 = probabilities.offset_coherences[0][j][k][1][m][n]
|
||||
if p_j1_if_m0 >= 0 and p_j0_if_m0 >= 0:
|
||||
# delta_if_m0 = (p_j1_if_m0 - base_score) - (p_j0_if_m0 - base_score)
|
||||
delta_if_m0 = p_j1_if_m0 - p_j0_if_m0
|
||||
delta += delta_if_m0 * (1.0 - probabilities.p_offsets[m][n]) * scale
|
||||
count += 1
|
||||
if p_j1_if_m1 >= 0 and p_j0_if_m1 >= 0:
|
||||
# delta_if_m1 = (p_j1_if_m1 - base_score) - (p_j0_if_m1 - base_score)
|
||||
delta_if_m1 = p_j1_if_m1 - p_j0_if_m1
|
||||
delta += delta_if_m1 * probabilities.p_offsets[m][n] * scale
|
||||
count += 1
|
||||
if count > 0:
|
||||
delta /= count
|
||||
p_offsets_next[j][k] = clamp(probabilities.p_offsets[j][k] + delta, 0, 1)
|
||||
inertia += abs(p_offsets_next[j][k] - probabilities.p_offsets[j][k])
|
||||
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, len(probabilities.p_offsets[j])):
|
||||
p_offset_next = 0.9 * probabilities.p_offsets[j][k] + 0.1 * p_offsets_next[j][k]
|
||||
# if p_offset_next <= 0.05:
|
||||
# p_offset_next = 0.0
|
||||
# elif p_offset_next >= 0.95:
|
||||
# p_offset_next = 1.0
|
||||
probabilities.p_offsets[j][k] = p_offset_next
|
||||
|
||||
return inertia
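# Note (added for clarity): offset_coherences[i][j][k][l][m][n] records the best
# score seen with offset (j, k) set to value i while offset (m, n) was set to
# value l. The update nudges p_offsets[j][k] by the score advantage of turning
# that offset on, averaged over the other offsets and weighted by how likely each
# of their states currently is; the total probability movement is returned as
# "inertia" so the caller can detect convergence.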
|
||||
|
||||
def create_candidate(probabilities, candidate):
|
||||
candidate.offsets.fill(0)
|
||||
for i in range(0, probabilities.node_count):
|
||||
max_value = -1
|
||||
max_index = -1
|
||||
for j in range(0, len(probabilities.p_offsets[i])):
|
||||
value = random.random() + probabilities.p_offsets[i][j]
|
||||
if value > max_value:
|
||||
max_value = value
|
||||
max_index = j
|
||||
# candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0
|
||||
candidate.offsets[i][max_index] = 1
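# Note (added for clarity): each node picks exactly one offset via a noisy argmax
# (uniform noise added to p_offsets), i.e. a 1-hot selection per node, instead of
# the independent Bernoulli draw shown in the commented-out line above.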
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
for i in range(0, src.node_count):
|
||||
for j in range(0, len(src.offsets[i])):
|
||||
dest.offsets[i][j] = src.offsets[i][j]
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities):
|
||||
print('=====================')
|
||||
for i in range(0, probabilities.node_count):
|
||||
print(i, p_a(probabilities.p_offsets[i]))
|
||||
print('=====================')
|
||||
|
||||
def candidate_str(candidate):
|
||||
build_str = ''
|
||||
for i in range(0, candidate.node_count):
|
||||
for j in range(0, len(candidate.offsets[i])):
|
||||
build_str += str(candidate.offsets[i][j])
|
||||
return build_str
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 1
|
||||
uplift_sample_size = 100
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros((N,))
|
||||
int_scratch = np.zeros((N,)).astype(np.int32)
|
||||
g = test_fn
|
||||
layers = []
|
||||
augment_layers = []
|
||||
layer = 1
|
||||
(inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, int_scratch)
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
populate_distances(inputs, distances, scratch)
|
||||
outputs = np.zeros((num_candidates + num_survivors, sample_size,)).astype(np.int32)
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
while score < 1:
|
||||
probabilities = Probabilities(layer, len(augment_layers))
|
||||
candidates = [Candidate(layer, len(augment_layers)) for _ in range(0, num_candidates + num_survivors)]
|
||||
augmented_int_scratch = np.zeros((N + 1 + len(augment_layers),)).astype(np.int32)
|
||||
(inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, augmented_int_scratch)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
|
||||
inertia = 1
|
||||
epoch = 1
|
||||
while inertia > 0.001 and epoch < 1000 and not probabilities.has_converged():
|
||||
base_score = compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, augmented_inputs, outputs, output_xor, expected_outputs, sample_size, augmented_int_scratch)
|
||||
round_inertia = update_probabilities(probabilities, candidates, augmented_inputs, base_score, scores, 1 + 0.01 * epoch)
|
||||
inertia = 0.9 * inertia + 0.1 * round_inertia
|
||||
|
||||
print_probabilities(probabilities)
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
max_score = np.max(scores)
|
||||
print(base_score, max_score, round_inertia, inertia)
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
src = candidates[src_index]
|
||||
dest = candidates[dest_index]
|
||||
candidates[dest_index] = src
|
||||
candidates[src_index] = dest
|
||||
|
||||
(inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, augmented_int_scratch)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
epoch += 1
|
||||
|
||||
candidate = probabilities.flatten()
|
||||
print(candidate.offsets)
|
||||
for j in range(0, sample_size):
|
||||
outputs[0][j] = evaluate(layers, candidate, augmented_inputs[j], augmented_int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
average_base_score = 0
|
||||
average_score = 0
|
||||
for i in range(0, uplift_sample_size):
|
||||
(inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, augmented_int_scratch)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
(base_score, score) = compute_uplift(candidate, layers, distances, augmented_inputs, outputs, output_xor, expected_outputs, sample_size, augmented_int_scratch)
|
||||
average_base_score += base_score
|
||||
average_score += score
|
||||
average_base_score /= uplift_sample_size
|
||||
average_score /= uplift_sample_size
|
||||
uplift = average_score - average_base_score
|
||||
print(uplift)
|
||||
|
||||
if uplift <= 0:
|
||||
layer += 1
|
||||
# augment_layers = layers[1:]
|
||||
continue
|
||||
|
||||
layers.append(candidate)
|
||||
# if layer == 1:
|
||||
# layer += 1
|
||||
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
535
mutations11.py
Normal file
@ -0,0 +1,535 @@
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
N = 8
|
||||
N_ACTUAL = 2 * ((N - 1) + 8)
|
||||
M = 2
|
||||
|
||||
def vec_to_int(x):
|
||||
z = 0
|
||||
for i in range(0, len(x)):
|
||||
z <<= 1
|
||||
z |= x[i]
|
||||
return z
|
||||
|
||||
def timeit(f):
|
||||
def timed(*args, **kw):
|
||||
ts = time.time()
|
||||
result = f(*args, **kw)
|
||||
te = time.time()
|
||||
|
||||
print('func:%r took: %2.4f sec' % (f.__name__, te-ts))
|
||||
return result
|
||||
return timed
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, layer):
|
||||
global N_ACTUAL
|
||||
self.layer = layer
|
||||
self.offsets = np.zeros((N_ACTUAL)).astype(np.int32)
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self, layer):
|
||||
global N_ACTUAL
|
||||
self.layer = layer
|
||||
self.p_offsets = np.zeros((N_ACTUAL))
|
||||
self.p_offsets.fill(0.5)
|
||||
self.p_offsets_next = np.zeros((N_ACTUAL))
|
||||
self.offset_coherences = np.zeros((N_ACTUAL))
|
||||
self.offset_coherences.fill(-1)
|
||||
self.knowns = set()
|
||||
|
||||
def snap(self):
|
||||
reset = False
|
||||
for j in range(0, len(self.p_offsets)):
|
||||
if self.p_offsets[j] > 0.6 and self.p_offsets[j] < 0.95:
|
||||
self.p_offsets[j] = 1.0
|
||||
self.knowns.add(j)
|
||||
flip = j ^ 0b1
|
||||
self.p_offsets[flip] = 0.0
|
||||
reset = True
|
||||
break
|
||||
elif self.p_offsets[j] < 0.05:
|
||||
self.p_offsets[j] = 0.0
|
||||
if reset:
|
||||
for j in range(0, len(self.p_offsets)):
|
||||
flip = j ^ 0b1
|
||||
if self.p_offsets[j] < 0.95 and self.p_offsets[flip] < 0.95:
|
||||
self.p_offsets[j] = 0.5
|
||||
|
||||
def eliminate_random_known(self):
|
||||
if len(self.knowns) == 0:
|
||||
return False
|
||||
index = random.choice(list(self.knowns))  # random.sample() no longer accepts a set (Python 3.11+)
|
||||
self.knowns.remove(index)
|
||||
return True
|
||||
|
||||
def reset(self):
|
||||
self.p_offsets.fill(0.5)
|
||||
for index in self.knowns:
|
||||
flip = index ^ 0b1
|
||||
self.p_offsets[index] = 1.0
|
||||
self.p_offsets[flip] = 0.0
|
||||
|
||||
def all_zeros(self):
|
||||
for j in range(0, len(self.p_offsets)):
|
||||
if self.p_offsets[j] > 0.05 and self.p_offsets[j] < 0.95:
|
||||
return False
|
||||
return True
|
||||
|
||||
def has_converged(self):
|
||||
if self.all_zeros():
|
||||
return True
|
||||
|
||||
top_n = sorted(range(len(self.p_offsets)), key=lambda i: self.p_offsets[i])[-self.layer:]
|
||||
for i in top_n:
|
||||
if self.p_offsets[i] < 0.95:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def flatten(self):
|
||||
candidate = Candidate(self.layer)
|
||||
top_n = sorted(range(len(self.p_offsets)), key=lambda i: self.p_offsets[i])[-self.layer:]
|
||||
for i in top_n:
|
||||
if self.p_offsets[i] < 0.95:
|
||||
return None
|
||||
candidate.offsets[i] = 1
|
||||
|
||||
return candidate
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
# 00100111 x4
|
||||
# 00000110 x1
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def sha_byte(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
|
||||
# 0 ^ 1 ^ (2 ^ (4 * (5 ^ 0 * 7))) * (3 ^ 6 * 7)
|
||||
# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * (5 ^ 0 * 7) ^ 4 * 6 * 7 * (5 ^ 0 * 7)
|
||||
# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * 5 ^ 0 * 3 * 4 * 7 ^ 4 * 5 * 6 * 7 ^ 0 * 4 * 6 * 7
|
||||
|
||||
# 0 ^ 1 ^ 2*3 ^ 2*6*7 ^ 3*4*5 ^ 0*3*4*7 ^ 4*5*6*7 ^ 0*4*6*7
|
||||
# What about strictly SOP?
|
||||
# That is, 1-Hot of increasing complexity?
|
||||
# How would that work?
|
||||
# Candidate generation could apply some kind of softmax to filter down to one
|
||||
#
|
||||
def test_fn(x):
|
||||
# 0 1
|
||||
# 2 | 3
|
||||
# 4 | 5 | 6 | 7
|
||||
# | | 0 | 7 | | | |
|
||||
return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))
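# Sketch added for illustration (not part of the original file): verify that the
# nested expression above equals the expanded sum-of-products form from the
# comments. Over 0/1 values, ^ is addition and * is multiplication in GF(2), so
# AND distributes over XOR and the nesting flattens into eight product terms.
def check_sop_expansion():
    import itertools
    for bits in itertools.product([0, 1], repeat=8):
        x = list(bits)
        sop = (x[0] ^ x[1]
               ^ (x[2] * x[3]) ^ (x[2] * x[6] * x[7])
               ^ (x[3] * x[4] * x[5]) ^ (x[0] * x[3] * x[4] * x[7])
               ^ (x[4] * x[5] * x[6] * x[7]) ^ (x[0] * x[4] * x[6] * x[7]))
        assert sop == test_fn(x)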
|
||||
|
||||
def candidate_fn(x):
|
||||
return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2])
|
||||
|
||||
def true_fn(x):
|
||||
return x[0] ^ x[1] ^ (x[3] * x[2])
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(outputs, distances):
|
||||
coherences = []
|
||||
for i in range(0, len(outputs)):
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(outputs)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = outputs[j]
|
||||
weight = distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
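# Sketch added for illustration (not part of the original file): an equivalent
# vectorized form of coherence() above for 0/1 residuals. It computes the same
# distance-weighted agreement average with NumPy broadcasting instead of the
# double loop.
def coherence_vectorized(outputs, distances):
    outputs = np.asarray(outputs)
    weights = np.array(distances, dtype=float)
    np.fill_diagonal(weights, 0.0)  # skip the i == j pairs
    agree = (outputs[:, None] == outputs[None, :]).astype(float)
    denominators = weights.sum(axis=1)
    numerators = (agree * weights).sum(axis=1)
    per_sample = np.divide(numerators, denominators,
                           out=np.zeros_like(numerators), where=denominators > 0)
    return per_sample.mean()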
|
||||
|
||||
def random_sample(m, inputs, augmented_inputs, outputs):
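# Note (added for clarity): this sampler sets up the learning task. It draws a
# random N-bit preimage, hashes it with SHA-256, and asks the model to recover
# the preimage's first bit: outputs[i] = preimage bit 0, while augmented_inputs[i]
# holds preimage bits 1..N-1 plus the 8 bits of the first hash byte, each stored
# with its complement (positions 2k and 2k+1), hence N_ACTUAL = 2 * ((N - 1) + 8).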
|
||||
global N, N_ACTUAL
|
||||
for i in range(0, m):
|
||||
for j in range(0, N):
|
||||
val = random.randint(0, 1)
|
||||
inputs[i][j] = val
|
||||
if j > 0:
|
||||
augmented_inputs[i][(j - 1) * 2] = val
|
||||
augmented_inputs[i][(j - 1) * 2 + 1] = 1 - val
|
||||
# augmented_inputs[i][j * 2] = val
|
||||
# augmented_inputs[i][j * 2 + 1] = 1 - val
|
||||
output = sha_byte(inputs[i])
|
||||
outputs[i] = inputs[i][0]
|
||||
for k in range(0, 1):
|
||||
output_byte = output[k]
|
||||
for j in range(0, 8):
|
||||
val = (output_byte >> j) & 0b1
|
||||
inputs[i][k * 8 + j] = val
|
||||
augmented_inputs[i][(N - 1 + k * 8 + j) * 2] = val
|
||||
augmented_inputs[i][(N - 1 + k * 8 + j) * 2 + 1] = 1 - val
|
||||
# outputs[i] = g(inputs[i])
|
||||
return (inputs, augmented_inputs, outputs)
|
||||
|
||||
def populate_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
distances[i][j] = 1.0 / (2 ** distance)
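# Note (added for clarity): weights decay as 1 / 2**hamming_distance, so the
# coherence metric above is dominated by each sample's nearest neighbours in
# input space.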
|
||||
|
||||
def evaluate(layers, candidate, x, compute_scratch):
|
||||
z = evaluate_layers(layers, x, compute_scratch)
|
||||
z ^= evaluate_candidate(candidate, x, compute_scratch)
|
||||
return z
|
||||
|
||||
def evaluate_layers(layers, x, compute_scratch):
|
||||
z = 0
|
||||
for layer in layers:
|
||||
z ^= evaluate_candidate(layer, x, compute_scratch)
|
||||
return z
|
||||
|
||||
def evaluate_candidate(candidate, x, compute_scratch):
|
||||
compute_scratch.fill(0)
|
||||
compute_scratch[0:len(candidate.offsets)] = candidate.offsets
|
||||
np.multiply(compute_scratch, x, compute_scratch)
|
||||
return 1 if np.sum(compute_scratch) - np.sum(candidate.offsets) == 0 else 0
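# Note (added for clarity): a candidate is an AND term over the augmented inputs;
# the check above returns 1 exactly when every position selected by
# candidate.offsets is 1 in x.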
|
||||
|
||||
def layer_str(layer):
|
||||
parts = []
|
||||
for i in range(0, len(layer.offsets)):
|
||||
if layer.offsets[i] == 1:
|
||||
parts.append('x[' + str(i) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
def cache_layers(layers):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for i in range(0, len(layers)):
|
||||
layer = layers[i]
|
||||
expr += '\tresult^=' + layer_str(layer) + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
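# Note (added for clarity): cache_layers compiles the accumulated layers into a
# single Python function via exec(). For example, layers selecting {0, 3} and {2}
# generate:
#   def f(x):
#       result=0
#       result^=x[0]*x[3]
#       result^=x[2]
#       return result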
|
||||
|
||||
@timeit
|
||||
def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch, cached_f):
|
||||
global M
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] = cached_f(inputs[i])
|
||||
# outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch)
|
||||
# check = cached_f(inputs[i])
|
||||
# if check != outputs[0][i]:
|
||||
# raise Exception('Mistake')
|
||||
for j in range(1, num_candidates):
|
||||
np.copyto(outputs[j], outputs[0])
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
base_score = coherence(output_xor, distances)
|
||||
|
||||
scores.fill(0)
|
||||
unique_candidates = {}
|
||||
for j in range(0, num_candidates):
|
||||
create_candidate(probabilities, candidates[j])
|
||||
unique_candidates[candidate_str(candidates[j])] = j
|
||||
|
||||
for i in range(0, sample_size):
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
outputs[j][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
scores[j] = score
|
||||
# for j in range(0, num_candidates):
|
||||
# candidate = candidates[j]
|
||||
# create_candidate(probabilities, candidate)
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# for j in range(0, num_candidates):
|
||||
# candidate = candidates[j]
|
||||
# outputs[j][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
|
||||
# for j in range(0, num_candidates):
|
||||
# candidate = candidates[j]
|
||||
# np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
# np.mod(output_xor, M, output_xor)
|
||||
# score = coherence(output_xor, distances)
|
||||
# scores[j] = score
|
||||
|
||||
return base_score
|
||||
|
||||
|
||||
def compute_uplift(candidate, layers, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch):
|
||||
global M
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
base_score = coherence(output_xor, distances)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
return (base_score, score)
|
||||
|
||||
@timeit
|
||||
def update_probabilities(probabilities, candidates, inputs, base_score, scores, scale):
|
||||
num_candidates = len(candidates)
|
||||
|
||||
probabilities.offset_coherences.fill(-1)
|
||||
for p in range(0, num_candidates):
|
||||
score = scores[p]
|
||||
if score == 0:
|
||||
continue
|
||||
candidate = candidates[p]
|
||||
|
||||
for j in range(0, len(candidate.offsets)):
|
||||
if candidate.offsets[j] == 0:
|
||||
continue
|
||||
probabilities.offset_coherences[j] = max(score, probabilities.offset_coherences[j])
|
||||
|
||||
inertia = 0
|
||||
for j in range(0, len(probabilities.p_offsets_next)):
|
||||
p = probabilities.offset_coherences[j]
|
||||
delta = p - base_score if p >= 0 else 0
|
||||
probabilities.p_offsets_next[j] = clamp(probabilities.p_offsets[j] + delta, 0, 1)
|
||||
inertia += abs(probabilities.p_offsets_next[j] - probabilities.p_offsets[j])
|
||||
|
||||
for j in range(0, len(probabilities.p_offsets_next)):
|
||||
p_offset_next = 0.9 * probabilities.p_offsets[j] + 0.1 * probabilities.p_offsets_next[j]
|
||||
# if p_offset_next <= 0.05:
|
||||
# p_offset_next = 0.0
|
||||
# elif p_offset_next >= 0.95:
|
||||
# p_offset_next = 1.0
|
||||
probabilities.p_offsets[j] = p_offset_next
|
||||
# total = np.sum(probabilities.p_offsets[j])
|
||||
# probabilities.p_offsets[j] *= 1.0 / total
|
||||
|
||||
probabilities.snap()
|
||||
|
||||
return inertia
|
||||
|
||||
def create_candidate(probabilities, candidate):
|
||||
candidate.offsets.fill(0)
|
||||
scores = np.empty_like(candidate.offsets).astype(np.float32)
|
||||
for j in range(0, len(probabilities.p_offsets)):
|
||||
if probabilities.p_offsets[j] == 1.0:
|
||||
scores[j] = 1000
|
||||
elif probabilities.p_offsets[j] == 0.0:
|
||||
scores[j] = -1000
|
||||
else:
|
||||
scores[j] = random.random() + probabilities.p_offsets[j]
|
||||
top = sorted(range(len(scores)), key=lambda i: scores[i], reverse = True)
|
||||
picked = set()
|
||||
for i in top:
|
||||
flip = i ^ 0b1
|
||||
if flip in picked:
|
||||
continue
|
||||
candidate.offsets[i] = 1
|
||||
picked.add(i)
|
||||
if len(picked) == candidate.layer:
|
||||
return
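# Note (added for clarity): offsets come in complementary pairs (2k is an input
# bit, 2k + 1 its negation, see random_sample), so a candidate never selects both
# halves of a pair; selection stops once `layer` offsets have been picked.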
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
for j in range(0, len(src.offsets)):
|
||||
dest.offsets[j] = src.offsets[j]
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities):
|
||||
print('=====================')
|
||||
print(p_a(probabilities.p_offsets))
|
||||
print('=====================')
|
||||
|
||||
def candidate_str(candidate):
|
||||
build_str = ''
|
||||
for j in range(0, len(candidate.offsets)):
|
||||
build_str += str(candidate.offsets[j])
|
||||
return build_str
|
||||
|
||||
def main():
|
||||
global N, N_ACTUAL, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 1
|
||||
uplift_sample_size = 128
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros((N,))
|
||||
int_scratch = np.zeros((N,)).astype(np.int32)
|
||||
g = sha
|
||||
layers = []
|
||||
unique_layers = set()
|
||||
augment_layers = []
|
||||
layer = 1
|
||||
inputs = np.zeros((sample_size, N)).astype(np.int32)
|
||||
augmented_inputs = np.zeros((sample_size, N_ACTUAL)).astype(np.int32)
|
||||
expected_outputs = np.zeros((sample_size,)).astype(np.int32)
|
||||
random_sample(sample_size, inputs, augmented_inputs, expected_outputs)
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
populate_distances(inputs, distances, scratch)
|
||||
outputs = np.zeros((num_candidates + num_survivors, sample_size,)).astype(np.int32)
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
cached_f = cache_layers(layers)
|
||||
probabilities = Probabilities(1)
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
with open('model.txt', 'w') as f:
|
||||
while score < 1:
|
||||
probabilities.layer = layer
|
||||
candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)]
|
||||
augmented_int_scratch = np.zeros((N_ACTUAL,)).astype(np.int32)
|
||||
random_sample(sample_size, inputs, augmented_inputs, expected_outputs)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
|
||||
inertia = 1
|
||||
epoch = 1
|
||||
while inertia > 0.001 and epoch < 2000 and not probabilities.has_converged():
|
||||
base_score = compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, augmented_inputs, outputs, output_xor, expected_outputs, sample_size, augmented_int_scratch, cached_f)
|
||||
round_inertia = update_probabilities(probabilities, candidates, augmented_inputs, base_score, scores, 1 + 0.01 * epoch)
|
||||
inertia = 0.9 * inertia + 0.1 * round_inertia
|
||||
|
||||
print_probabilities(probabilities)
|
||||
# for candidate in layers:
|
||||
# print(candidate.offsets)
|
||||
max_score = np.max(scores)
|
||||
print(base_score, max_score, round_inertia, inertia)
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
src = candidates[src_index]
|
||||
dest = candidates[dest_index]
|
||||
candidates[dest_index] = src
|
||||
candidates[src_index] = dest
|
||||
|
||||
random_sample(sample_size, inputs, augmented_inputs, expected_outputs)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
epoch += 1
|
||||
|
||||
candidate = probabilities.flatten()
|
||||
# uplift = -1
|
||||
# if not candidate is None:
|
||||
# print(candidate.offsets)
|
||||
# for j in range(0, sample_size):
|
||||
# outputs[0][j] = evaluate(layers, candidate, augmented_inputs[j], augmented_int_scratch)
|
||||
# np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
# np.mod(output_xor, M, output_xor)
|
||||
# score = coherence(output_xor, distances)
|
||||
|
||||
# average_base_score = 0
|
||||
# average_score = 0
|
||||
# for i in range(0, uplift_sample_size):
|
||||
# (inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, augmented_int_scratch)
|
||||
# populate_distances(inputs, distances, scratch)
|
||||
# (base_score, score) = compute_uplift(candidate, layers, distances, augmented_inputs, outputs, output_xor, expected_outputs, sample_size, augmented_int_scratch)
|
||||
# average_base_score += base_score
|
||||
# average_score += score
|
||||
# average_base_score /= uplift_sample_size
|
||||
# average_score /= uplift_sample_size
|
||||
# uplift = average_score - average_base_score
|
||||
# print(uplift)
|
||||
|
||||
# if uplift <= 0:
|
||||
# layer += 1
|
||||
# # augment_layers = layers[1:]
|
||||
# continue
|
||||
if candidate is None:
|
||||
if probabilities.eliminate_random_known():
|
||||
probabilities.reset()
|
||||
continue
|
||||
layer += 1
|
||||
continue
|
||||
|
||||
layer_id = candidate_str(candidate)
|
||||
if layer_id in unique_layers:
|
||||
if probabilities.eliminate_random_known():
|
||||
if probabilities.eliminate_random_known():
|
||||
probabilities.reset()
|
||||
continue
|
||||
layer += 1
|
||||
continue
|
||||
|
||||
unique_layers.add(layer_id)
|
||||
layers.append(candidate)
|
||||
cached_f = cache_layers(layers)
|
||||
probabilities.eliminate_random_known()
|
||||
probabilities.reset()
|
||||
|
||||
for i in range(0, len(candidate.offsets)):
|
||||
if candidate.offsets[i] == 1:
|
||||
f.write(str(i))
|
||||
f.write(' ')
|
||||
f.write('\n')
|
||||
f.flush()
|
||||
|
||||
# if layer == 1:
|
||||
# layer += 1
|
||||
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
391
mutations12.py
Normal file
@ -0,0 +1,391 @@
|
||||
import bisect
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
|
||||
def evaluate(self, x):
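# Note (added for clarity): an index equal to len(x) (the extra "null" slot,
# actual_N in Probabilities) acts as a sentinel; any candidate containing it
# evaluates to 0.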
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 8
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
|
||||
self.layers = []
|
||||
self.base = None
|
||||
self.rings = []
|
||||
|
||||
self.scratch = np.zeros((self.actual_N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
|
||||
def randomize_inputs(self):
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
self.distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def compute_rings(self):
|
||||
self.rings = []
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
min_distance = self.actual_N
|
||||
indices = []
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
if distance < min_distance:
|
||||
min_distance = distance
|
||||
indices = [j]
|
||||
elif distance == min_distance:
|
||||
indices.append(j)
|
||||
self.rings.append(indices)
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.expected_outputs[i] = sha(self.inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def ring_coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
total = 0
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
indices = self.rings[i]
|
||||
coherence = sum([1 if self.output_xor[j] == y_a else 0 for j in indices]) / len(indices)
|
||||
total += coherence
|
||||
return total / len(self.output_xor)
|
||||
|
||||
def normalize_p(self):
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 100)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
# self.compute_rings()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
# return self.ring_coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def update(self):
|
||||
self.epoch += 1
|
||||
base_coherence = self.random_sample()
|
||||
self.max_coherences.fill(0)
|
||||
for i in range(0, self.actual_N):
|
||||
self.max_candidates[i] = None
|
||||
visited = set()
|
||||
has_candidate = False
|
||||
# np.copyto(self.next_p, self.p)
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in visited:
|
||||
continue
|
||||
visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 1:  # first pass; epoch is incremented at the top of update(), so it is never 0 here
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
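# Note (added for clarity): average_delta_over_null is an exponential moving
# average of the gap between the strongest index and the "null" slot
# self.p[self.actual_N]; the convergence and backtracking decisions below key off
# this gap.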
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if not index is None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
447
mutations13.py
Normal file
@ -0,0 +1,447 @@
|
||||
import bisect
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
self.uplift = 0
|
||||
|
||||
def evaluate(self, x):
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 8
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
|
||||
self.layers = []
|
||||
self.base = None
|
||||
self.rings = []
|
||||
|
||||
self.scratch = np.zeros((self.actual_N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
|
||||
def randomize_inputs(self):
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
self.distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def compute_rings(self):
|
||||
self.rings = []
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
min_distance = self.actual_N
|
||||
indices = []
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
if distance < min_distance:
|
||||
min_distance = distance
|
||||
indices = [j]
|
||||
elif distance == min_distance:
|
||||
indices.append(j)
|
||||
self.rings.append(indices)
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.expected_outputs[i] = sha(self.inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def ring_coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
total = 0
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
indices = self.rings[i]
|
||||
coherence = sum([1 if self.output_xor[j] == y_a else 0 for j in indices]) / len(indices)
|
||||
total += coherence
|
||||
return total / len(self.output_xor)
|
||||
|
||||
def normalize_p(self):
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 100)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
# self.compute_rings()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
# return self.ring_coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
|
||||
|
||||
def seed_candidate_pool(self):
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.candidate_ids:
|
||||
continue
|
||||
self.candidate_pool.append(candidate)
|
||||
self.candidate_ids.add(candidate_id)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def update(self):
|
||||
self.epoch += 1
|
||||
base_coherence = self.random_sample()
|
||||
self.seed_candidate_pool()
|
||||
for candidate in self.candidate_pool:
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
coherence = self.coherence()
|
||||
candidate.uplift += coherence - base_coherence
|
||||
self.candidate_pool.sort(key=lambda x: x.uplift, reverse=True)
|
||||
for drop_candidate in self.candidate_pool[self.num_candidates:]:
|
||||
self.candidate_ids.remove(drop_candidate.id())
|
||||
self.candidate_pool = self.candidate_pool[:self.num_candidates]
|
||||
# print('======')
|
||||
# print(self.epoch, base_coherence)
|
||||
# print('======')
|
||||
|
||||
# if len(self.candidate_pool) == 0:
|
||||
# print(self.p)
|
||||
|
||||
# for i in range(0, min(5, len(self.candidate_pool))):
|
||||
# candidate = self.candidate_pool[i]
|
||||
# print(candidate.id(), candidate.uplift)
|
||||
|
||||
# if self.epoch < 15:
|
||||
# return
|
||||
|
||||
if self.candidate_pool[0].uplift > 0.3:
|
||||
candidate = self.candidate_pool[0]
|
||||
candidate_id = candidate.id()
|
||||
self.candidate_ids.remove(candidate_id)
|
||||
print(candidate_id)
|
||||
self.knowns = candidate.indices
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
|
||||
self.epoch = 0
|
||||
self.num_terms += 1
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
return
|
||||
|
||||
# np.copyto(self.next_p, self.p)
|
||||
# These locals were initialized in the previous version's update(); without them
# the loop below raises NameError on `visited` / `has_candidate`, and
# max_coherences / max_candidates would carry stale values between epochs.
self.max_coherences.fill(0)
for i in range(0, self.actual_N):
    self.max_candidates[i] = None
visited = set()
has_candidate = False
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in visited:
|
||||
continue
|
||||
visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 1:  # first pass; epoch is incremented at the top of update(), so it is never 0 here
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if not index is None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
549
mutations14.py
Normal file
@ -0,0 +1,549 @@
|
||||
import bisect
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
def bin_div(a, b):
|
||||
if a == 0 and b == 0:
|
||||
return 2
|
||||
if a == 1 and b == 0:
|
||||
return -1
|
||||
if a == 0 and b == 1:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
self.uplift = 0
|
||||
|
||||
def evaluate(self, x):
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 16
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.masked_distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.mask = np.zeros((self.sample_size))
|
||||
self.numerators = np.zeros((self.sample_size))
|
||||
self.denominators = np.zeros((self.sample_size))
|
||||
self.coherences = np.zeros((self.sample_size))
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
self.uplifts = np.zeros((self.actual_N))
|
||||
self.subspace_uplifts = np.zeros((self.actual_N))
|
||||
|
||||
self.layers = []
|
||||
self.base = None
|
||||
|
||||
self.scratch = np.zeros((self.actual_N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
self.visited = set()
|
||||
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
|
||||
def randomize_inputs(self):
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
self.distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.expected_outputs[i] = sha(self.inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def mat_coherence(self):
|
||||
np.abs(self.output_xor, self.mask)
|
||||
np.subtract(self.output_xor, self.mask, self.mask)
|
||||
np.divide(self.mask, 2.0, self.mask)
|
||||
np.add(1.0, self.mask, self.mask)
|
||||
self.xor_square.fill(0)
|
||||
np.copyto(self.masked_distances, self.distances)
|
||||
masked_distances_t = self.masked_distances.transpose()
|
||||
for i in range(0, len(self.xor_square)):
|
||||
self.xor_square[i] = self.output_xor
|
||||
np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i])
|
||||
np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i])
|
||||
np.sum(self.masked_distances, axis=0, out=self.denominators)
|
||||
self.xor_square = self.xor_square.transpose()
|
||||
np.logical_xor(self.xor_square, self.output_xor, self.xor_square)
|
||||
np.multiply(self.xor_square, self.masked_distances, self.xor_square)
|
||||
np.sum(self.xor_square, axis=0, out=self.numerators)
|
||||
np.divide(self.numerators, self.denominators, self.coherences)
|
||||
return 1.0 - np.nanmean(self.coherences)
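# Note (added for clarity): mat_coherence is a matrix form of the per-sample loop
# kept in coherence() below. mask is 1 where output_xor >= 0 and 0 for negative
# "don't care" entries, and it zeroes the corresponding rows and columns of the
# distance matrix; xor_square[i][j] = output_xor[i] XOR output_xor[j] marks
# disagreements, so 1 - nanmean of the weighted disagreement rate is the
# agreement score.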
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
return self.mat_coherence()
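# The loops below are the earlier per-pair implementation of the same statistic;
# they are kept for reference but never run because of the return above.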
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
raw_coherence = sum(coherences) / len(coherences)
|
||||
check_coherence = self.mat_coherence()
|
||||
|
||||
return raw_coherence
|
||||
|
||||
def div_coherence(self):
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
if y_a < 0:
|
||||
continue
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
if y_b < 0:
|
||||
continue
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
# if y_a < 0 or y_b < 0:
|
||||
# numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def normalize_p(self):
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
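# normalize_p clamps negative weights to zero, removes indices already in knowns
# (and their complementary rail, i ^ 1), zeroes any index whose addition to knowns
# would recreate a stopped combination, then rescales p to sum to 1.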
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 100)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
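# random_candidate extends knowns by sampling (num_terms - len(knowns)) extra
# indices without replacement from p: the null slot is masked out, each pick also
# zeroes its complementary rail, and indices whose addition would hit a stop are
# suppressed. If the distribution collapses to all zeros it returns None.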
|
||||
|
||||
def seed_candidate_pool(self):
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.candidate_ids:
|
||||
continue
|
||||
self.candidate_pool.append(candidate)
|
||||
self.candidate_ids.add(candidate_id)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def get_distribution(self, candidate, half = 1):
|
||||
count = 0
|
||||
for i in range(0, len(self.inputs)):
|
||||
value = candidate.evaluate(self.inputs[i])
|
||||
if value == half:
|
||||
self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i]
|
||||
count += 1
|
||||
else:
|
||||
self.output_xor[i] = -1
|
||||
return (count, self.mat_coherence())
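# get_distribution restricts the coherence measurement to the subspace where the
# candidate term evaluates to `half`: residual bits (base XOR expected) are kept
# for those samples and everything else is flagged -1 so mat_coherence ignores it.
# It returns the subspace size together with that conditional coherence.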
|
||||
|
||||
def update(self):
|
||||
self.epoch += 1
|
||||
base_coherence = self.random_sample()
|
||||
candidate = Candidate(self.knowns[:])
|
||||
|
||||
index = -1
|
||||
subspace_index = -1
|
||||
bar = 1.0 - (self.epoch / 1000.0)
|
||||
for i in range(0, self.actual_N):
|
||||
if i in self.knowns:
|
||||
continue
|
||||
candidate.indices.append(i)
|
||||
(count_0, subspace_coherence_0) = self.get_distribution(candidate, 0)
|
||||
# (_, subspace_coherence) = self.get_distribution(candidate, 0)
|
||||
# subspace_coherence = subspace_coherence_0 * count_0 / (count_0 + count_1) + subspace_coherence_1 * count_1 / (count_0 + count_1)
|
||||
# subspace_coherence = subspace_coherence_0
|
||||
# delta = (subspace_coherence_0 - base_coherence) * count_0 / (count_0 + count_1) + (subspace_coherence_1 - base_coherence) * count_1 / (count_0 + count_1)
|
||||
delta = (subspace_coherence_0 - base_coherence) * count_0 / len(self.inputs)
|
||||
self.subspace_uplifts[i] += delta
|
||||
if self.subspace_uplifts[i] > bar:
|
||||
if subspace_index < 0 or self.subspace_uplifts[i] > self.subspace_uplifts[subspace_index]:
|
||||
subspace_index = i
|
||||
|
||||
if index_hash(candidate.indices) not in self.stops:
|
||||
for j in range(0, len(self.inputs)):
|
||||
self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j])
|
||||
coherence = self.coherence()
|
||||
delta = coherence - base_coherence
|
||||
# self.uplifts[i] = 0.9 * self.uplifts[i] + 0.1 * coherence
|
||||
self.uplifts[i] += delta
|
||||
if self.uplifts[i] > bar:
|
||||
if index < 0 or self.uplifts[i] > self.uplifts[index]:
|
||||
index = i
|
||||
candidate.indices.pop()
|
||||
|
||||
# print('=====' + str(base_coherence))
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
print(base_coherence)
|
||||
print(self.knowns, bar)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
if subspace_index >= 0:
|
||||
self.knowns.append(subspace_index)
|
||||
print(self.knowns, bar)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
# print('======')
|
||||
# print(self.epoch, base_coherence)
|
||||
# print('======')
|
||||
|
||||
# if len(self.candidate_pool) == 0:
|
||||
# print(self.p)
|
||||
|
||||
# for i in range(0, min(5, len(self.candidate_pool))):
|
||||
# candidate = self.candidate_pool[i]
|
||||
# print(candidate.id(), candidate.uplift)
|
||||
|
||||
# if self.epoch < 15:
|
||||
# return
|
||||
|
||||
if self.candidate_pool[0].uplift > 0.3:
|
||||
candidate = self.candidate_pool[0]
|
||||
candidate_id = candidate.id()
|
||||
self.candidate_ids.remove(candidate_id)
|
||||
print(candidate_id)
|
||||
self.knowns = candidate.indices
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
|
||||
self.epoch = 0
|
||||
self.num_terms += 1
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
return
|
||||
|
||||
# np.copyto(self.next_p, self.p)
|
||||
has_candidate = False
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.visited:
|
||||
continue
|
||||
self.visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 0:
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if index is not None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
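# cache_layers compiles the accepted layers into a single callable by generating
# source for f(x) as the XOR of each layer's product term and exec'ing it. As a
# rough illustration (hypothetical layers [0, 3] and [5]), the generated source is:
#
#   def f(x):
#       result = 0
#       result ^= x[0]*x[3]
#       result ^= x[5]
#       return result
#
# The result becomes self.base, so later terms are fit against the residual
# expected_outputs XOR base_outputs rather than the raw target.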
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
# probabilities.knowns = [14]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [8]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [4]
|
||||
# probabilities.add_layer()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
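# The driver keeps calling update() until num_terms exceeds N, i.e. it stops once
# the search would need product terms wider than the number of logical input bits.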
|
628
mutations15.py
Normal file
628
mutations15.py
Normal file
@ -0,0 +1,628 @@
|
||||
import bisect
|
||||
from email.mime import base
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import statistics
|
||||
|
||||
from pkg_resources import get_distribution
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
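# Target function: encode() packs the bit vector MSB-first into bytes and sha()
# labels it with the least significant bit of the first byte of its SHA-256
# digest, so the model is being asked to predict one output bit of SHA-256.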
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
def bin_div(a, b):
|
||||
if a == 0 and b == 0:
|
||||
return 2
|
||||
if a == 1 and b == 0:
|
||||
return -1
|
||||
if a == 0 and b == 1:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
self.uplift = 0
|
||||
|
||||
def evaluate(self, x):
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 8
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.masked_distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.mask = np.zeros((self.sample_size))
|
||||
self.numerators = np.zeros((self.sample_size))
|
||||
self.denominators = np.zeros((self.sample_size))
|
||||
self.coherences = np.zeros((self.sample_size))
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
self.uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_means = np.zeros((self.actual_N))
|
||||
self.uplift_medians = np.zeros((self.actual_N))
|
||||
self.uplift_convergences = np.zeros((self.actual_N))
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.subspace_uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)]
|
||||
self.uplift_stddevs = np.zeros((self.actual_N))
|
||||
|
||||
self.layers = []
|
||||
self.base = None
|
||||
|
||||
self.scratch = np.zeros((self.actual_N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
self.visited = set()
|
||||
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.has_added_layer = False
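# Compared with the previous file, this version keeps a list of sampled uplift
# deltas per index and tracks their running mean, median, convergence (change in
# the median between epochs), standard deviation and range; term selection below
# is driven by the median stabilising rather than by an accumulated uplift sum.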
|
||||
|
||||
def randomize_inputs(self):
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
self.distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.expected_outputs[i] = sha(self.inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def mat_coherence(self):
|
||||
np.abs(self.output_xor, self.mask)
|
||||
np.subtract(self.output_xor, self.mask, self.mask)
|
||||
np.divide(self.mask, 2.0, self.mask)
|
||||
np.add(1.0, self.mask, self.mask)
|
||||
self.xor_square.fill(0)
|
||||
np.copyto(self.masked_distances, self.distances)
|
||||
masked_distances_t = self.masked_distances.transpose()
|
||||
for i in range(0, len(self.xor_square)):
|
||||
self.xor_square[i] = self.output_xor
|
||||
np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i])
|
||||
np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i])
|
||||
np.sum(self.masked_distances, axis=0, out=self.denominators)
|
||||
self.xor_square = self.xor_square.transpose()
|
||||
np.logical_xor(self.xor_square, self.output_xor, self.xor_square)
|
||||
np.multiply(self.xor_square, self.masked_distances, self.xor_square)
|
||||
np.sum(self.xor_square, axis=0, out=self.numerators)
|
||||
np.divide(self.numerators, self.denominators, self.coherences)
|
||||
return 1.0 - np.nanmean(self.coherences)
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
return self.mat_coherence()
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
raw_coherence = sum(coherences) / len(coherences)
|
||||
check_coherence = self.mat_coherence()
|
||||
|
||||
return raw_coherence
|
||||
|
||||
def div_coherence(self):
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
if y_a < 0:
|
||||
continue
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
if y_b < 0:
|
||||
continue
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
# if y_a < 0 or y_b < 0:
|
||||
# numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def normalize_p(self):
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 1000)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.has_added_layer = True
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
|
||||
|
||||
def seed_candidate_pool(self):
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.candidate_ids:
|
||||
continue
|
||||
self.candidate_pool.append(candidate)
|
||||
self.candidate_ids.add(candidate_id)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def get_distribution(self, candidate, half = 1):
|
||||
count = 0
|
||||
for i in range(0, len(self.inputs)):
|
||||
value = candidate.evaluate(self.inputs[i])
|
||||
if value == half:
|
||||
self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i]
|
||||
count += 1
|
||||
else:
|
||||
self.output_xor[i] = -1
|
||||
return (count, self.mat_coherence())
|
||||
|
||||
def update(self):
|
||||
self.epoch += 1
|
||||
|
||||
index = -1
|
||||
subspace_index = -1
|
||||
# bar = 1.0 - (self.epoch / 10000.0)
|
||||
if self.epoch >= 200:
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
# if len(self.knowns) > 0 and not self.has_added_layer:
|
||||
# self.add_stop()
|
||||
# self.knowns.pop()
|
||||
self.has_added_layer = False
|
||||
if len(self.knowns) == 0:
|
||||
self.num_terms += 1
|
||||
self.stops = set()
|
||||
else:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
self.update()
|
||||
return
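# Restart policy: every 200 epochs the uplift statistics are cleared; if no index
# is currently pending the term width num_terms grows and the stop list resets,
# otherwise the current partial term is recorded as a stop, its last index is
# dropped, and update() is re-entered to continue from the shorter prefix.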
|
||||
|
||||
base_coherence = self.random_sample()
|
||||
candidate = Candidate(self.knowns[:])
|
||||
|
||||
for i in range(0, self.actual_N):
|
||||
# if i in self.knowns:
|
||||
# continue
|
||||
candidate.indices.append(i)
|
||||
try:
|
||||
if i in self.knowns:
|
||||
continue
|
||||
if index_hash(candidate.indices) in self.stops:
|
||||
continue
|
||||
|
||||
if len(candidate.indices) < self.num_terms:
|
||||
(count_0, subspace_coherence_0) = self.get_distribution(candidate, 0)
|
||||
delta_0 = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size
|
||||
(count_1, subspace_coherence_1) = self.get_distribution(candidate, 1)
|
||||
delta_1 = (subspace_coherence_1 - base_coherence) * count_1 / self.sample_size
|
||||
self.uplift_samples[i].append(delta_0)
|
||||
self.uplift_samples[i].append(delta_1)
|
||||
mean = statistics.mean(self.uplift_samples[i])
|
||||
median = statistics.median(self.uplift_samples[i])
|
||||
self.uplift_convergences[i] = abs(self.uplift_medians[i] - median)
|
||||
self.uplift_means[i] = mean
|
||||
self.uplift_medians[i] = median
|
||||
if self.epoch > 20 and self.uplift_convergences[i] < 1e-5 and self.uplift_medians[i] > 0:
|
||||
if subspace_index < 0 or self.uplift_medians[i] > self.uplift_medians[subspace_index]:
|
||||
subspace_index = i
|
||||
# if self.uplift_convergences[i] < 1e-6 and self.uplift_means[i] > 0:
|
||||
# if subspace_index < 0 or self.uplift_means[i] > self.uplift_means[subspace_index]:
|
||||
# subspace_index = i
|
||||
# self.subspace_uplifts[i] += delta
|
||||
# if self.subspace_uplifts[i] > bar:
|
||||
# if subspace_index < 0 or self.subspace_uplifts[i] > self.subspace_uplifts[subspace_index]:
|
||||
# subspace_index = i
|
||||
else:
|
||||
for j in range(0, len(self.inputs)):
|
||||
self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j])
|
||||
coherence = self.coherence()
|
||||
delta = coherence - base_coherence
|
||||
self.uplift_samples[i].append(delta)
|
||||
self.uplift_ranges[i][0] = max(self.uplift_samples[i])
|
||||
self.uplift_ranges[i][1] = min(self.uplift_samples[i])
|
||||
mean = statistics.mean(self.uplift_samples[i])
|
||||
median = statistics.median(self.uplift_samples[i])
|
||||
if len(self.uplift_samples[i]) >= 2:
|
||||
stddev = statistics.stdev(self.uplift_samples[i])
|
||||
self.uplift_stddevs[i] = stddev
|
||||
self.uplift_convergences[i] = abs(self.uplift_medians[i] - median)
|
||||
self.uplift_means[i] = mean
|
||||
self.uplift_medians[i] = median
|
||||
# self.uplifts[i] = 0.9 * self.uplifts[i] + 0.1 * coherence
|
||||
self.uplifts[i] += delta
|
||||
middle = self.uplift_ranges[i][1] + (self.uplift_ranges[i][0] - self.uplift_ranges[i][1]) / 2
|
||||
|
||||
if self.epoch > 20 and self.uplift_convergences[i] < 1e-5 and self.uplift_medians[i] > 0:
|
||||
if index < 0 or self.uplift_medians[i] > self.uplift_medians[index]:
|
||||
index = i
|
||||
# if self.epoch > 100 and max(self.uplift_samples[i]) + min(self.uplift_samples[i]) > 0.01:
|
||||
# if index < 0 or max(self.uplift_samples[i]) + min(self.uplift_samples[i]) > max(self.uplift_samples[index]) + min(self.uplift_samples[index]):
|
||||
# index = i
|
||||
# if self.uplift_convergences[i] < 1e-6 and self.uplift_means[i] > 0:
|
||||
# if index < 0 or self.uplift_means[i] > self.uplift_means[index]:
|
||||
# index = i
|
||||
# if self.uplifts[i] > bar:
|
||||
# if index < 0 or self.uplifts[i] > self.uplifts[index]:
|
||||
# index = i
|
||||
finally:
|
||||
candidate.indices.pop()
|
||||
|
||||
# print('=====' + str(base_coherence))
|
||||
# print(self.uplifts)
|
||||
# print(self.uplift_means)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplift_stddevs)
|
||||
# print(self.uplift_ranges)
|
||||
# print(self.uplift_convergences)
|
||||
# print(self.subspace_uplifts)
|
||||
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
print(base_coherence)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.add_layer()
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
if subspace_index >= 0:
|
||||
self.knowns.append(subspace_index)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
# print('======')
|
||||
# print(self.epoch, base_coherence)
|
||||
# print('======')
|
||||
|
||||
# if len(self.candidate_pool) == 0:
|
||||
# print(self.p)
|
||||
|
||||
# for i in range(0, min(5, len(self.candidate_pool))):
|
||||
# candidate = self.candidate_pool[i]
|
||||
# print(candidate.id(), candidate.uplift)
|
||||
|
||||
# if self.epoch < 15:
|
||||
# return
|
||||
|
||||
if self.candidate_pool[0].uplift > 0.3:
|
||||
candidate = self.candidate_pool[0]
|
||||
candidate_id = candidate.id()
|
||||
self.candidate_ids.remove(candidate_id)
|
||||
print(candidate_id)
|
||||
self.knowns = candidate.indices
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
|
||||
self.epoch = 0
|
||||
self.num_terms += 1
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
return
|
||||
|
||||
# np.copyto(self.next_p, self.p)
|
||||
has_candidate = False
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.visited:
|
||||
continue
|
||||
self.visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 0:
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if index is not None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
# probabilities.knowns = [14]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [8]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [4]
|
||||
# probabilities.add_layer()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
663
mutations16.py
Normal file
663
mutations16.py
Normal file
@ -0,0 +1,663 @@
|
||||
import bisect
|
||||
from cmath import isnan
|
||||
from email.mime import base
|
||||
import matplotlib.pyplot as plt
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import statistics
|
||||
|
||||
from pkg_resources import get_distribution
|
||||
from scipy import stats
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
def bin_div(a, b):
|
||||
if a == 0 and b == 0:
|
||||
return 2
|
||||
if a == 1 and b == 0:
|
||||
return -1
|
||||
if a == 0 and b == 1:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
self.uplift = 0
|
||||
|
||||
def evaluate(self, x):
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 16
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.masked_distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.mask = np.zeros((self.sample_size))
|
||||
self.numerators = np.zeros((self.sample_size))
|
||||
self.denominators = np.zeros((self.sample_size))
|
||||
self.coherences = np.zeros((self.sample_size))
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
self.uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_means = np.zeros((self.actual_N))
|
||||
self.uplift_medians = np.zeros((self.actual_N))
|
||||
self.uplift_convergences = np.zeros((self.actual_N))
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.superspace_uplift_samples = []
|
||||
self.subspace_uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)]
|
||||
self.uplift_stddevs = np.zeros((self.actual_N))
|
||||
|
||||
self.layers = []
|
||||
self.layer_confidence = {}
|
||||
self.base = None
|
||||
|
||||
self.scratch = np.zeros((self.actual_N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
self.visited = set()
|
||||
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.has_added_layer = False
|
||||
|
||||
def randomize_inputs(self):
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
self.distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.expected_outputs[i] = sha(self.inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def mat_coherence(self):
|
||||
np.abs(self.output_xor, self.mask)
|
||||
np.subtract(self.output_xor, self.mask, self.mask)
|
||||
np.divide(self.mask, 2.0, self.mask)
|
||||
np.add(1.0, self.mask, self.mask)
|
||||
self.xor_square.fill(0)
|
||||
np.copyto(self.masked_distances, self.distances)
|
||||
masked_distances_t = self.masked_distances.transpose()
|
||||
for i in range(0, len(self.xor_square)):
|
||||
self.xor_square[i] = self.output_xor
|
||||
np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i])
|
||||
np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i])
|
||||
np.sum(self.masked_distances, axis=0, out=self.denominators)
|
||||
self.xor_square = self.xor_square.transpose()
|
||||
np.logical_xor(self.xor_square, self.output_xor, self.xor_square)
|
||||
np.multiply(self.xor_square, self.masked_distances, self.xor_square)
|
||||
np.sum(self.xor_square, axis=0, out=self.numerators)
|
||||
np.divide(self.numerators, self.denominators, self.coherences)
|
||||
mean = np.nanmean(self.coherences)
|
||||
if isnan(mean):
|
||||
mean = 1.0
|
||||
return 1.0 - mean
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
return self.mat_coherence()
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
raw_coherence = sum(coherences) / len(coherences)
|
||||
check_coherence = self.mat_coherence()
|
||||
|
||||
return raw_coherence
|
||||
|
||||
def div_coherence(self):
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
if y_a < 0:
|
||||
continue
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
if y_b < 0:
|
||||
continue
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
# if y_a < 0 or y_b < 0:
|
||||
# numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def normalize_p(self):
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 1000)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.has_added_layer = True
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
|
||||
|
||||
def seed_candidate_pool(self):
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.candidate_ids:
|
||||
continue
|
||||
self.candidate_pool.append(candidate)
|
||||
self.candidate_ids.add(candidate_id)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def get_distribution(self, candidate, half = 1):
|
||||
count = 0
|
||||
for i in range(0, len(self.inputs)):
|
||||
value = candidate.evaluate(self.inputs[i])
|
||||
if value == half:
|
||||
self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i]
|
||||
count += 1
|
||||
else:
|
||||
self.output_xor[i] = -1
|
||||
return (count, self.mat_coherence())
|
||||
|
||||
def update(self):
|
||||
self.epoch += 1
|
||||
|
||||
base_coherence = self.random_sample()
|
||||
candidate = Candidate(self.knowns[:])
|
||||
|
||||
if len(candidate.indices) > 0:
|
||||
index = candidate.indices.pop()
|
||||
try:
|
||||
count_0, superspace_coherence_0 = self.get_distribution(candidate, 0)
|
||||
count_1, superspace_coherence_1 = self.get_distribution(candidate, 1)
|
||||
# delta = (superspace_coherence - base_coherence) * count / self.sample_size
|
||||
delta = superspace_coherence_0 - superspace_coherence_1
|
||||
self.superspace_uplift_samples.append(delta)
|
||||
finally:
|
||||
candidate.indices.append(index)
|
||||
|
||||
for i in range(0, self.actual_N):
|
||||
candidate.indices.append(i)
|
||||
try:
|
||||
if i in self.knowns:
|
||||
continue
|
||||
|
||||
count_0, subspace_coherence_0 = self.get_distribution(candidate, 0)
|
||||
# count_1, subspace_coherence_1 = self.get_distribution(candidate, 1)
|
||||
delta = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size
|
||||
# delta = subspace_coherence_0 - subspace_coherence_1
|
||||
self.subspace_uplift_samples[i].append(delta)
|
||||
|
||||
# if index_hash(candidate.indices) in self.stops:
|
||||
# continue
|
||||
|
||||
for j in range(0, len(self.inputs)):
|
||||
self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j])
|
||||
|
||||
coherence = self.coherence()
|
||||
delta = coherence - base_coherence
|
||||
self.uplift_samples[i].append(delta)
|
||||
finally:
|
||||
candidate.indices.pop()
|
||||
|
||||
if self.epoch >= 100:
|
||||
# for i in range(0, self.actual_N):
|
||||
# parameters = stats.norm.fit(self.uplift_samples[i])
|
||||
# print(i, parameters)
|
||||
# print(i, stats.kstest(self.uplift_samples[i], "norm", parameters))
|
||||
|
||||
# fig, axs = plt.subplots(4, 4)
|
||||
# for i in range(0, 4):
|
||||
# for j in range(0, 4):
|
||||
# n, bins, patches = axs[i][j].hist(self.uplift_samples[i * 4 + j], 50, density=True, facecolor='g', alpha=0.75)
|
||||
# plt.show()
|
||||
|
||||
try:
|
||||
index = -1
|
||||
best_mu = -1
|
||||
confidence = -1
|
||||
for i in range(0, self.actual_N):
|
||||
if len(self.uplift_samples[i]) == 0:
|
||||
continue
|
||||
parameters = stats.norm.fit(self.uplift_samples[i])
|
||||
(mu, _) = parameters
|
||||
# median = statistics.median(self.uplift_samples[i])
|
||||
if mu > 0:
|
||||
result = stats.kstest(self.uplift_samples[i], stats.norm.cdf, parameters)
|
||||
layer_id = index_hash(self.knowns + [i])
|
||||
if layer_id in self.layer_confidence:
|
||||
layer_confidence = self.layer_confidence[layer_id]
|
||||
if layer_confidence >= result.pvalue:
|
||||
continue
|
||||
if index < 0 or mu > best_mu:
|
||||
best_mu = mu
|
||||
index = i
|
||||
confidence = result.pvalue
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
self.layer_confidence[index_hash(self.knowns)] = confidence
|
||||
# num_terms = len(self.knowns)
|
||||
print(self.knowns, best_mu, confidence)
|
||||
print(base_coherence)
|
||||
self.add_layer()
|
||||
# if num_terms > self.num_terms:
|
||||
# self.stops = set()
|
||||
# self.num_terms = num_terms
|
||||
self.knowns = []
|
||||
return
|
||||
|
||||
index = -1
|
||||
best_mu = -1
|
||||
superspace_median = statistics.median(self.superspace_uplift_samples) if len(self.superspace_uplift_samples) > 0 else -1
|
||||
for i in range(0, self.actual_N):
|
||||
if len(self.subspace_uplift_samples[i]) == 0:
|
||||
continue
|
||||
# median = statistics.median(self.subspace_uplift_samples[i])
|
||||
parameters = stats.norm.fit(self.subspace_uplift_samples[i])
|
||||
(mu, _) = parameters
|
||||
if mu > 0:
|
||||
result = stats.kstest(self.subspace_uplift_samples[i], stats.norm.cdf, parameters)
|
||||
# print(i, mu, result.pvalue)
|
||||
if result.pvalue > 0.95:
|
||||
if index < 0 or mu > best_mu:
|
||||
# if median > best_median:
|
||||
best_mu = mu
|
||||
index = i
|
||||
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
print(self.knowns, best_mu)
|
||||
return
|
||||
|
||||
if len(self.knowns) > 0:
|
||||
# self.add_stop()
|
||||
self.knowns = []
|
||||
finally:
|
||||
self.epoch = 0
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.superspace_uplift_samples = []
|
||||
return
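# Every 100 epochs this version fits a normal distribution (stats.norm.fit) to
# each index's uplift samples, accepts the index with the largest positive mean,
# and stores the Kolmogorov-Smirnov p-value as that layer's confidence so a later
# pass must beat it; if no full-width term qualifies it falls back to the subspace
# samples, requiring a KS p-value above 0.95. The code below this return looks
# carried over from mutations15.py: on epochs below 100 it would reference
# index / subspace_index without them being set on this path.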
|
||||
|
||||
# print('=====' + str(base_coherence))
|
||||
# print(self.uplifts)
|
||||
# print(self.uplift_means)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplift_stddevs)
|
||||
# print(self.uplift_ranges)
|
||||
# print(self.uplift_convergences)
|
||||
# print(self.subspace_uplifts)
|
||||
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
print(base_coherence)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.add_layer()
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
if subspace_index >= 0:
|
||||
self.knowns.append(subspace_index)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
# print('======')
|
||||
# print(self.epoch, base_coherence)
|
||||
# print('======')
|
||||
|
||||
# if len(self.candidate_pool) == 0:
|
||||
# print(self.p)
|
||||
|
||||
# for i in range(0, min(5, len(self.candidate_pool))):
|
||||
# candidate = self.candidate_pool[i]
|
||||
# print(candidate.id(), candidate.uplift)
|
||||
|
||||
# if self.epoch < 15:
|
||||
# return
|
||||
|
||||
if self.candidate_pool[0].uplift > 0.3:
|
||||
candidate = self.candidate_pool[0]
|
||||
candidate_id = candidate.id()
|
||||
self.candidate_ids.remove(candidate_id)
|
||||
print(candidate_id)
|
||||
self.knowns = candidate.indices
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
|
||||
self.epoch = 0
|
||||
self.num_terms += 1
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
return
|
||||
|
||||
# np.copyto(self.next_p, self.p)
|
||||
has_candidate = False
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.visited:
|
||||
continue
|
||||
self.visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 0:
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if index is not None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
# probabilities.knowns = [14]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [8]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [4]
|
||||
# probabilities.add_layer()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
669
mutations17.py
Normal file
@ -0,0 +1,669 @@
|
||||
import bisect
|
||||
from cmath import isnan
|
||||
import matplotlib.pyplot as plt
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import statistics
|
||||
|
||||
from pkg_resources import get_distribution
|
||||
from scipy import stats
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
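# Hash-based target: pack the bit vector into bytes, SHA-256 it, and return the lowest
# bit of the first digest byte. Appears to be an alternative, pseudorandom target
# function; compute_expected_outputs below uses xor instead.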
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor(v):
|
||||
# parity of the input bits
return np.sum(v) % 2
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
def bin_div(a, b):
|
||||
if a == 0 and b == 0:
|
||||
return 2
|
||||
if a == 1 and b == 0:
|
||||
return -1
|
||||
if a == 0 and b == 1:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
self.uplift = 0
|
||||
|
||||
def evaluate(self, x):
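# A candidate evaluates to the product (logical AND) of its selected input columns.
# An index equal to len(x) denotes the "null" column, so such candidates always return 0.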
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 8
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
# self.sample_size = self.N ** 2
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.raw_inputs = np.zeros((self.sample_size, self.N)).astype(np.int32)
|
||||
self.masked_distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.mask = np.zeros((self.sample_size))
|
||||
self.numerators = np.zeros((self.sample_size))
|
||||
self.denominators = np.zeros((self.sample_size))
|
||||
self.coherences = np.zeros((self.sample_size))
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
self.uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_means = np.zeros((self.actual_N))
|
||||
self.uplift_medians = np.zeros((self.actual_N))
|
||||
self.uplift_convergences = np.zeros((self.actual_N))
|
||||
# self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.superspace_uplift_samples = []
|
||||
self.subspace_uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)]
|
||||
self.uplift_stddevs = np.zeros((self.actual_N))
|
||||
|
||||
self.samples = 1000
|
||||
# self.samples = 200
|
||||
self.base_coherence_samples = np.zeros((self.samples))
|
||||
self.coherence_samples = np.zeros((self.actual_N, self.samples))
|
||||
self.subspace_uplift_left_samples = np.zeros((self.actual_N, self.samples))
|
||||
self.subspace_uplift_right_samples = np.zeros((self.actual_N, self.samples))
|
||||
|
||||
self.layers = []
|
||||
self.layer_confidence = {}
|
||||
self.base = None
|
||||
|
||||
self.scratch = np.zeros((self.N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
self.visited = set()
|
||||
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.has_added_layer = False
|
||||
|
||||
def randomize_inputs(self):
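# Each raw input bit j is expanded into two columns: inputs[:, 2j] holds the bit and
# inputs[:, 2j+1] its complement, so candidate terms can reference either polarity.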
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.raw_inputs[i][j] = val
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
|
||||
for i in range(0, len(self.raw_inputs)):
|
||||
x_a = self.raw_inputs[i]
|
||||
for j in range(0, len(self.raw_inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.raw_inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
self.distances[i][j] = 1.0 / (2 ** (distance - 1)) if distance > 0 else 0
|
||||
# self.distances[i][j] = 1.0 / (distance ** 2) if distance > 0 else 0
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.raw_inputs)):
|
||||
self.expected_outputs[i] = xor(self.raw_inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def mat_coherence(self):
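# Vectorised coherence of the residuals in output_xor. Entries equal to -1 are masked
# out of both rows and columns of the distance matrix; for the rest, pairwise
# disagreement is weighted by self.distances, and the function returns one minus the
# mean weighted disagreement, i.e. the distance-weighted agreement rate.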
|
||||
np.abs(self.output_xor, self.mask)
|
||||
np.subtract(self.output_xor, self.mask, self.mask)
|
||||
np.divide(self.mask, 2.0, self.mask)
|
||||
np.add(1.0, self.mask, self.mask)
|
||||
self.xor_square.fill(0)
|
||||
np.copyto(self.masked_distances, self.distances)
|
||||
masked_distances_t = self.masked_distances.transpose()
|
||||
for i in range(0, len(self.xor_square)):
|
||||
self.xor_square[i] = self.output_xor
|
||||
np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i])
|
||||
np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i])
|
||||
np.sum(self.masked_distances, axis=0, out=self.denominators)
|
||||
self.xor_square = self.xor_square.transpose()
|
||||
np.logical_xor(self.xor_square, self.output_xor, self.xor_square)
|
||||
np.multiply(self.xor_square, self.masked_distances, self.xor_square)
|
||||
np.sum(self.xor_square, axis=0, out=self.numerators)
|
||||
np.divide(self.numerators, self.denominators, self.coherences)
|
||||
mean = np.nanmean(self.coherences)
|
||||
if isnan(mean):
|
||||
mean = 1.0
|
||||
return 1.0 - mean
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
return self.mat_coherence()
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
raw_coherence = sum(coherences) / len(coherences)
|
||||
check_coherence = self.mat_coherence()
|
||||
|
||||
return raw_coherence
|
||||
|
||||
def div_coherence(self):
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
if y_a < 0:
|
||||
continue
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
if y_b < 0:
|
||||
continue
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
# if y_a < 0 or y_b < 0:
|
||||
# numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def normalize_p(self):
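# Re-normalise the index distribution p: clamp negatives to zero, zero out indices
# already in knowns together with their complement column (i ^ 1), zero out any index
# whose addition to knowns would recreate a combination in self.stops, then rescale
# so the remaining mass sums to one.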
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 1000)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.has_added_layer = True
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
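# Draw a candidate by sampling the remaining (num_terms - len(knowns)) indices from p
# without replacement, excluding the null column, each chosen index's complement, and
# any index that would recreate a stopped combination. Returns None if the
# distribution collapses to zero mass.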
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
|
||||
|
||||
def seed_candidate_pool(self):
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.candidate_ids:
|
||||
continue
|
||||
self.candidate_pool.append(candidate)
|
||||
self.candidate_ids.add(candidate_id)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def get_distribution(self, candidate, half = 1):
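# Restrict the residual (base XOR expected) to the subspace of samples where the
# candidate term evaluates to `half`; the other samples are marked -1 so
# mat_coherence ignores them. Returns the subspace size and its coherence.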
|
||||
count = 0
|
||||
for i in range(0, len(self.inputs)):
|
||||
value = candidate.evaluate(self.inputs[i])
|
||||
if value == half:
|
||||
self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i]
|
||||
count += 1
|
||||
else:
|
||||
self.output_xor[i] = -1
|
||||
return (count, self.mat_coherence())
|
||||
|
||||
def update(self):
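# One epoch: draw a fresh random sample, record the base coherence, and for every
# input column record (a) the coherence when that column is XOR-ed onto the current
# base as an extra term and (b) the coherence of the residual restricted to the
# subspaces where the extended term is 0 or 1. After self.samples epochs the
# per-column distributions are compared against the base distribution with one-sided
# KS tests, the column with the lowest p-value is appended to knowns (and committed
# as a layer in the "flat" case), the histograms are plotted, and the counter resets.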
|
||||
sample = self.epoch
|
||||
self.epoch += 1
|
||||
|
||||
base_coherence = self.random_sample()
|
||||
self.base_coherence_samples[sample] = base_coherence - 0.5
|
||||
candidate = Candidate(self.knowns[:])
|
||||
|
||||
for i in range(0, self.actual_N):
|
||||
candidate.indices.append(i)
|
||||
try:
|
||||
count_0, subspace_coherence_0 = self.get_distribution(candidate, 0)
|
||||
count_1, subspace_coherence_1 = self.get_distribution(candidate, 1)
|
||||
# delta = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size
|
||||
# delta = subspace_coherence_0 - subspace_coherence_1
|
||||
self.subspace_uplift_left_samples[i][sample] = subspace_coherence_0 - 0.5
|
||||
self.subspace_uplift_right_samples[i][sample] = subspace_coherence_1 - 0.5
|
||||
|
||||
# if index_hash(candidate.indices) in self.stops:
|
||||
# continue
|
||||
|
||||
for j in range(0, len(self.inputs)):
|
||||
self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j])
|
||||
|
||||
coherence = self.coherence()
|
||||
self.coherence_samples[i][sample] = coherence - 0.5
|
||||
finally:
|
||||
candidate.indices.pop()
|
||||
|
||||
if self.epoch >= self.samples:
|
||||
# for i in range(0, self.actual_N):
|
||||
# parameters = stats.norm.fit(self.uplift_samples[i])
|
||||
# print(i, parameters)
|
||||
# print(i, stats.kstest(self.uplift_samples[i], "norm", parameters))
|
||||
|
||||
added = False
|
||||
# parameters = stats.norm.fit(self.base_coherence_samples)
|
||||
# (base_mu, _) = parameters
|
||||
|
||||
try:
|
||||
index = -1
|
||||
lowest_pvalue = -1
|
||||
is_subspace = False
|
||||
for i in range(0, self.actual_N):
|
||||
if i in self.knowns:
|
||||
continue
|
||||
result = stats.kstest(self.base_coherence_samples, self.coherence_samples[i], alternative='greater')
|
||||
print(i, result)
|
||||
# value = result.pvalue * (1 - result.statistic)
|
||||
if index < 0 or result.pvalue < lowest_pvalue:
|
||||
# if index < 0 or value < lowest_pvalue:
|
||||
index = i
|
||||
lowest_pvalue = result.pvalue
|
||||
|
||||
for i in range(0, self.actual_N):
|
||||
if i in self.knowns:
|
||||
continue
|
||||
result = stats.kstest(self.base_coherence_samples, self.subspace_uplift_left_samples[i], alternative='greater')
|
||||
# result = stats.kstest(self.subspace_uplift_left_samples[i], self.subspace_uplift_right_samples[i], alternative='greater')
|
||||
print(i, result)
|
||||
# value = result.pvalue * (1 - result.statistic)
|
||||
if index < 0 or result.pvalue < lowest_pvalue:
|
||||
# if index < 0 or value < lowest_pvalue:
|
||||
index = i
|
||||
lowest_pvalue = result.pvalue
|
||||
is_subspace = True
|
||||
|
||||
# if result.pvalue > 0.95:
|
||||
# index = i
|
||||
# parameters = stats.norm.fit(self.subspace_uplift_samples[i])
|
||||
# (mu, _) = parameters
|
||||
# if mu > base_mu:
|
||||
# if index < 0 or mu > highest_mu:
|
||||
# index = i
|
||||
# highest_mu = mu
|
||||
|
||||
if index >= 0:
|
||||
if is_subspace:
|
||||
# print('subspace')
|
||||
self.knowns.append(index)
|
||||
print(self.knowns, lowest_pvalue)
|
||||
else:
|
||||
# print('flat')
|
||||
self.knowns.append(index)
|
||||
# self.layer_confidence[index_hash(self.knowns)] = confidence
|
||||
# num_terms = len(self.knowns)
|
||||
print(self.knowns, lowest_pvalue)
|
||||
print(base_coherence)
|
||||
self.add_layer()
|
||||
# if num_terms > self.num_terms:
|
||||
# self.stops = set()
|
||||
# self.num_terms = num_terms
|
||||
self.knowns = []
|
||||
return
|
||||
|
||||
# if len(self.knowns) > 0:
|
||||
# # self.add_stop()
|
||||
# self.knowns = []
|
||||
finally:
|
||||
fig, axs = plt.subplots(4, 4)
|
||||
for i in range(0, 4):
|
||||
for j in range(0, 4):
|
||||
axs[i][j].hist(self.base_coherence_samples, 50, density=True, facecolor='r', alpha=0.5)
|
||||
n, bins, patches = axs[i][j].hist(self.coherence_samples[i * 4 + j], 50, density=True, facecolor='g', alpha=0.5)
|
||||
n, bins, patches = axs[i][j].hist(self.subspace_uplift_left_samples[i * 4 + j], 50, density=True, facecolor='b', alpha=0.5)
|
||||
# n, bins, patches = axs[i][j].hist(self.subspace_uplift_right_samples[i * 4 + j], 50, density=True, facecolor='b', alpha=0.5)
|
||||
plt.show()
|
||||
self.epoch = 0
|
||||
|
||||
return
|
||||
|
||||
# print('=====' + str(base_coherence))
|
||||
# print(self.uplifts)
|
||||
# print(self.uplift_means)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplift_stddevs)
|
||||
# print(self.uplift_ranges)
|
||||
# print(self.uplift_convergences)
|
||||
# print(self.subspace_uplifts)
|
||||
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
print(base_coherence)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.add_layer()
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
if subspace_index >= 0:
|
||||
self.knowns.append(subspace_index)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
# print('======')
|
||||
# print(self.epoch, base_coherence)
|
||||
# print('======')
|
||||
|
||||
# if len(self.candidate_pool) == 0:
|
||||
# print(self.p)
|
||||
|
||||
# for i in range(0, min(5, len(self.candidate_pool))):
|
||||
# candidate = self.candidate_pool[i]
|
||||
# print(candidate.id(), candidate.uplift)
|
||||
|
||||
# if self.epoch < 15:
|
||||
# return
|
||||
|
||||
if self.candidate_pool[0].uplift > 0.3:
|
||||
candidate = self.candidate_pool[0]
|
||||
candidate_id = candidate.id()
|
||||
self.candidate_ids.remove(candidate_id)
|
||||
print(candidate_id)
|
||||
self.knowns = candidate.indices
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
|
||||
self.epoch = 0
|
||||
self.num_terms += 1
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
return
|
||||
|
||||
# np.copyto(self.next_p, self.p)
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in visited:
|
||||
continue
|
||||
visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 0:
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if index is not None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
# probabilities.knowns = [14]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [8]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [4]
|
||||
# probabilities.add_layer()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
845
mutations18.py
Normal file
@ -0,0 +1,845 @@
|
||||
import bisect
|
||||
from cmath import isnan
|
||||
import matplotlib.pyplot as plt
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import statistics
|
||||
|
||||
from pkg_resources import get_distribution
|
||||
from scipy import optimize, stats
|
||||
from astropy import modeling
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor(v):
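# Target function for this version: parity of all bits except bit 0, so the first
# input column is irrelevant to the output (presumably to check that the search does
# not select it).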
|
||||
return np.sum(v[1:]) % 2
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
def bin_div(a, b):
|
||||
if a == 0 and b == 0:
|
||||
return 2
|
||||
if a == 1 and b == 0:
|
||||
return -1
|
||||
if a == 0 and b == 1:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
self.uplift = 0
|
||||
|
||||
def evaluate(self, x):
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 16
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
# self.sample_size = self.N ** 2
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.raw_inputs = np.zeros((self.sample_size, self.N)).astype(np.int32)
|
||||
self.masked_distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.nn = np.zeros((self.sample_size, self.sample_size)).astype(np.int32)
|
||||
self.nn_distances = np.zeros((self.sample_size, 2)).astype(np.int32)
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.mask = np.zeros((self.sample_size))
|
||||
self.numerators = np.zeros((self.sample_size))
|
||||
self.denominators = np.zeros((self.sample_size))
|
||||
self.coherences = np.zeros((self.sample_size))
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
self.uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_means = np.zeros((self.actual_N))
|
||||
self.uplift_medians = np.zeros((self.actual_N))
|
||||
self.uplift_convergences = np.zeros((self.actual_N))
|
||||
# self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.superspace_uplift_samples = []
|
||||
self.subspace_uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)]
|
||||
self.uplift_stddevs = np.zeros((self.actual_N))
|
||||
|
||||
self.last_index = -1
|
||||
self.last_pvalue = -1
|
||||
self.left_half = True
|
||||
|
||||
self.samples = 10
|
||||
self.num_bins = 1000
|
||||
# self.samples = 200
|
||||
self.base_coherence_samples = np.zeros((self.samples))
|
||||
self.coherence_samples = np.zeros((self.actual_N, self.samples))
|
||||
self.subspace_uplift_samples = np.zeros((self.actual_N, self.samples))
|
||||
self.subspace_uplift_weights = np.zeros((self.actual_N, self.samples))
|
||||
|
||||
self.layers = []
|
||||
self.layer_confidence = {}
|
||||
self.base = None
|
||||
|
||||
self.scratch = np.zeros((self.N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
self.visited = set()
|
||||
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.has_added_layer = False
|
||||
|
||||
def randomize_inputs(self):
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.raw_inputs[i][j] = val
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
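# Besides the inverse-power distance weights (now 1 / distance**12, much steeper than
# the earlier 1 / 2**(distance - 1)), this version records each sample's nearest
# neighbours: nn_distances[i] holds the minimum Hamming distance and the neighbour
# count, and nn[i][:count] the indices of the neighbours at that distance.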
|
||||
self.nn.fill(-1)
|
||||
self.nn_distances.fill(-1)
|
||||
for i in range(0, len(self.raw_inputs)):
|
||||
x_a = self.raw_inputs[i]
|
||||
for j in range(0, len(self.raw_inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.raw_inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
if (self.nn_distances[i][0] < 0 or distance < self.nn_distances[i][0]) and distance > 0:
|
||||
self.nn_distances[i][0] = distance
|
||||
self.nn_distances[i][1] = 1
|
||||
self.nn[i][0] = j
|
||||
elif distance == self.nn_distances[i][0]:
|
||||
count = self.nn_distances[i][1]
|
||||
self.nn_distances[i][1] = count + 1
|
||||
self.nn[i][count] = j
|
||||
# self.distances[i][j] = 1.0 / (2 ** (distance - 1)) if distance > 0 else 0
|
||||
self.distances[i][j] = 1.0 / (distance ** 12) if distance > 0 else 0
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.raw_inputs)):
|
||||
self.expected_outputs[i] = xor(self.raw_inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def mat_coherence(self):
|
||||
np.abs(self.output_xor, self.mask)
|
||||
np.subtract(self.output_xor, self.mask, self.mask)
|
||||
np.divide(self.mask, 2.0, self.mask)
|
||||
np.add(1.0, self.mask, self.mask)
|
||||
self.xor_square.fill(0)
|
||||
np.copyto(self.masked_distances, self.distances)
|
||||
masked_distances_t = self.masked_distances.transpose()
|
||||
for i in range(0, len(self.xor_square)):
|
||||
self.xor_square[i] = self.output_xor
|
||||
np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i])
|
||||
np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i])
|
||||
np.sum(self.masked_distances, axis=0, out=self.denominators)
|
||||
self.xor_square = self.xor_square.transpose()
|
||||
np.logical_xor(self.xor_square, self.output_xor, self.xor_square)
|
||||
np.multiply(self.xor_square, self.masked_distances, self.xor_square)
|
||||
np.sum(self.xor_square, axis=0, out=self.numerators)
|
||||
np.divide(self.numerators, self.denominators, self.coherences)
|
||||
mean = np.nanmean(self.coherences)
|
||||
if isnan(mean):
|
||||
mean = 1.0
|
||||
return 1.0 - mean
|
||||
|
||||
def nn_coherence(self):
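# Nearest-neighbour coherence: for each sample, the fraction of its minimum-distance
# neighbours whose residual matches its own (both 0 or both 1), averaged over all
# samples. Note that -1 "excluded" entries are not masked here; they simply never
# match, unlike mat_coherence.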
|
||||
for i in range(0, len(self.output_xor)):
|
||||
total = 0
|
||||
y_a = self.output_xor[i]
|
||||
[distance, count] = self.nn_distances[i]
|
||||
for index in range(0, count):
|
||||
j = self.nn[i][index]
|
||||
y_b = self.output_xor[j]
|
||||
total += 1 if y_a == 1 and y_b == 1 or y_a == 0 and y_b == 0 else 0
|
||||
self.coherences[i] = total / count
|
||||
return np.mean(self.coherences)
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
return self.nn_coherence()
|
||||
# return self.mat_coherence()
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
raw_coherence = sum(coherences) / len(coherences)
|
||||
check_coherence = self.mat_coherence()
|
||||
|
||||
return raw_coherence
|
||||
|
||||
def div_coherence(self):
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
if y_a < 0:
|
||||
continue
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
if y_b < 0:
|
||||
continue
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
# if y_a < 0 or y_b < 0:
|
||||
# numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def normalize_p(self):
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 1000)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.has_added_layer = True
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
|
||||
|
||||
def seed_candidate_pool(self):
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.candidate_ids:
|
||||
continue
|
||||
self.candidate_pool.append(candidate)
|
||||
self.candidate_ids.add(candidate_id)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def get_distribution(self, candidate, half = 1):
|
||||
count = 0
|
||||
for i in range(0, len(self.inputs)):
|
||||
value = candidate.evaluate(self.inputs[i])
|
||||
if value == half:
|
||||
self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i]
|
||||
count += 1
|
||||
else:
|
||||
self.output_xor[i] = -1
|
||||
# return (count, self.mat_coherence())
|
||||
return (count, self.nn_coherence())
|
||||
|
||||
def err(self, fitted_model, bins, hist):
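# Mean squared error between a fitted model evaluated at the bin edges and the
# histogram values; appears intended for the commented-out Gaussian-fit scoring below.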
|
||||
err = 0
|
||||
for i in range(0, self.num_bins):
|
||||
x = bins[i + 1]
|
||||
y = hist[i]
|
||||
delta = fitted_model(x) - y
|
||||
err += delta * delta
|
||||
return err / self.num_bins
|
||||
|
||||
def update(self):
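# One epoch: record the base coherence, each column's coherence delta when XOR-ed in
# as an extra term, and each column's subspace uplift (with the subspace size as a
# weight). After self.samples epochs, one-sample t-tests (alternative='greater')
# score each column's deltas; depending on self.left_half the highest- or lowest-
# p-value column is chosen, and left_half is toggled when the same column repeats
# with little change in p-value. The chosen column is appended to knowns and
# committed as a layer.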
|
||||
sample = self.epoch
|
||||
self.epoch += 1
|
||||
|
||||
base_coherence = self.random_sample()
|
||||
self.base_coherence_samples[sample] = base_coherence
|
||||
candidate = Candidate(self.knowns[:])
|
||||
|
||||
for i in range(0, self.actual_N):
|
||||
candidate.indices.append(i)
|
||||
try:
|
||||
count_0, subspace_coherence_0 = self.get_distribution(candidate, 0)
|
||||
# count_1, subspace_coherence_1 = self.get_distribution(candidate, 1)
|
||||
# delta = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size
|
||||
# delta = subspace_coherence_0 - subspace_coherence_1
|
||||
self.subspace_uplift_samples[i][sample] = subspace_coherence_0 - base_coherence
|
||||
self.subspace_uplift_weights[i][sample] = count_0 / self.sample_size
|
||||
# self.subspace_uplift_left_samples[i][sample] = subspace_coherence_0
|
||||
# self.subspace_uplift_right_samples[i][sample] = subspace_coherence_1 - base_coherence
|
||||
|
||||
# if index_hash(candidate.indices) in self.stops:
|
||||
# continue
|
||||
|
||||
for j in range(0, len(self.inputs)):
|
||||
self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j])
|
||||
|
||||
coherence = self.coherence()
|
||||
self.coherence_samples[i][sample] = coherence - base_coherence
|
||||
# self.coherence_samples[i][sample] = coherence
|
||||
finally:
|
||||
candidate.indices.pop()
|
||||
|
||||
if self.epoch >= self.samples:
|
||||
# for i in range(0, self.actual_N):
|
||||
# parameters = stats.norm.fit(self.uplift_samples[i])
|
||||
# print(i, parameters)
|
||||
# print(i, stats.kstest(self.uplift_samples[i], "norm", parameters))
|
||||
|
||||
added = False
|
||||
# parameters = stats.norm.fit(self.base_coherence_samples)
|
||||
# (base_mu, _) = parameters
|
||||
|
||||
# (hist, bins) = np.histogram(self.base_coherence_samples, self.num_bins, density=True)
|
||||
# fitter = modeling.fitting.LevMarLSQFitter()
|
||||
# model = modeling.models.Gaussian1D()
|
||||
# fitted_model = fitter(model, bins[1:], hist)
|
||||
# print('Base', fitted_model.mean.value, self.err(fitted_model, bins, hist))
|
||||
|
||||
# x = np.linspace(0, 1.0, 10000)
|
||||
# density = stats.gaussian_kde(self.base_coherence_samples)(x)
|
||||
# mode = x[np.argsort(density)[-1]]
|
||||
# print(mode)
|
||||
|
||||
# for i in range(0, self.actual_N):
|
||||
# count = 0
|
||||
# for j in range(0, self.samples):
|
||||
# for k in range(0, self.samples):
|
||||
# if self.coherence_samples[i][j] > self.base_coherence_samples[k]:
|
||||
# count += 1
|
||||
# print(i, count)
|
||||
|
||||
try:
|
||||
index = -1
|
||||
lowest_index = -1
|
||||
lowest_pvalue = -1
|
||||
highest_index = -1
|
||||
highest_pvalue = -1
|
||||
best_pvalue = -1
|
||||
pvalue_sum = 0
|
||||
pvalue_denom = 0
|
||||
is_subspace = False
|
||||
|
||||
for i in range(0, self.actual_N):
|
||||
if i in self.knowns:
|
||||
continue
|
||||
try:
|
||||
result = stats.ttest_1samp(self.coherence_samples[i], 0, alternative='greater')
|
||||
print(i, result)
|
||||
# (hist, bins) = np.histogram(self.coherence_samples[i], 20, range=(-0.01, 0.01))
|
||||
# total = 0
|
||||
# for j in range(0, 20):
|
||||
# total += hist[j] * (bins[j] + bins[j + 1]) / 2
|
||||
# mode = total / sum(hist)
|
||||
|
||||
# fitter = modeling.fitting.LevMarLSQFitter()
|
||||
# model = modeling.models.Gaussian1D()
|
||||
# fitted_model = fitter(model, bins[1:], hist)
|
||||
# mode = fitted_model.mean.value
|
||||
# print(i, total)
|
||||
|
||||
# result = stats.kstest(self.base_coherence_samples, self.coherence_samples[i], alternative='greater')
|
||||
# print(i, result)
|
||||
# value = result.pvalue * (1 - result.statistic)
|
||||
# parameters = stats.norm.fit(self.coherence_samples[i])
|
||||
# (mu, _) = parameters
|
||||
# density = stats.gaussian_kde(self.coherence_samples[i])(x)
|
||||
# mode = x[np.argsort(density)[-1]]
|
||||
# print(i, mode)
|
||||
# print(i, mu)
|
||||
if not isnan(result.pvalue):
|
||||
if i == self.last_index:
|
||||
delta = abs(result.pvalue - self.last_pvalue)
|
||||
if delta < 0.1:
|
||||
print('Low delta!')
|
||||
print(self.last_index, delta)
|
||||
# self.last_index = -1
|
||||
self.left_half = not self.left_half
|
||||
# self.layers.pop()
|
||||
# self.base = self.cache_layers()
|
||||
# return
|
||||
|
||||
pvalue_sum += result.pvalue
|
||||
pvalue_denom += 1
|
||||
if lowest_index < 0 or result.pvalue < lowest_pvalue:
|
||||
lowest_index = i
|
||||
lowest_pvalue = result.pvalue
|
||||
if highest_index < 0 or result.pvalue > highest_pvalue:
|
||||
highest_index = i
|
||||
highest_pvalue = result.pvalue
|
||||
except Exception as e:
|
||||
print(e)
|
||||
pass
|
||||
average_pvalue = pvalue_sum / pvalue_denom
|
||||
print(average_pvalue)
|
||||
index = highest_index if self.left_half else lowest_index
|
||||
best_pvalue = highest_pvalue if self.left_half else lowest_pvalue
|
||||
|
||||
self.last_index = index
|
||||
self.last_pvalue = best_pvalue
|
||||
# if average_pvalue < 0.5:
|
||||
# index = lowest_index
|
||||
# best_pvalue = lowest_pvalue
|
||||
# else:
|
||||
# index = highest_index
|
||||
# best_pvalue = highest_pvalue
|
||||
# print(e)
|
||||
|
||||
# for i in range(0, self.actual_N):
|
||||
# if i in self.knowns:
|
||||
# continue
|
||||
# # result = stats.kstest(self.base_coherence_samples, self.subspace_uplift_left_samples[i], alternative='greater')
|
||||
# # # result = stats.kstest(self.subspace_uplift_left_samples[i], self.subspace_uplift_right_samples[i], alternative='greater')
|
||||
# # print(i, result)
|
||||
# # value = result.pvalue * (1 - result.statistic)
|
||||
# # parameters = stats.norm.fit(self.subspace_uplift_left_samples[i])
|
||||
# # (mu, _) = parameters
|
||||
# try:
|
||||
# result = stats.ttest_1samp(self.subspace_uplift_samples[i], 0, alternative='greater')
|
||||
# print(i, result)
|
||||
# # (hist, bins) = np.histogram(self.subspace_uplift_samples[i], 20, range=(-0.01, 0.01))
|
||||
# # bin_index = np.argsort(hist)[-1]
|
||||
# # mode = (bins[bin_index] + bins[bin_index + 1]) / 2
|
||||
# # fitter = modeling.fitting.LevMarLSQFitter()
|
||||
# # model = modeling.models.Gaussian1D()
|
||||
# # fitted_model = fitter(model, bins[1:], hist)
|
||||
# # mode = fitted_model.mean.value
|
||||
# # print(i, mode)
|
||||
# # density = stats.gaussian_kde(self.subspace_uplift_samples[i], weights=self.subspace_uplift_weights[i])(x)
|
||||
# # density = stats.gaussian_kde(self.subspace_uplift_samples[i])(x)
|
||||
# # mode = x[np.argsort(density)[-1]]
|
||||
# # print(i, mode)
|
||||
# # print(i, mu)
|
||||
# if (index < 0 or result.pvalue < lowest_pvalue) and not isnan(result.pvalue):
|
||||
# # if index < 0 or value < lowest_pvalue:
|
||||
# index = i
|
||||
# lowest_pvalue = result.pvalue
|
||||
# is_subspace = True
|
||||
|
||||
# # if result.pvalue > 0.95:
|
||||
# # index = i
|
||||
# # parameters = stats.norm.fit(self.subspace_uplift_samples[i])
|
||||
# # (mu, _) = parameters
|
||||
# # if mu > base_mu:
|
||||
# # if index < 0 or mu > highest_mu:
|
||||
# # index = i
|
||||
# # highest_mu = mu
|
||||
# except Exception as e:
|
||||
# print(e)
|
||||
# pass
|
||||
# # print(e)
|
||||
|
||||
if index >= 0:
|
||||
if is_subspace:
|
||||
# print('subspace')
|
||||
self.knowns.append(index)
|
||||
print(self.knowns, best_pvalue)
|
||||
else:
|
||||
# print('flat')
|
||||
self.knowns.append(index)
|
||||
# self.layer_confidence[index_hash(self.knowns)] = confidence
|
||||
# num_terms = len(self.knowns)
|
||||
print(self.knowns, best_pvalue)
|
||||
print(base_coherence)
|
||||
self.add_layer()
|
||||
# if num_terms > self.num_terms:
|
||||
# self.stops = set()
|
||||
# self.num_terms = num_terms
|
||||
self.knowns = []
|
||||
return
|
||||
else:
|
||||
self.knowns = []
|
||||
# else:
|
||||
# self.knowns = []
|
||||
|
||||
# if len(self.knowns) > 0:
|
||||
# # self.add_stop()
|
||||
# self.knowns = []
|
||||
finally:
|
||||
# fig, axs = plt.subplots(int(self.actual_N / 4), 4)
|
||||
# x_eval = np.linspace(-1.0, 1.0, num=1000)
|
||||
# for i in range(0, int(self.actual_N / 4)):
|
||||
# for j in range(0, 4):
|
||||
# # (hist, bins) = np.histogram(self.base_coherence_samples, self.num_bins, density=True)
|
||||
# # fitter = modeling.fitting.LevMarLSQFitter()
|
||||
# # model = modeling.models.Gaussian1D()
|
||||
# # fitted_model = fitter(model, bins[1:], hist)
|
||||
# # axs[i][j].scatter(bins[1:], hist, s=1, color='r', alpha=0.5)
|
||||
# # axs[i][j].plot(x_eval, fitted_model(x_eval), color='r')
|
||||
|
||||
# (hist, bins) = np.histogram(self.coherence_samples[i * 4 + j], self.num_bins, density=True)
|
||||
# # fitter = modeling.fitting.LevMarLSQFitter()
|
||||
# # model = modeling.models.Gaussian1D()
|
||||
# # fitted_model = fitter(model, bins[1:], hist)
|
||||
# axs[i][j].scatter(bins[1:], hist, s=1, color='g', alpha=0.5)
|
||||
# # axs[i][j].plot(x_eval, fitted_model(x_eval), color='g')
|
||||
|
||||
# (hist, bins) = np.histogram(self.subspace_uplift_samples[i * 4 + j], self.num_bins, density=True)
|
||||
# # fitter = modeling.fitting.LevMarLSQFitter()
|
||||
# # model = modeling.models.Gaussian1D()
|
||||
# # fitted_model = fitter(model, bins[1:], hist)
|
||||
# axs[i][j].scatter(bins[1:], hist, s=1, color='b', alpha=0.5)
|
||||
# # axs[i][j].plot(x_eval, fitted_model(x_eval), color='b')
|
||||
|
||||
# # kde0 = stats.gaussian_kde(self.base_coherence_samples)
|
||||
# kde1 = stats.gaussian_kde(self.coherence_samples[i * 4 + j])
|
||||
# # kde2 = stats.gaussian_kde(self.subspace_uplift_samples[i * 4 + j], weights=self.subspace_uplift_weights[i])
|
||||
# kde2 = stats.gaussian_kde(self.subspace_uplift_samples[i * 4 + j])
|
||||
# # axs[i][j].plot(x_eval, kde0(x_eval), color='r')
|
||||
# axs[i][j].plot(x_eval, kde1(x_eval), color='g')
|
||||
# axs[i][j].plot(x_eval, kde2(x_eval), color='b')
|
||||
# # n, bins, patches = axs[i][j].hist(self.base_coherence_samples, 50, density=True, facecolor='r', alpha=0.5)
|
||||
# # n, bins, patches = axs[i][j].hist(self.coherence_samples[i * 4 + j], 50, density=True, facecolor='g', alpha=0.5)
|
||||
# # n, bins, patches = axs[i][j].hist(self.subspace_uplift_samples[i * 4 + j], 50, density=True, facecolor='b', alpha=0.5)
|
||||
# plt.show()
|
||||
self.epoch = 0
|
||||
|
||||
return
|
||||
|
||||
# print('=====' + str(base_coherence))
|
||||
# print(self.uplifts)
|
||||
# print(self.uplift_means)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplift_stddevs)
|
||||
# print(self.uplift_ranges)
|
||||
# print(self.uplift_convergences)
|
||||
# print(self.subspace_uplifts)
|
||||
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
print(base_coherence)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.add_layer()
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
if subspace_index >= 0:
|
||||
self.knowns.append(subspace_index)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
# print('======')
|
||||
# print(self.epoch, base_coherence)
|
||||
# print('======')
|
||||
|
||||
# if len(self.candidate_pool) == 0:
|
||||
# print(self.p)
|
||||
|
||||
# for i in range(0, min(5, len(self.candidate_pool))):
|
||||
# candidate = self.candidate_pool[i]
|
||||
# print(candidate.id(), candidate.uplift)
|
||||
|
||||
# if self.epoch < 15:
|
||||
# return
|
||||
|
||||
if self.candidate_pool[0].uplift > 0.3:
|
||||
candidate = self.candidate_pool[0]
|
||||
candidate_id = candidate.id()
|
||||
self.candidate_ids.remove(candidate_id)
|
||||
print(candidate_id)
|
||||
self.knowns = candidate.indices
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
|
||||
self.epoch = 0
|
||||
self.num_terms += 1
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
return
|
||||
|
||||
# np.copyto(self.next_p, self.p)
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in visited:
|
||||
continue
|
||||
visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 0:
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if index is not None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
# probabilities.knowns = [14]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [8]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [4]
|
||||
# probabilities.add_layer()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
1052
mutations19.py
Normal file
File diff suppressed because it is too large
570
mutations2.py
Normal file
@ -0,0 +1,570 @@
|
||||
import hashlib
|
||||
import math
|
||||
from matplotlib import offsetbox
|
||||
import numpy as np
|
||||
import random
|
||||
from struct import pack, pack_into, unpack_from
|
||||
import secrets
|
||||
|
||||
from numpy import hamming
|
||||
|
||||
N = 32
|
||||
M = 2
|
||||
|
||||
def bit_at_index(buffer, index):
|
||||
offset = (index >> 3) % len(buffer)
|
||||
return buffer[offset] & (1 << (index & 0b111)) != 0
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def encode_f(f, buffer, offset=0):
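# Serialise a tree f = (inverted, flips, child): a 4-byte inverted flag, then for each
# flip index an opcode 0 followed by the index; a leaf terminates with opcode 1, while
# an internal gate writes opcode 2 (or 3 when the gate is inverted) followed by the
# encodings of its left and right subtrees.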
|
||||
(inverted, flips, child) = f
|
||||
pack_into('I', buffer, offset, inverted)
|
||||
offset += 4
|
||||
for index in flips:
|
||||
pack_into('I', buffer, offset, 0)
|
||||
offset += 4
|
||||
pack_into('I', buffer, offset, index)
|
||||
offset += 4
|
||||
if child is None:
|
||||
pack_into('I', buffer, offset, 1)
|
||||
offset += 4
|
||||
return offset
|
||||
(inverted, left, right) = child
|
||||
pack_into('I', buffer, offset, 2 if not inverted else 3)
|
||||
offset += 4
|
||||
offset = encode_f(left, buffer, offset)
|
||||
offset = encode_f(right, buffer, offset)
|
||||
return offset
|
||||
|
||||
def generate_random_branch(p_mutation):
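# Build a random node: a random inversion flag, a run of random flip indices added
# while a coin keeps landing under p_add_indices, and a run of recursively generated
# (inverted, left, right) child gates added while a coin lands under p_add_children.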
|
||||
global N
|
||||
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
inverted = random.randint(0, 1)
|
||||
indices = set()
|
||||
children = []
|
||||
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in indices]
|
||||
if len(available_indices) == 1:
|
||||
indices.add(available_indices[0])
|
||||
continue
|
||||
indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_add_children)
|
||||
right = generate_random_branch(p_add_children)
|
||||
children.append((child_inverted, left, right))
|
||||
return (inverted, indices, children)
|
||||
|
||||
def mutate_f(f, p_mutation):
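# Return a mutated copy of f = (inverted, indices, children): possibly flip the
# inversion, drop or add flip indices, drop, clone or recursively mutate child gates,
# and append freshly generated random branches, each step gated by its own probability
# derived from p_mutation.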
|
||||
global N
|
||||
(inverted, indices, children) = f
|
||||
mutated_indices = set(indices)
|
||||
mutated_children = children[:]
|
||||
|
||||
p_invert = p_mutation * random.random()
|
||||
p_drop_indices = p_mutation * random.random()
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_drop_children = p_mutation * random.random()
|
||||
p_mutate_child = p_mutation * random.random()
|
||||
p_clone_child = p_mutation * random.random()
|
||||
p_invert_child = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
# randomly invert
|
||||
if random.random() < p_invert:
|
||||
inverted ^= 1
|
||||
# randomly drop indices
|
||||
while random.random() < p_drop_indices and len(mutated_indices) > 0:
|
||||
mutated_indices.pop()
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(mutated_indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in mutated_indices]
|
||||
if len(available_indices) == 1:
|
||||
mutated_indices.add(available_indices[0])
|
||||
continue
|
||||
mutated_indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly drop children
|
||||
while random.random() < p_drop_children and len(mutated_children) > 0:
|
||||
if len(mutated_children) == 1:
|
||||
del mutated_children[0]
|
||||
break
|
||||
del mutated_children[random.randint(0, len(mutated_children) - 1)]
|
||||
# randomly clone children
|
||||
while random.random() < p_clone_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
clone = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
mutated_children.append(clone)
|
||||
# randomly mutate children
|
||||
while random.random() < p_mutate_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
mutated_children[index] = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_mutation)
|
||||
right = generate_random_branch(p_mutation)
|
||||
mutated_children.append((child_inverted, left, right))
|
||||
return (inverted, mutated_indices, mutated_children)
|
||||
|
||||
def decode_f(buffer, mutate = False, offset = 0, skip_invert = False):
|
||||
global N
|
||||
inverted = 0
|
||||
if not skip_invert:
|
||||
[inverted] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
# random invert
|
||||
if mutate and random.random() < 0.01:
|
||||
inverted ^= 1
|
||||
inverted &= 0b1
|
||||
flips = set()
|
||||
# random add flip
|
||||
while mutate and random.random() < 0.5 and len(flips) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in flips]
|
||||
if len(available_indices) == 1:
|
||||
flips.add(available_indices[0])
|
||||
continue
|
||||
flips.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
while offset < len(buffer):
|
||||
# random create branch
|
||||
if mutate and random.random() < 0.01:
|
||||
gate_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(0.01)  # assumed mutation rate, matching the 1% branch-creation probability used in this block
|
||||
(offset, right) = decode_f(buffer, mutate, offset, True)
|
||||
return (offset, (inverted, flips, (gate_inverted, left, right)))
|
||||
[opcode] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
opcode &= 0b11
|
||||
if opcode == 0:
|
||||
[index] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
# random skip flip
|
||||
if mutate and random.random() < 0.01:
|
||||
continue
|
||||
if index in flips:
|
||||
flips.remove(index)
|
||||
else:
|
||||
flips.add(index)
|
||||
elif opcode == 1:
|
||||
return (offset, (inverted, flips, None))
|
||||
else:
|
||||
(offset, left) = decode_f(buffer, mutate, offset)
|
||||
(offset, right) = decode_f(buffer, mutate, offset)
|
||||
gate_inverted = 0 if opcode == 2 else 1
|
||||
# random invert
|
||||
if mutate and random.random() < 0.01:
|
||||
gate_inverted ^= 1
|
||||
# random skip branch
|
||||
if mutate and random.random() < 0.01:
|
||||
return (offset, (inverted, flips, None))
|
||||
return (offset, (inverted, flips, (gate_inverted, left, right)))
|
||||
return (offset, (inverted, [], None))
|
||||
|
||||
def generate_program(model, output_var='output'):
|
||||
global N, M
|
||||
(constant, indices, child) = model
|
||||
|
||||
statement = 'multiply(' + np.array2string(indices, separator=',') + ', x, temp)\n\t'
|
||||
statement += output_var + '=' + str(constant) + '+sum(temp)\n\t'
|
||||
|
||||
if not child is None:
|
||||
left_output = output_var + '0'
|
||||
right_output = output_var + '1'
|
||||
(left, right) = child
|
||||
statement += generate_program(left, left_output)
|
||||
statement += generate_program(right, right_output)
|
||||
statement += output_var + '+=' + left_output + '*' + right_output + '\n\t'
|
||||
statement += output_var + '%=' + str(M) + '\n\t'
|
||||
return statement
|
||||
|
||||
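# Illustrative sketch (not from the original source): for a leaf model such as
# (1, np.array([1., 0., 0., ...]), None) with M = 2, generate_program/compile build and
# exec roughly the following source, with multiply/sum bound to np.multiply/np.sum:
#
#   def f(x, temp):
#   	multiply([1.,0.,0., ...], x, temp)
#   	output=1+sum(temp)
#   	output%=2
#   	return output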
def compile(model):
|
||||
program = 'def f(x, temp):\n\t' + generate_program(model) + 'return output'
|
||||
scope = {'multiply': np.multiply, 'sum': np.sum}
|
||||
exec(program, scope)
|
||||
return scope['f']
|
||||
|
||||
def evaluate(model, x, value = 0):
|
||||
(inverted, indices, children) = model
|
||||
for i in indices:
|
||||
if bit_at_index(x, i) != 0:
|
||||
value ^= 1
|
||||
for child in children:
|
||||
(child_inverted, left, right) = child
|
||||
left = evaluate(left, x)
|
||||
right = evaluate(right, x)
|
||||
if left & right != child_inverted:
|
||||
value ^= 1
|
||||
if inverted:
|
||||
value ^= 1
|
||||
return value
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for n in x:
|
||||
num_one_bits += count_one_bits(n)
|
||||
return num_one_bits % 2
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n))
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
return inputs
|
||||
|
||||
def update_sample(sample, index):
|
||||
global N
|
||||
for j in range(0, N):
|
||||
sample[index][j] = random.randint(0, 1)
|
||||
|
||||
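# Coherence score: every other sample is weighted by 2**-hamming_distance from the point
# under consideration; a point's coherence is the weighted fraction of samples sharing its
# output, and the function returns the mean over all points (1.0 = outputs fully consistent
# with input proximity).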
def coherence(inputs, outputs, scratch):
|
||||
coherences = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
y_b = outputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
weight = 1.0 / (2 ** distance)
|
||||
denominator += weight
|
||||
if y_a == y_b:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
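# Precomputes, per point, a truncated neighbourhood for the coherence score: neighbours are
# sorted by Hamming distance, zero distances are skipped, weights are 2**-(distance - nearest
# nonzero distance), and the scan stops 8 steps past the nearest neighbour. fast_coherence()
# then re-scores any output vector from these cached (denominator, [(weight, j), ...]) models
# without recomputing distances, at the cost of being an approximation of coherence().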
def build_coherence_models(inputs, scratch):
|
||||
coherence_models = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
distances = [hamming_distance(x_a, inputs[j], scratch) for j in range(0, len(inputs))]
|
||||
indices = sorted(range(len(distances)), key=lambda i: distances[i])
|
||||
lowest = -1
|
||||
denominator = 0
|
||||
components = []
|
||||
for index in range(0, len(indices)):
|
||||
j = indices[index]
|
||||
if distances[j] == 0:
|
||||
continue
|
||||
if lowest < 0:
|
||||
lowest = distances[j]
|
||||
distance = distances[j] - lowest
|
||||
if distance >= 8:
|
||||
break
|
||||
weight = 2 ** -distance
|
||||
denominator += weight
|
||||
components.append((weight, j))
|
||||
coherence_models.append((denominator, components))
|
||||
return coherence_models
|
||||
|
||||
def fast_coherence(coherence_models, outputs):
|
||||
coherences = []
|
||||
for i in range(0, len(coherence_models)):
|
||||
(denominator, components) = coherence_models[i]
|
||||
numerator = 0
|
||||
for component in components:
|
||||
(weight, j) = component
|
||||
if outputs[i] == outputs[j]:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def score(f, sample, distances):
|
||||
residuals = [(f(x, np.zeros((N,))) - y) % M for (x, y) in sample]
return coherence([x for (x, _) in sample], residuals, np.zeros((N,)))
|
||||
|
||||
def compute_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
a = inputs[i]
|
||||
for j in range(i, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def update_distances(inputs, distances, i, scratch):
|
||||
a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
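# Iterative (stack-based) evaluation of a layered model over the whole sample: a leaf layer
# XORs its selected inputs (matmul then mod 2) into its scratch buffer; an internal layer
# waits until both children are marked touched, ANDs their scratch buffers (using `output`
# as a temporary), optionally inverts, and XORs the product into its own XOR term. The
# root's scratch is finally copied into `output` and reset_model() clears the touched flags.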
def evaluate_sample(model, sample, output):
|
||||
stack = [model]
|
||||
(_, _, _, root_scratch, _) = model
|
||||
while len(stack) > 0:
|
||||
layer = stack.pop()
|
||||
(inverted, xors, child, scratch, touched) = layer
|
||||
if child is None:
|
||||
np.matmul(sample, xors, scratch)
|
||||
np.mod(scratch, 2, scratch)
|
||||
if inverted == 1:
|
||||
np.logical_xor(1, scratch, scratch)
|
||||
touched[0] = 1
|
||||
else:
|
||||
(child_inverted, left, right) = child
|
||||
(_, _, _, left_scratch, left_touched) = left
|
||||
(_, _, _, right_scratch, right_touched) = right
|
||||
if left_touched[0] and right_touched[0]:
|
||||
np.multiply(left_scratch, right_scratch, output)
|
||||
np.matmul(sample, xors, scratch)
|
||||
np.mod(scratch, 2, scratch)
|
||||
if inverted:
|
||||
np.logical_xor(scratch, 1, scratch)
|
||||
if child_inverted:
|
||||
np.logical_xor(output, 1, output)
|
||||
np.logical_xor(scratch, output, scratch)
|
||||
touched[0] = 1
|
||||
else:
|
||||
stack.insert(0, layer)
|
||||
stack.insert(0, left)
|
||||
stack.insert(0, right)
|
||||
np.copyto(output, root_scratch)
|
||||
reset_model(model)
|
||||
|
||||
def reset_model(model):
|
||||
stack = [model]
|
||||
while len(stack) > 0:
|
||||
layer = stack.pop()
|
||||
(_, _, child, _, touched) = layer
|
||||
touched[0] = 0
|
||||
if not child is None:
|
||||
(_, left, right) = child
|
||||
stack.append(left)
|
||||
stack.append(right)
|
||||
|
||||
def clone_model(model, p_mutation):
|
||||
global N, M
|
||||
|
||||
p_constant = p_mutation * random.random()
|
||||
p_flip = p_mutation * random.random()
|
||||
p_add_child = p_mutation * random.random()
|
||||
p_drop_child = p_mutation * random.random()
|
||||
|
||||
(constant, xors, child) = model
|
||||
if random.random() < p_constant:
|
||||
constant += random.randint(0, M - 1)
|
||||
constant %= M
|
||||
clone_xors = np.zeros((N,))
|
||||
np.copyto(clone_xors, xors)
|
||||
for i in range(0, N):
|
||||
if random.random() < p_flip:
|
||||
offset = 1 if M == 2 else random.randint(1, M - 1)
|
||||
clone_xors[i] += offset
|
||||
clone_xors[i] %= M
|
||||
if child is None:
|
||||
if random.random() < p_add_child:
|
||||
left = random_child(p_mutation)
|
||||
right = random_child(p_mutation)
|
||||
return (constant, clone_xors, (left, right))
|
||||
return (constant, clone_xors, None)
|
||||
if random.random() < p_drop_child:
|
||||
return (constant, clone_xors, None)
|
||||
(left, right) = child
|
||||
clone_left = clone_model(left, p_mutation)
|
||||
clone_right = clone_model(right, p_mutation)
|
||||
return (constant, clone_xors, (clone_left, clone_right))
|
||||
|
||||
def random_child(p_mutation):
|
||||
global N, M
|
||||
constant = random.randint(0, M - 1)
|
||||
xors = np.zeros((N,))
|
||||
|
||||
p_flip = p_mutation * random.random()
|
||||
p_child = p_mutation * random.random()
|
||||
|
||||
index = random.randint(0, N - 1)
|
||||
xors[index] = 1 if M == 2 else random.randint(1, M - 1)
|
||||
for i in range(0, N):
|
||||
if i != index and random.random() < p_flip:
|
||||
xors[i] = 1 if M == 2 else random.randint(1, M - 1)
|
||||
# if random.random() < p_child:
|
||||
# left = random_child(p_mutation * random.random())
|
||||
# right = random_child(p_mutation * random.random())
|
||||
# return (constant, xors, (left, right))
|
||||
return (constant, xors, None)
|
||||
|
||||
def null_candidate():
|
||||
global N
|
||||
return (0, np.zeros((N,)), None)
|
||||
|
||||
def size(model):
|
||||
(_, xors, child) = model
|
||||
xor_size = np.sum(xors)
|
||||
if not child is None:
|
||||
(left, right) = child
|
||||
return xor_size + size(left) * size(right)
|
||||
return xor_size
|
||||
|
||||
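# Evolutionary loop: every epoch each candidate model is compiled and run on the sample, its
# residual against the sha() target (difference mod M) is scored with fast_coherence, the top
# num_survivors are kept and each spawns num_offspring clones mutated at a random rate.
# p_mutation creeps up by 0.01 while the best score stagnates and resets to 0.5 on
# improvement; after the best score repeats for 4 epochs the sample and coherence models are
# redrawn.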
def main():
|
||||
global N, M
|
||||
epochs = 10000
|
||||
num_survivors = 100
|
||||
num_offspring = 10
|
||||
num_candidates = num_survivors + num_survivors * num_offspring
|
||||
sample_size = 128
|
||||
eval_size = 100
|
||||
p_mutation = 0.5
|
||||
g = sha
|
||||
current_generation = [null_candidate() for _ in range(0, num_candidates)]
|
||||
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
output_equality = np.zeros((sample_size, sample_size))
|
||||
inputs = random_sample(sample_size, N)
|
||||
scratch = np.zeros(N,)
|
||||
# compute_distances(inputs, distances, scratch)
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((sample_size,))
|
||||
output_xor = np.zeros((sample_size,))
|
||||
ones = np.ones((sample_size,))
|
||||
numerators = np.zeros((sample_size,))
|
||||
denominators = np.zeros((sample_size,))
|
||||
coherences = np.zeros((sample_size,))
|
||||
np.matmul(ones, distances, denominators)
|
||||
scores = np.zeros((num_candidates,))
|
||||
max_score = 0
|
||||
last_score = 0
|
||||
streak = 0
|
||||
|
||||
coherence_models = build_coherence_models(inputs, scratch)
|
||||
|
||||
for epoch in range(0, epochs):
|
||||
for i in range(0, num_candidates):
|
||||
candidate = current_generation[i]
|
||||
f = compile(candidate)
|
||||
for j in range(0, sample_size):
|
||||
outputs[j] = f(inputs[j], scratch)
|
||||
np.subtract(outputs, expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
# for p in range(0, sample_size):
|
||||
# for q in range(0, sample_size):
|
||||
# m = int(output_xor[p])
|
||||
# n = int(output_xor[q])
|
||||
# distance = abs(m - n)
|
||||
# if distance > M / 2:
|
||||
# distance = M - distance
|
||||
# distance /= (M / 2)
|
||||
# distance **= 2
|
||||
# output_equality[p][q] = distance
|
||||
# # output_equality[p][q] = 1 if m == n else 0
|
||||
# np.multiply(output_equality, distances, output_equality)
|
||||
# np.matmul(ones, output_equality, numerators)
|
||||
# np.divide(numerators, denominators, coherences)
|
||||
# score = np.average(coherences)
|
||||
score = fast_coherence(coherence_models, output_xor)
|
||||
# if random.random() < 0.1:
|
||||
# check = coherence(inputs, output_xor, scratch)
|
||||
# if check - score > 1e-3:
|
||||
# print('not equal')
|
||||
scores[i] = score
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
survivors = [current_generation[index] for index in top_n]
|
||||
|
||||
# f = lambda x: evaluate(current_generation[0], x)
|
||||
# correct = 0
|
||||
# for i in range(0, eval_size):
|
||||
# x = random_input()
|
||||
# if f(x) == g(x):
|
||||
# correct += 1
|
||||
|
||||
top_score = scores[top_n[-1]]
|
||||
print(epoch, top_score, size(survivors[-1]))
|
||||
if top_score <= max_score:
|
||||
p_mutation += 0.01
|
||||
else:
|
||||
p_mutation = 0.5
|
||||
max_score = top_score
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
current_generation[i] = survivors[i]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
candidate = survivors[i]
|
||||
for j in range(0, num_offspring):
|
||||
index = num_survivors + j * num_survivors + i
|
||||
current_generation[index] = clone_model(candidate, random.random())
|
||||
|
||||
# inputs = random_sample(sample_size, N)
|
||||
# coherence_models = build_coherence_models(inputs, scratch)
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
|
||||
# while random.random() < 0.5:
|
||||
if last_score == top_score:
|
||||
streak += 1
|
||||
else:
|
||||
streak = 0
|
||||
if streak >= 4:
|
||||
inputs = random_sample(sample_size, N)
|
||||
coherence_models = build_coherence_models(inputs, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
# inputs = random_sample(sample_size, N)
|
||||
# coherence_models = build_coherence_models(inputs, scratch)
|
||||
# # compute_distances(inputs, distances, scratch)
|
||||
# # np.matmul(ones, distances, denominators)
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# streak = 0
|
||||
# expected_outputs = np.zeros((sample_size,))
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# index = random.randint(0, sample_size - 1)
|
||||
# update_sample(inputs, index)
|
||||
# expected_outputs[index] = g(inputs[index])
|
||||
# update_distances(inputs, distances, index, scratch)
|
||||
# np.matmul(ones, distances, denominators)
|
||||
last_score = top_score
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
316
mutations20.py
Normal file
316
mutations20.py
Normal file
@ -0,0 +1,316 @@
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ''.join([str(x) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
self.coherent = self.a.y == self.b.y
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b, flips):
|
||||
distance = 0
|
||||
for i in range(0, len(a.x)):
|
||||
if i in flips:
|
||||
continue
|
||||
distance += 1 if a.x[i] != b.x[i] else 0
|
||||
return distance
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor(x):
|
||||
return np.sum(x[16:]) % 2
|
||||
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent:
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent:
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# score_sum = 0
|
||||
# for j, j_influences in dof_map.items():
|
||||
# if j in lowest_dof.dof:
|
||||
# continue
|
||||
# double_score = 0
|
||||
# double_totals = [0, 0, 0, 0, 0, 0]
|
||||
# for influence in i_influences:
|
||||
# if influence in j_influences:
|
||||
# weight = 1.0 / ((len(influence.dof) - 2) ** 2)
|
||||
# if influence.coherent:
|
||||
# double_score += weight
|
||||
# double_totals[0] += 1
|
||||
# else:
|
||||
# double_score -= weight
|
||||
# double_totals[3] += 1
|
||||
# else:
|
||||
# weight = 1.0 / ((len(influence.dof) - 1) ** 2)
|
||||
# if influence.coherent:
|
||||
# double_score -= weight
|
||||
# double_totals[4] += 1
|
||||
# else:
|
||||
# double_score += weight
|
||||
# double_totals[1] += 1
|
||||
# for influence in j_influences:
|
||||
# if influence in i_influences:
|
||||
# continue
|
||||
# weight = 1.0 / ((len(influence.dof) - 1) ** 2)
|
||||
# if influence.coherent:
|
||||
# double_score -= weight
|
||||
# double_totals[5] += 1
|
||||
# else:
|
||||
# double_score += weight
|
||||
# double_totals[2] += 1
|
||||
|
||||
# score = double_score
|
||||
# score_sum += score
|
||||
# # print((i, j), score, single_totals, double_totals)
|
||||
|
||||
# if flip is None or score_sum > highest_score:
|
||||
# highest_score = score_sum
|
||||
# flip = [i]
|
||||
# print(i, score_sum)
|
||||
|
||||
# if flip is None:
|
||||
# return None
|
||||
# print('Chose', flip, 'from', lowest_dof.dof, highest_score)
|
||||
# for i in flip:
|
||||
# remove_dof(dof_map, i, True)
|
||||
# return flip
|
||||
|
||||
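# Randomized search for a set of bit flips that makes every pair of sampled points coherent:
# each pair is an Influence holding the bit positions where the points differ and whether
# their labels currently agree. The search repeatedly extends from the incoherent influence
# with the fewest remaining degrees of freedom that still has an unvisited successor state,
# picks the next bit with probability proportional to p_dist, and backtracks by dropping a
# random bit from the state when it gets stuck. Bits appearing in a surviving non-empty
# state are reinforced in p_dist for later epochs.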
def main():
|
||||
N = 32
|
||||
sample_size = 32
|
||||
p_dist = np.ones(N)
|
||||
p_dist.fill(0.5)
|
||||
epoch = 0
|
||||
|
||||
while True:
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(sha(x))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
|
||||
influences = []
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(i + 1, len(samples)):
|
||||
b = samples[j]
|
||||
influences.append(Influence(a, b))
|
||||
|
||||
visited = set()
|
||||
state = []
|
||||
|
||||
iterations = 0
|
||||
while sum([0 if influence.coherent else 1 for influence in influences]) > 0:
|
||||
# if iterations > 5000:
|
||||
# state = []
|
||||
# break
|
||||
iterations += 1
|
||||
# print(state)
|
||||
lowest_dof = None
|
||||
num_influences = -1
|
||||
for influence in influences:
|
||||
if influence.coherent:
|
||||
continue
|
||||
|
||||
if lowest_dof is not None and len(influence.dof) >= num_influences:
|
||||
continue
|
||||
|
||||
has_unvisited_state = False
|
||||
for i in influence.dof:
|
||||
state_id = get_state_id(state + [i])
|
||||
if state_id not in visited:
|
||||
has_unvisited_state = True
|
||||
break
|
||||
|
||||
if not has_unvisited_state:
|
||||
continue
|
||||
|
||||
if lowest_dof is None or len(influence.dof) < num_influences:
|
||||
lowest_dof = influence
|
||||
num_influences = len(influence.dof)
|
||||
|
||||
added = False
|
||||
if lowest_dof is not None:
|
||||
valid_choices = []
|
||||
for i in lowest_dof.dof:
|
||||
state_id = get_state_id(state + [i])
|
||||
if state_id in visited:
|
||||
continue
|
||||
valid_choices.append(i)
|
||||
|
||||
if len(valid_choices) > 0:
|
||||
i = valid_choices[0]
|
||||
if len(valid_choices) > 1:
|
||||
p_partial = np.zeros(len(valid_choices))
|
||||
index = 0
|
||||
for j in valid_choices:
|
||||
p_partial[index] = p_dist[j]
index += 1
|
||||
np.divide(p_partial, np.sum(p_partial), p_partial)
|
||||
i = np.random.choice(valid_choices, p=p_partial)
|
||||
|
||||
state_id = get_state_id(state + [i])
|
||||
visited.add(state_id)
|
||||
state.append(i)
|
||||
added = True
|
||||
|
||||
revert = False
|
||||
if added:
|
||||
i = state[-1]
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
if len(influence.dof) == 1 and influence.coherent:
|
||||
revert = True
|
||||
influence.coherent = not influence.coherent
|
||||
influence.dof.remove(i)
|
||||
|
||||
if revert or not added:
|
||||
if len(state) == 0:
|
||||
break
|
||||
i = state.pop(random.randrange(len(state)))
|
||||
for influence in influences:
|
||||
if i in influence.original_dof and not i in influence.dof:
|
||||
influence.coherent = not influence.coherent
|
||||
influence.dof.add(i)
|
||||
|
||||
if len(state) > 0:
|
||||
epoch += 1
|
||||
p_dist -= 0.0001 * (sample_size ** 2)
|
||||
for i in state:
|
||||
p_dist[i] += 0.0002 * (sample_size ** 2)
|
||||
# sample_size += 1
|
||||
print(p_dist)
|
||||
else:
|
||||
# sample_size -= 1
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
368
mutations21.py
Normal file
368
mutations21.py
Normal file
@ -0,0 +1,368 @@
|
||||
from cmath import isnan
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ','.join([str(int(x)) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def id(self):
|
||||
return ','.join(sorted([self.a.id(), self.b.id()]))
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a.x, b.x))
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor(x):
|
||||
# return sum(x[:4]) % 2
|
||||
return sum(x) % 2
|
||||
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent:
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent:
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 1st row (n+1 choose k+1) * (1-(k mod 2))
|
||||
# pseudopascal to compute the follow-on rows
|
||||
# assuming solvability, we want to maximize the probability that our current state and our state with
|
||||
# a particular single flip are one order apart in the correct direction
|
||||
|
||||
|
||||
|
||||
# 2, 0
|
||||
# 2, 2, 0
|
||||
# 2, 4, 2, 0
|
||||
# 2, 6, 6, 2, 0
|
||||
# 2, 8,12, 8, 2, 0
|
||||
# 2,10,20,20,10, 2, 0
|
||||
|
||||
# 3,-9,19,-33,51,-73,99
|
||||
# 3,-6,10,-14,18,-22,26
|
||||
# 3,-3, 4, -4, 4, -4, 4
|
||||
# 3, 0, 1, 0, 0, 0, 0
|
||||
# 3, 3, 1, 1, 0, 0, 0
|
||||
# 3, 6, 4, 2, 1, 0, 0
|
||||
# 3, 9,10, 6, 3, 1, 0
|
||||
|
||||
# 4, 0, 4, 0
|
||||
# 4, 4, 4, 4, 0
|
||||
# 4, 8, 8, 8, 4, 0
|
||||
# 4,12,16,16,12, 4, 0
|
||||
|
||||
# 5, 0,10, 0, 1
|
||||
# 5, 5,10,10, 1, 1
|
||||
# 5,
|
||||
# 5,
|
||||
|
||||
|
||||
|
||||
# 3
|
||||
#
|
||||
# @1 [1, 2, 1]
|
||||
# @2 [2, 2, 0]
|
||||
# @3 [3, 0, 1]
|
||||
|
||||
# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
|
||||
#
|
||||
# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
|
||||
# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
|
||||
# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
|
||||
# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -
|
||||
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -
|
||||
|
||||
# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
|
||||
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
|
||||
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
|
||||
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
|
||||
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]
|
||||
|
||||
# 6
|
||||
#
|
||||
# @1 [1, 5, 10, 10, 5, 1]
|
||||
# @2 [2, 8, 12, 8, 2, 0]
|
||||
# @3 [3, 9, 10, 6, 3, 1]
|
||||
# @4 [4, 8, 8, 8, 4, 0]
|
||||
# @5 [5, 5, 10, 10, 1, 1]
|
||||
# @6 [6, 0, 20, 0, 6, 0]
|
||||
|
||||
# last row, 1 if odd, 0 if even
|
||||
# second to last, subtract 2 on odds, add 2 on evens
|
||||
|
||||
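# Worked example (added for clarity), N = 3: the unnormalized rows produced below are
#   @1 [1, 2, 1], @2 [2, 2, 0], @3 [3, 0, 1]
# (matching the tables above), and dividing column j by C(3, j+1) = [3, 3, 1] gives
#   @1 [1/3, 2/3, 1], @2 [2/3, 2/3, 0], @3 [1, 0, 1]
# i.e. dist[k-1][d-1] appears to be the fraction of input pairs at Hamming distance d whose
# outputs disagree when exactly k of the N bits feed the parity.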
def compute_distributions(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
denom = math.comb(N, j+1)
|
||||
dist[i][j] /= denom
|
||||
return dist
|
||||
|
||||
|
||||
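# Greedy flip search: for the no-flip baseline and every candidate single-bit flip, count
# coherent vs incoherent sample pairs at each Hamming distance, compare the observed
# incoherence fraction per distance with every theoretical row of dist (the expected profile
# if k bits were relevant), turn the per-row errors into a 1/err-weighted average order
# estimate, and apply the flip whose estimate drops furthest below the baseline (halting
# early if some flip makes every pair coherent, or when no flip improves the estimate).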
def main():
|
||||
N = 32
|
||||
sample_size = 2048
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
dist = compute_distributions(N)
|
||||
print(dist)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(xor(x))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
|
||||
# for i in range(0, 2**N):
|
||||
# x = decode(i, N)
|
||||
# y = int(xor(x))
|
||||
# samples.append(Point(x,y))
|
||||
|
||||
base = np.zeros(N)
|
||||
current = np.zeros(N)
|
||||
|
||||
for _ in range(0, N):
|
||||
lowest_err = -1
|
||||
use_flip = -1
|
||||
for flip in range(-1, N):
|
||||
coherent_distances = {}
|
||||
incoherent_distances = {}
|
||||
all_coherent = True
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(i + 1, len(samples)):
|
||||
# if i == j:
|
||||
# continue
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
if distance not in coherent_distances:
|
||||
coherent_distances[distance] = 0
|
||||
if distance not in incoherent_distances:
|
||||
incoherent_distances[distance] = 0
|
||||
is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
|
||||
if is_coherent:
|
||||
coherent_distances[distance] += 1
|
||||
else:
|
||||
incoherent_distances[distance] += 1
|
||||
all_coherent = False
|
||||
if all_coherent:
|
||||
print('Flip and halt', flip)
|
||||
return
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
|
||||
for k in range(0, N):
|
||||
known_incoherence_at_k = dist[k]
|
||||
err = 0
|
||||
# denom = 0
|
||||
for i in range(0, N):
|
||||
if i not in coherent_distances:
|
||||
continue
|
||||
est_incoherence = incoherent_distances[i] / (coherent_distances[i] + incoherent_distances[i])
|
||||
confidence = 1.0
|
||||
# print(k, i, est_incoherence)
|
||||
err += confidence * abs(est_incoherence - known_incoherence_at_k[i - 1])  # / ((est_incoherence + known_incoherence_at_k[i - 1]) / 2)
|
||||
# denom += 1
|
||||
# print(flip, k, err)
|
||||
# err /= denom
|
||||
if flip < 0:
|
||||
base[k] = err
|
||||
else:
|
||||
current[k] = err
|
||||
if flip >= 0:
|
||||
# np.divide(current, np.max(current), current)
|
||||
# print(flip, current)
|
||||
index = -1
|
||||
base_sum = 0
|
||||
current_sum = 0
|
||||
base_total = 0
|
||||
current_total = 0
|
||||
for k in range(0, N):
|
||||
if base[k] > 0:
|
||||
base_sum += k / base[k]
|
||||
base_total += 1.0 / base[k]
|
||||
else:
|
||||
base_sum += k * 1e6
|
||||
base_total += 1e6
|
||||
if current[k] > 0:
|
||||
current_sum += k / current[k]
|
||||
current_total += 1.0 / current[k]
|
||||
else:
|
||||
current_sum += k * 1e6
|
||||
current_total += 1e6
|
||||
# print(base_sum, base_total, current_sum, current_total)
|
||||
# print(current_sum / current_total, base_sum / base_total)
|
||||
rel_to_base = (current_sum / current_total) - (base_sum / base_total)
|
||||
|
||||
# print(base_sum, base_total)
|
||||
# print(base_sum / base_total, current_sum / current_total)
|
||||
|
||||
# for k in range(0, N - 2):
|
||||
# # err = base[k + 1] * current[k] * 1.0 / (base[k + 1] * current[k + 2])
|
||||
# err = base[k + 1] * current[k]
|
||||
# if rel_to_base < 0 or err < rel_to_base:
|
||||
# rel_to_base = err
|
||||
# index = k
|
||||
|
||||
if use_flip < 0 or rel_to_base < lowest_err:
|
||||
lowest_err = rel_to_base
|
||||
use_flip = flip
|
||||
print(flip, rel_to_base)
|
||||
else:
|
||||
pass
|
||||
# np.divide(base, np.max(base), base)
|
||||
# print(flip, base)
|
||||
|
||||
if lowest_err > 0:
|
||||
return
|
||||
print('Flip', use_flip, lowest_err)
|
||||
for p in samples:
|
||||
if p.x[use_flip]:
|
||||
p.y ^= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
405
mutations22.py
Normal file
405
mutations22.py
Normal file
@ -0,0 +1,405 @@
|
||||
from cmath import isnan
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ','.join([str(int(x)) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def id(self):
|
||||
return ','.join(sorted([self.a.id(), self.b.id()]))
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a.x, b.x))
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor(x):
|
||||
# return sum(x[:4]) % 2
|
||||
return sum(x) % 2
|
||||
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent:
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent:
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 1st row (n+1 choose k+1) * (1-(k mod 2))
|
||||
# pseudopascal to compute the follow-on rows
|
||||
# assuming solvability, we want to maximize the probability that our current state and our state with
|
||||
# a particular single flip are one order apart in the correct direction
|
||||
|
||||
|
||||
|
||||
# 2, 0
|
||||
# 2, 2, 0
|
||||
# 2, 4, 2, 0
|
||||
# 2, 6, 6, 2, 0
|
||||
# 2, 8,12, 8, 2, 0
|
||||
# 2,10,20,20,10, 2, 0
|
||||
|
||||
# 3,-9,19,-33,51,-73,99
|
||||
# 3,-6,10,-14,18,-22,26
|
||||
# 3,-3, 4, -4, 4, -4, 4
|
||||
# 3, 0, 1, 0, 0, 0, 0
|
||||
# 3, 3, 1, 1, 0, 0, 0
|
||||
# 3, 6, 4, 2, 1, 0, 0
|
||||
# 3, 9,10, 6, 3, 1, 0
|
||||
|
||||
# 4, 0, 4, 0
|
||||
# 4, 4, 4, 4, 0
|
||||
# 4, 8, 8, 8, 4, 0
|
||||
# 4,12,16,16,12, 4, 0
|
||||
|
||||
# 5, 0,10, 0, 1
|
||||
# 5, 5,10,10, 1, 1
|
||||
# 5,
|
||||
# 5,
|
||||
|
||||
|
||||
|
||||
# 3
|
||||
#
|
||||
# @1 [1, 2, 1]
|
||||
# @2 [2, 2, 0]
|
||||
# @3 [3, 0, 1]
|
||||
|
||||
# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
|
||||
#
|
||||
# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
|
||||
# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
|
||||
# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
|
||||
# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -
|
||||
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -
|
||||
|
||||
# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
|
||||
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
|
||||
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
|
||||
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
|
||||
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]
|
||||
|
||||
# 6
|
||||
#
|
||||
# @1 [1, 5, 10, 10, 5, 1]
|
||||
# @2 [2, 8, 12, 8, 2, 0]
|
||||
# @3 [3, 9, 10, 6, 3, 1]
|
||||
# @4 [4, 8, 8, 8, 4, 0]
|
||||
# @5 [5, 5, 10, 10, 1, 1]
|
||||
# @6 [6, 0, 20, 0, 6, 0]
|
||||
|
||||
# last row, 1 if odd, 0 if even
|
||||
# second to last, subtract 2 on odds, add 2 on evens
|
||||
|
||||
def compute_distributions(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
print(dist)
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
denom = math.comb(N, j+1)
|
||||
dist[i][j] /= denom
|
||||
return dist
|
||||
|
||||
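# Raised-cosine window used as a crude likelihood: zero outside [u - s, u + s], peaks at
# 1/s when x == u, and integrates to 1 over its support, e.g. raised_cosine(u, u, s) == 1/s
# and raised_cosine(u + s, u, s) == 0.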
def raised_cosine(x, u, s):
|
||||
if x < (u - s):
|
||||
return 0
|
||||
if x > (u + s):
|
||||
return 0
|
||||
return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s))
|
||||
|
||||
def average_index(x):
|
||||
total = 0
|
||||
for k in range(0, len(x)):
|
||||
total += k * x[k]
|
||||
return total / np.sum(x)
|
||||
|
||||
# sample_size used for each N (sample_size = 2 ** (N // 2 + 1)):
# 8, 32, 2^5
|
||||
# 10, 64, 2^6
|
||||
# 12, 128, 2^7
|
||||
# 14, 256, 2^8
|
||||
# 16, 512, 2^9
|
||||
# 18, 1024, 2^10
|
||||
# 20, 2048, 2^11
|
||||
# 22, 4096, 2^12
|
||||
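# Probabilistic variant of the flip search: for each candidate flip the per-distance
# incoherence estimates are converted into a probability for every possible order k (a
# product of raised-cosine windows around the feasible range of the true incoherence),
# normalized, and compared against cumulative_probability, the running belief over the
# current order, which is shifted down by one after each accepted flip. The flip with the
# largest p_forward - p_backward (most likely to have reduced the order) is applied to the
# sample labels and the loop repeats.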
def main():
|
||||
N = 16
|
||||
sample_size = 128
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
dist = compute_distributions(N)
|
||||
print(dist)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(xor(x))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
total_sample_count = len(samples)
|
||||
|
||||
# for i in range(0, 2**N):
|
||||
# x = decode(i, N)
|
||||
# y = int(xor(x))
|
||||
# samples.append(Point(x,y))
|
||||
|
||||
base = np.zeros(N)
|
||||
current = np.zeros(N)
|
||||
cumulative_probability = np.ones(N)
|
||||
|
||||
for _ in range(0, N):
|
||||
lowest_err = -1
|
||||
use_flip = -1
|
||||
for flip in range(-1, N):
|
||||
coherent_distances = np.zeros(N+1)
|
||||
incoherent_distances = np.zeros(N+1)
|
||||
all_coherent = True
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
|
||||
if is_coherent:
|
||||
coherent_distances[distance] += 1
|
||||
else:
|
||||
incoherent_distances[distance] += 1
|
||||
all_coherent = False
|
||||
if all_coherent:
|
||||
print('Flip and halt', flip)
|
||||
return
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances))
|
||||
# print(est_incoherence)
|
||||
|
||||
for k in range(0, N):
|
||||
known_incoherence_at_k = dist[k]
|
||||
err = 0
|
||||
# denom = 0
|
||||
probability = 1.0
|
||||
for i in range(1, N + 1):
|
||||
if isnan(est_incoherence[i]):
|
||||
continue
|
||||
sample_size = coherent_distances[i] + incoherent_distances[i]
|
||||
full_size = math.comb(N, i) * (2 ** N)
|
||||
num_unknowns = full_size - sample_size
|
||||
min_true_value = incoherent_distances[i] / full_size
|
||||
max_true_value = (incoherent_distances[i] + num_unknowns) / full_size
|
||||
s = max(abs(est_incoherence[i] - min_true_value), abs(est_incoherence[i] - max_true_value))
|
||||
u = est_incoherence[i]
|
||||
known_incoherence = known_incoherence_at_k[i - 1]
|
||||
err = raised_cosine(known_incoherence, u, s)
|
||||
probability *= err
|
||||
|
||||
# print(k, i, min_true_value, max_true_value)
|
||||
|
||||
# confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative
|
||||
# err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence
|
||||
# denom += 1
|
||||
# print(flip, k, err)
|
||||
# err /= denom
|
||||
if flip < 0:
|
||||
base[k] = probability
|
||||
else:
|
||||
current[k] = probability
|
||||
|
||||
if flip >= 0:
|
||||
if np.sum(current) == 0:
|
||||
continue
|
||||
np.divide(current, np.sum(current), current)
|
||||
# print(current)
|
||||
# temp = np.roll(cumulative_probability, -1)
|
||||
# temp[-1] = 1.0
|
||||
# np.multiply(current, temp, current)
|
||||
# np.divide(current, np.sum(current), current)
|
||||
p_forward = 0
|
||||
p_backward = 0
|
||||
for i in range(1, N):
|
||||
p_forward += cumulative_probability[i] * current[i - 1]
|
||||
for i in range(0, N - 1):
|
||||
p_backward += cumulative_probability[i] * current[i + 1]
|
||||
|
||||
# base_index = average_index(cumulative_probability)
|
||||
# new_index = average_index(current)
|
||||
# if isnan(new_index):
|
||||
# continue
|
||||
# np.divide(current, np.sum(current), current)
|
||||
# np.subtract(1, current, current)
|
||||
print(flip,p_forward,p_backward,current)
|
||||
delta = p_forward - p_backward
|
||||
if use_flip < 0 or delta > lowest_err:
|
||||
use_flip = flip
|
||||
lowest_err = delta
|
||||
|
||||
# for k in range(0, N - 1):
|
||||
# value = current[k] * cumulative_probability[k + 1]
|
||||
# if use_flip < 0 or value > lowest_err:
|
||||
# use_flip = flip
|
||||
# lowest_err = value
|
||||
# print(flip, highest_value)
|
||||
else:
|
||||
np.divide(base, np.sum(base), base)
|
||||
# np.subtract(1, base, base)
|
||||
# print(cumulative_probability)
|
||||
cumulative_probability = np.roll(cumulative_probability, -1)
|
||||
cumulative_probability[-1] = 1.0
|
||||
# print(cumulative_probability)
|
||||
# print(base)
|
||||
np.multiply(base, cumulative_probability, cumulative_probability)
|
||||
np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
print(cumulative_probability)
|
||||
|
||||
if use_flip < 0:
|
||||
return
|
||||
|
||||
print('Flip', use_flip, lowest_err)
|
||||
for p in samples:
|
||||
if p.x[use_flip]:
|
||||
p.y ^= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
761
mutations23.py
Normal file
761
mutations23.py
Normal file
@ -0,0 +1,761 @@
|
||||
from cmath import isnan
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ','.join([str(int(x)) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def id(self):
|
||||
return ','.join(sorted([self.a.id(), self.b.id()]))
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a.x, b.x))
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor(x):
|
||||
# return sum(x) % 2
|
||||
half = int(len(x) / 2)
|
||||
return sum(x[:half]) % 2
|
||||
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent:
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent:
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 1st row (n+1 choose k+1) * (1-(k mod 2))
|
||||
# pseudopascal to compute the follow-on rows
|
||||
# assuming solvability, we want to maximize the probability that our current state and our state with
|
||||
# a particular single flip are one order apart in the correct direction
|
||||
|
||||
|
||||
|
||||
# 2, 0
|
||||
# 2, 2, 0
|
||||
# 2, 4, 2, 0
|
||||
# 2, 6, 6, 2, 0
|
||||
# 2, 8,12, 8, 2, 0
|
||||
# 2,10,20,20,10, 2, 0
|
||||
|
||||
# 3,-9,19,-33,51,-73,99
|
||||
# 3,-6,10,-14,18,-22,26
|
||||
# 3,-3, 4, -4, 4, -4, 4
|
||||
# 3, 0, 1, 0, 0, 0, 0
|
||||
# 3, 3, 1, 1, 0, 0, 0
|
||||
# 3, 6, 4, 2, 1, 0, 0
|
||||
# 3, 9,10, 6, 3, 1, 0
|
||||
|
||||
# 4, 0, 4, 0
|
||||
# 4, 4, 4, 4, 0
|
||||
# 4, 8, 8, 8, 4, 0
|
||||
# 4,12,16,16,12, 4, 0
|
||||
|
||||
# 5, 0,10, 0, 1
|
||||
# 5, 5,10,10, 1, 1
|
||||
# 5,
|
||||
# 5,
|
||||
|
||||
|
||||
|
||||
# 3
|
||||
#
|
||||
# @1 [1, 2, 1]
|
||||
# @2 [2, 2, 0]
|
||||
# @3 [3, 0, 1]
|
||||
|
||||
# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
|
||||
#
|
||||
# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
|
||||
# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
|
||||
# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
|
||||
# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -
|
||||
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -
|
||||
|
||||
# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
|
||||
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
|
||||
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
|
||||
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
|
||||
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]
|
||||
|
||||
# 6
|
||||
#
|
||||
# @1 [1, 5, 10, 10, 5, 1]
|
||||
# @2 [2, 8, 12, 8, 2, 0]
|
||||
# @3 [3, 9, 10, 6, 3, 1]
|
||||
# @4 [4, 8, 8, 8, 4, 0]
|
||||
# @5 [5, 5, 10, 10, 1, 1]
|
||||
# @6 [6, 0, 20, 0, 6, 0]
|
||||
|
||||
# last row, 1 if odd, 0 if even
|
||||
# second to last, subtract 2 on odds, add 2 on evens
|
||||
|
||||
def compute_pseudopascal(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
return dist
|
||||
|
||||
def compute_distributions(N):
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
denom = math.comb(N, j+1)
|
||||
dist[i][j] /= denom
|
||||
return dist
|
||||
|
||||
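# Builds an N x N matrix whose (i, j) entry approximates the probability that, given the
# per-distance sample sizes actually drawn, the incoherence profile of a true order-i parity
# could be confused with order j: for each distance the feasible range [i_min, i_max] of the
# unobserved true incoherence is formed from the counts, and order j's expected value is
# scored with a raised-cosine window centred on that range.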
def confusion_probabilities(N, samples):
|
||||
sample_sizes = np.zeros(N)
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
if i == j:
|
||||
continue
|
||||
distance = hamming_distance(a, b)
|
||||
sample_sizes[distance - 1] += 1
|
||||
|
||||
confusion = np.zeros((N, N))
|
||||
dist = compute_pseudopascal(N)
|
||||
np.multiply(dist, 2 ** N, dist)
|
||||
# These are the probabilities that we might mix up any two orders given a particular sample size
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
probability = 1.0
|
||||
for k in range(0, N):
|
||||
full_size = math.comb(N, k+1) * (2 ** N)
|
||||
sample_size = sample_sizes[k]
|
||||
num_unknowns = full_size - sample_size
|
||||
i_incoherent = dist[i][k]
|
||||
# Worst case, we sample only the coherent points,
|
||||
i_min = max(i_incoherent - num_unknowns, 0) / full_size
|
||||
i_max = min(sample_size, i_incoherent) / full_size
|
||||
u = (i_min + i_max) / 2  # midpoint of the feasible incoherence range
|
||||
s = (i_max - i_min) / 2
|
||||
probability *= raised_cosine(dist[j][k] / full_size, u, s)
|
||||
confusion[i][j] = probability
|
||||
return confusion
|
||||
|
||||
def raised_cosine(x, u, s):
|
||||
if x < (u - s):
|
||||
return 0
|
||||
if x > (u + s):
|
||||
return 0
|
||||
return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s))
|
||||
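# raised_cosine(x, u, s) above is a unit-area bump supported on [u - s, u + s].
# A minimal midpoint-rule check of that normalization (a sketch; the helper
# name is illustrative):
def _example_raised_cosine_area(u=0.5, s=0.25, steps=10000):
    dx = 2 * s / steps
    total = 0.0
    for i in range(steps):
        x = (u - s) + (i + 0.5) * dx
        total += raised_cosine(x, u, s) * dx
    return total  # approximately 1.0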
|
||||
# Probability of getting k red balls after drawing n from a bag with m total balls and j red balls in it
|
||||
# (n choose k) * p^k * (1-p)^(n-k)
|
||||
|
||||
# p/m chance of getting a red ball
|
||||
# (1 - p/m) chance of not getting a red ball
|
||||
|
||||
# One way (p/m) * ((p-1)/(m-1)) * ((p-2)/(m-2))
|
||||
# (1 - (p/m))
|
||||
|
||||
def p_bernoulli(n, k, m, j):
|
||||
probabilities = np.zeros((n + 1, n + 1))
|
||||
probabilities.fill(-1)
|
||||
# if n == k:
|
||||
# return 1.0
|
||||
# if k > p:
|
||||
# return 0.0
|
||||
stack = [(0,0)]
|
||||
while len(stack) > 0:
|
||||
(a, b) = stack.pop()
|
||||
if a + b == n:
|
||||
probabilities[a][b] = 1 if a == k else 0
|
||||
elif a > j:
|
||||
probabilities[a][b] = 0
|
||||
elif b > (m - j):
|
||||
probabilities[a][b] = 0
|
||||
else:
|
||||
p_left = probabilities[a + 1][b]
|
||||
p_right = probabilities[a][b + 1]
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
p = (j - a) / (m - a - b)
|
||||
probabilities[a][b] = p_left * p + p_right * (1 - p)
|
||||
else:
|
||||
stack.append((a, b))
|
||||
if p_left < 0:
|
||||
stack.append((a + 1, b))
|
||||
if p_right < 0:
|
||||
stack.append((a, b + 1))
|
||||
return probabilities[0][0]
|
||||
|
||||
# P = 1.0
|
||||
# p_k = 0
|
||||
# p_nk = 0
|
||||
# for i in range(1, k + 1):
|
||||
# P *= (n + 1 - i) / i
|
||||
# while P > 1.0 and p_k < k:
|
||||
# P *= p
|
||||
# p_k += 1
|
||||
# while P > 1.0 and p_nk < (n - k):
|
||||
# P *= (1 - p)
|
||||
# p_nk += 1
|
||||
# while p_k < k:
|
||||
# P *= p
|
||||
# p_k += 1
|
||||
# while (p_nk < (n - k)):
|
||||
# P *= (1 - p)
|
||||
# p_nk += 1
|
||||
# return P
|
||||
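# Despite its name, p_bernoulli above computes the hypergeometric probability of
# drawing exactly k "red" items in n draws without replacement from m items of
# which j are red. A closed-form cross-check (a sketch; the helper name is
# illustrative):
def _example_hypergeometric_pmf(n, k, m, j):
    if k > n or k > j or (n - k) > (m - j):
        return 0.0
    return math.comb(j, k) * math.comb(m - j, n - k) / math.comb(m, n)

# e.g. _example_hypergeometric_pmf(4, 2, 10, 5) and p_bernoulli(4, 2, 10, 5)
# both give 10 * 10 / 210, roughly 0.476.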
|
||||
def average_index(x):
|
||||
total = 0
|
||||
for k in range(0, len(x)):
|
||||
total += k * x[k]
|
||||
return total / np.sum(x)
|
||||
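# average_index is the probability-weighted mean position of a distribution,
# e.g. average_index([0.25, 0.5, 0.25]) == (0 * 0.25 + 1 * 0.5 + 2 * 0.25) / 1.0 == 1.0.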
|
||||
def compute_cumulative_probability(N, bases, p_n):
|
||||
# p_n = np.zeros(N)
|
||||
# p_n.fill(0.5)
|
||||
states = [[]]
|
||||
flips = set()
|
||||
for i in range(1, len(bases)):
|
||||
# (base, _) = bases[i]
|
||||
(_, flip) = bases[i]
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for k in range(0, N - 1):
|
||||
# p_forward += base[k + 1] * next_p[k]
|
||||
# p_backward += base[k] * next_p[k + 1]
|
||||
if flip in flips:
|
||||
# p_n[flip] -= p_forward
|
||||
# p_n[flip] += p_backward
|
||||
flips.remove(flip)
|
||||
else:
|
||||
# p_n[flip] += p_forward
|
||||
# p_n[flip] -= p_backward
|
||||
flips.add(flip)
|
||||
states.append(flips.copy())
|
||||
# np.clip(p_n, 0, 1, p_n)
|
||||
# print('Contribution probabilities', p_n)
|
||||
|
||||
min_p_n = np.min(p_n)
|
||||
max_p_n = np.max(p_n)
|
||||
|
||||
|
||||
p_k = np.zeros(N)
|
||||
for k in range(0, N):
|
||||
stack = [(k, len(bases) - 1)]
|
||||
probabilities = np.zeros((N, len(bases)))
|
||||
probabilities.fill(-1)
|
||||
while len(stack) > 0:
|
||||
(i, base_index) = stack.pop()
|
||||
(base, flip) = bases[base_index]
|
||||
if base_index == 0:
|
||||
probabilities[i, 0] = base[i]
|
||||
else:
|
||||
left = i - 1
|
||||
right = i + 1
|
||||
state = states[base_index - 1]
|
||||
p_flip = max(min(p_n[flip] + 0.5, 1.0), 0)
|
||||
if flip in state:
|
||||
p_flip = 1 - p_flip
|
||||
p_left = probabilities[left, base_index - 1] if left >= 0 else 0
|
||||
p_right = probabilities[right, base_index - 1] if right < N else 0
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
probabilities[i, base_index] = base[i] * p_left * (1 - p_flip) + base[i] * p_right * p_flip
|
||||
else:
|
||||
stack.append((i, base_index))
|
||||
if p_left < 0:
|
||||
stack.append((left, base_index - 1))
|
||||
if p_right < 0:
|
||||
stack.append((right, base_index - 1))
|
||||
p_k[k] = probabilities[k][-1]
|
||||
np.divide(p_k, np.sum(p_k), p_k)
|
||||
return p_k
|
||||
|
||||
# 8, 32, 2^5
|
||||
# 10, 64, 2^6
|
||||
# 12, 128, 2^7
|
||||
# 14, 256, 2^8
|
||||
# 16, 512, 2^9
|
||||
# 18, 1024, 2^10
|
||||
# 20, 2048, 2^11
|
||||
# 22, 4096, 2^12
|
||||
def main():
|
||||
N = 8
|
||||
sample_size = 16
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(xor(x))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
# confusion = confusion_probabilities(N, samples)
|
||||
# print(confusion)
|
||||
# return
|
||||
|
||||
# for i in range(0, 2**N):
|
||||
# x = decode(i, N)
|
||||
# y = int(xor(x))
|
||||
# samples.append(Point(x,y))
|
||||
|
||||
base = np.zeros(N)
|
||||
current = np.zeros(N)
|
||||
cumulative_probability = np.ones(N)
|
||||
flip_likelihood = np.zeros(N)
|
||||
cumulative_deltas = np.zeros(N)
|
||||
direction = -1
|
||||
flips = set()
|
||||
bases = []
|
||||
last_flip = -1
|
||||
|
||||
for _ in range(0, 2 ** N):
|
||||
lowest_err = -1
|
||||
use_flip = -1
|
||||
for flip in range(-1, N):
|
||||
coherent_distances = np.zeros((len(samples), N+1))
|
||||
incoherent_distances = np.zeros((len(samples), N+1))
|
||||
all_coherent = True
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
|
||||
if is_coherent:
|
||||
coherent_distances[i][distance] += 1
|
||||
else:
|
||||
incoherent_distances[i][distance] += 1
|
||||
all_coherent = False
|
||||
if all_coherent:
|
||||
print('Flip and halt', flip)
|
||||
return
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
# est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances))
|
||||
# print(est_incoherence)
|
||||
|
||||
probability = np.ones(N)
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
components = []
|
||||
for i in range(0, len(samples)):
|
||||
for j in range(1, N + 1):
|
||||
p_k = np.zeros(N)
|
||||
# confusion = np.zeros((N, N))
|
||||
n = coherent_distances[i][j] + incoherent_distances[i][j]
|
||||
if n == 0:
|
||||
continue
|
||||
a = incoherent_distances[i][j]
|
||||
t = math.comb(N, j)
|
||||
# for k in range(0, N):
|
||||
# p = dist[k][j - 1]
|
||||
# a_ideal = round(p * n / t)
|
||||
# # base_prob = p_bernoulli(int(n), a_ideal, t, int(p))
|
||||
# for q in range(0, N):
|
||||
# u = dist[q][j - 1]
|
||||
# p_ratio = p / t
|
||||
# u_ratio = u / t
|
||||
# confusion[k][q] = p_bernoulli(int(n), a_ideal, t, int(u))
|
||||
# np.divide(confusion, np.max(confusion, axis=0), confusion)
|
||||
|
||||
for k in range(0, N):
|
||||
p = dist[k][j - 1]
|
||||
a_ideal = round(p * n / t)
|
||||
# How likely are we to correctly identify an ideal sample?
|
||||
# for q in range(0, N):
|
||||
p_ideal = p_bernoulli(int(n), a_ideal, t, int(p))
|
||||
# P = math.comb(int(n), int(a)) * math.pow(p, int(a)) * math.pow(1 - p, int(n - a))
|
||||
p_k[k] = p_bernoulli(int(n), int(a), t, int(p))# * (n / t)
|
||||
# p_bernoulli(int(n), int(a), math.comb(N, j), int(p))
|
||||
# probability *= P
|
||||
components.append(p_k)
|
||||
np.divide(p_k, np.sum(p_k), p_k)
|
||||
np.multiply(probability, p_k, probability)
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
|
||||
# p_cross_k is the probability that we correctly identified at k
|
||||
# plus the probabilities that we misidentify at q when it is actually k
|
||||
|
||||
# probability of drawing from sample k = p_bernoulli
|
||||
|
||||
# p_cross_k = np.zeros(N)
|
||||
# for k in range(0, N):
|
||||
# for q in range(0, N):
|
||||
# p_cross_k[k] += p_k[q] * confusion[k][q]
|
||||
# if k == q:
|
||||
# continue
|
||||
# p_cross_k[k] += (1 - p_k[k]) * p_k[q] * confusion[k][q]
|
||||
# p_cross_k[k] -= (1 - p_k[q]) * p_k[k] * confusion[q][k]
|
||||
|
||||
# if q == k:
|
||||
# continue
|
||||
# p_cross_k[k] += (1 - p_k[k]) * p_k[q] * confusion[k][q]
|
||||
# p_cross_k[k] -= (1 - p_k[k])
|
||||
# p_cross_k[k] -= p_k[k] * (1 - confusion[k][k]) * confusion[q][k]
|
||||
|
||||
|
||||
# for k in range(0, N):
|
||||
# P = p_k[k]
|
||||
# for m in range(0, N):
|
||||
# if m == k:
|
||||
# continue
|
||||
# if p_k[m] == 0:
|
||||
# continue
|
||||
# P /= p_k[m]
|
||||
# p_cross_k[k] = P
|
||||
# min_value = np.min(p_cross_k)
|
||||
# np.subtract(p_cross_k, min_value, p_cross_k)
|
||||
# np.add(probability, p_cross_k, probability)
|
||||
# total = np.sum(p_k)
|
||||
# if total > 0:
|
||||
# np.divide(p_k, total, p_k)
|
||||
# np.multiply(p_k, probability, probability)
|
||||
# np.divide(probability, np.sum(probability), probability)
|
||||
# print(probability)
|
||||
|
||||
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
if flip < 0:
|
||||
np.copyto(base, probability)
|
||||
else:
|
||||
np.copyto(current, probability)
|
||||
|
||||
|
||||
# print(k, i, min_true_value, max_true_value)
|
||||
|
||||
# confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative
|
||||
# err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence
|
||||
# denom += 1
|
||||
# print(flip, k, err)
|
||||
# err /= denom
|
||||
# if flip < 0:
|
||||
# base[k] = probability
|
||||
# else:
|
||||
# current[k] = probability
|
||||
|
||||
if flip >= 0:
|
||||
if np.sum(current) == 0:
|
||||
continue
|
||||
np.divide(current, np.sum(current), current)
|
||||
|
||||
base_mean_index = average_index(base)
|
||||
base_variance = 0
|
||||
for i in range(0, N):
|
||||
base_variance += base[i] * (base_mean_index - i) ** 2
|
||||
base_err = 0
|
||||
norm = np.zeros(N)
|
||||
for i in range(0, N):
|
||||
norm[i] = 1 / (base_variance * math.sqrt(2 * math.pi)) * math.exp(-1 / 2 * ((i - base_mean_index) / base_variance) ** 2)
|
||||
np.divide(norm, np.sum(norm), norm)
|
||||
for i in range(0, N):
|
||||
base_err += (base[i] - norm[i]) ** 2
|
||||
|
||||
current_mean_index = average_index(current)
|
||||
current_variance = 0
|
||||
for i in range(0, N):
|
||||
current_variance += current[i] * (current_mean_index - i) ** 2
|
||||
current_err = 0
|
||||
for i in range(0, N):
|
||||
norm[i] = 1 / (current_variance * math.sqrt(2 * math.pi)) * math.exp(-1 / 2 * ((i - current_mean_index) / current_variance) ** 2)
|
||||
np.divide(norm, np.sum(norm), norm)
|
||||
for i in range(0, N):
|
||||
current_err += (current[i] - norm[i]) ** 2
|
||||
|
||||
delta = abs(1 - (base_mean_index - current_mean_index))
|
||||
print(flip, current)
|
||||
print('Mean', current_mean_index, base_mean_index)
|
||||
print('Variance', current_variance, base_variance)
|
||||
print('Err', current_err, base_err)
|
||||
score = current_variance
|
||||
|
||||
# base_score = 0
|
||||
# for i in range(0, N):
|
||||
# base_score += (base[round(base_mean_index)] - base[i]) ** 2
|
||||
|
||||
# score = 0
|
||||
# for i in range(0, N):
|
||||
# score += (current[round(current_mean_index)] - current[i]) ** 2
|
||||
# print('Score', score, base_score)
|
||||
|
||||
# print(current)
|
||||
# temp = np.roll(cumulative_probability, -1)
|
||||
# temp[-1] = 1.0
|
||||
# np.multiply(current, temp, current)
|
||||
# np.divide(current, np.sum(current), current)
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for i in range(1, N):
|
||||
# p_forward += base[i] * current[i - 1]
|
||||
# for i in range(0, N - 1):
|
||||
# p_backward += base[i] * current[i + 1]
|
||||
# scale = 0.01
|
||||
# if flip in flips:
|
||||
# flip_likelihood[flip] += scale * p_backward
|
||||
# flip_likelihood[flip] -= scale * p_forward
|
||||
# else:
|
||||
# flip_likelihood[flip] -= scale * p_backward
|
||||
# flip_likelihood[flip] += scale * p_forward
|
||||
# delta = p_forward - p_backward
|
||||
# print(flip, current, p_forward, p_backward)
|
||||
# base_index = average_index(base)
|
||||
# current_index = average_index(current)
|
||||
# err = abs(1 - (base_index - current_index))
|
||||
# print(base_index, current_index, err)
|
||||
|
||||
# base_index = average_index(cumulative_probability)
|
||||
# new_index = average_index(current)
|
||||
# if isnan(new_index):
|
||||
# continue
|
||||
# np.divide(current, np.sum(current), current)
|
||||
# np.subtract(1, current, current)
|
||||
# print(flip,p_forward,p_backward,current)
|
||||
if use_flip < 0 or delta < lowest_err:
|
||||
use_flip = flip
|
||||
lowest_err = score
|
||||
|
||||
# cumulative_deltas[flip] += 0
|
||||
|
||||
# for k in range(0, N - 1):
|
||||
# value = current[k] * cumulative_probability[k + 1]
|
||||
# if use_flip < 0 or value > lowest_err:
|
||||
# use_flip = flip
|
||||
# lowest_err = value
|
||||
# print(flip, highest_value)
|
||||
else:
|
||||
# p_next = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# P = 0.0
|
||||
# for j in range(0, N):
|
||||
# if i == j:
|
||||
# continue
|
||||
# P += base[i] * (1 - base[j])
|
||||
# p_next[i] = P
|
||||
# base = p_next
|
||||
|
||||
# base[0] = 0
|
||||
np.divide(base, np.sum(base), base)
|
||||
bases.append((base.copy(), last_flip))
|
||||
# bases.insert(0, base.copy())
|
||||
# cumulative_probability = compute_cumulative_probability(N, bases)
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for i in range(1, N):
|
||||
# p_forward += cumulative_probability[i] * base[i - 1]
|
||||
# for i in range(0, N - 1):
|
||||
# p_backward += cumulative_probability[i] * base[i + 1]
|
||||
print('Base', base)
|
||||
# # # np.subtract(1, base, base)
|
||||
# # # print(cumulative_probability)
|
||||
# shift_left = np.roll(cumulative_probability, -1)
|
||||
# shift_left[-1] = 0.0
|
||||
# # # # print('Shift Left', p_forward, shift_left)
|
||||
# shift_right = np.roll(cumulative_probability, 1)
|
||||
# shift_right[0] = 0.0
|
||||
# # # # print('Shift Right', p_backward, shift_right)
|
||||
# p_next = np.add(np.multiply(shift_left, 0.5), np.multiply(shift_right, 0.5))
|
||||
# p_next[0] = 0
|
||||
# np.divide(p_next, np.sum(p_next), p_next)
|
||||
# # # # print('Next', p_next)
|
||||
# # # # # print(cumulative_probability)
|
||||
# # # # # print(base)
|
||||
# np.multiply(base, p_next, cumulative_probability)
|
||||
# cumulative_probability[0] = 0
|
||||
# # # # # np.multiply(cumulative_probability, shift_right, cumulative_probability)
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
cumulative_probability = compute_cumulative_probability(N, bases, flip_likelihood)
|
||||
print('Cumulative', cumulative_probability)
|
||||
print('Likelihood', flip_likelihood)
|
||||
|
||||
# cumulative_probability[0] = 0
|
||||
# use_flip = -1
|
||||
# if direction < 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] < 0:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# direction = 1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# else:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] > 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# direction = -1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# if direction < 0:
|
||||
# cumulative_probability[0] = 0
|
||||
# else:
|
||||
# cumulative_probability[-1] = 0
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
# print(cumulative_deltas)
|
||||
|
||||
# use_flip = -1
|
||||
# highest_p = 0
|
||||
# for i in range(0, N):
|
||||
# p = flip_likelihood[i]
|
||||
# if i in flips:
|
||||
# p = -p
|
||||
# if use_flip < 0 or p > highest_p:
|
||||
# use_flip = i
|
||||
# highest_p = p
|
||||
# if not use_flip in flips and highest_p < 0 or use_flip in flips and highest_p > 0:
|
||||
# flip_likelihood[use_flip] *= -1.0
|
||||
|
||||
if use_flip < 0:
|
||||
return
|
||||
last_flip = use_flip
|
||||
if use_flip in flips:
|
||||
flips.remove(use_flip)
|
||||
else:
|
||||
flips.add(use_flip)
|
||||
print('Flip', use_flip, lowest_err)
|
||||
print(flips)
|
||||
cumulative_deltas[use_flip] = -cumulative_deltas[use_flip]
|
||||
for p in samples:
|
||||
if p.x[use_flip]:
|
||||
p.y ^= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
656
mutations24.py
Normal file
@ -0,0 +1,656 @@
|
||||
from cmath import isnan
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ','.join([str(int(x)) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def id(self):
|
||||
return ','.join(sorted([self.a.id(), self.b.id()]))
|
||||
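# A small illustration of the two classes above (hypothetical values):
#   a = Point(np.array([0, 1, 1, 0]), 0)
#   b = Point(np.array([1, 1, 0, 0]), 1)
#   inf = Influence(a, b)
#   inf.dof == {0, 2}         bit positions where a.x and b.x differ
#   inf.coherent() == False   the pair disagrees on y, so some bit in dof must matter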
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a.x, b.x))
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor(x):
|
||||
# return sum(x) % 2
|
||||
half = int(len(x) / 2)
|
||||
return sum(x[:half]) % 2
|
||||
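# The hidden target in this file is the parity of the first half of the bits,
# e.g. xor(np.array([1, 0, 1, 1])) == (1 + 0) % 2 == 1; only the first
# len(x) // 2 positions actually influence y.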
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent():
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent() and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent:
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 1st row (n+1 choose k+1) * (1-(k mod 2))
|
||||
# pseudopascal to compute the follow-on rows
|
||||
# assuming solvability, we want to maximize the probability that our current state and our state with
|
||||
# a particular single flip are one order apart in the correct direction
|
||||
|
||||
|
||||
|
||||
# 2, 0
|
||||
# 2, 2, 0
|
||||
# 2, 4, 2, 0
|
||||
# 2, 6, 6, 2, 0
|
||||
# 2, 8,12, 8, 2, 0
|
||||
# 2,10,20,20,10, 2, 0
|
||||
|
||||
# 3,-9,19,-33,51,-73,99
|
||||
# 3,-6,10,-14,18,-22,26
|
||||
# 3,-3, 4, -4, 4, -4, 4
|
||||
# 3, 0, 1, 0, 0, 0, 0
|
||||
# 3, 3, 1, 1, 0, 0, 0
|
||||
# 3, 6, 4, 2, 1, 0, 0
|
||||
# 3, 9,10, 6, 3, 1, 0
|
||||
|
||||
# 4, 0, 4, 0
|
||||
# 4, 4, 4, 4, 0
|
||||
# 4, 8, 8, 8, 4, 0
|
||||
# 4,12,16,16,12, 4, 0
|
||||
|
||||
# 5, 0,10, 0, 1
|
||||
# 5, 5,10,10, 1, 1
|
||||
# 5,
|
||||
# 5,
|
||||
|
||||
|
||||
|
||||
# 3
|
||||
#
|
||||
# @1 [1, 2, 1]
|
||||
# @2 [2, 2, 0]
|
||||
# @3 [3, 0, 1]
|
||||
|
||||
# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
|
||||
#
|
||||
# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
|
||||
# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
|
||||
# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
|
||||
# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -
|
||||
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -
|
||||
|
||||
# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
|
||||
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
|
||||
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
|
||||
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
|
||||
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]
|
||||
|
||||
# 6
|
||||
#
|
||||
# @1 [1, 5, 10, 10, 5, 1]
|
||||
# @2 [2, 8, 12, 8, 2, 0]
|
||||
# @3 [3, 9, 10, 6, 3, 1]
|
||||
# @4 [4, 8, 8, 8, 4, 0]
|
||||
# @5 [5, 5, 10, 10, 1, 1]
|
||||
# @6 [6, 0, 20, 0, 6, 0]
|
||||
|
||||
# last row, 1 if odd, 0 if even
|
||||
# second to last, subtract 2 on odds, add 2 on evens
|
||||
|
||||
def compute_pseudopascal(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
return dist
|
||||
|
||||
def compute_distributions(N):
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
denom = math.comb(N, j+1)
|
||||
dist[i][j] /= denom
|
||||
return dist
|
||||
|
||||
def confusion_probabilities(N, samples):
|
||||
sample_sizes = np.zeros(N)
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
if i == j:
|
||||
continue
|
||||
distance = hamming_distance(a, b)
|
||||
sample_sizes[distance - 1] += 1
|
||||
|
||||
confusion = np.zeros((N, N))
|
||||
dist = compute_pseudopascal(N)
|
||||
np.multiply(dist, 2 ** N, dist)
|
||||
# These are the probabilities that we might mix up any two orders given a particular sample size
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
probability = 1.0
|
||||
for k in range(0, N):
|
||||
full_size = math.comb(N, k+1) * (2 ** N)
|
||||
sample_size = sample_sizes[k]
|
||||
num_unknowns = full_size - sample_size
|
||||
i_incoherent = dist[i][k]
|
||||
# Worst case, we sample only the coherent points,
|
||||
i_min = max(i_incoherent - num_unknowns, 0) / full_size
|
||||
i_max = min(sample_size, i_incoherent) / full_size
|
||||
u = (i_min + i_max) / 2  # midpoint of the feasible incoherence range
|
||||
s = (i_max - i_min) / 2
|
||||
probability *= raised_cosine(dist[j][k] / full_size, u, s)
|
||||
confusion[i][j] = probability
|
||||
return confusion
|
||||
|
||||
def raised_cosine(x, u, s):
|
||||
if x < (u - s):
|
||||
return 0
|
||||
if x > (u + s):
|
||||
return 0
|
||||
return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s))
|
||||
|
||||
# Probability of getting k red balls after drawing n from a bag with m total balls and j red balls in it
|
||||
# (n choose k) * p^k * (1-p)^(n-k)
|
||||
|
||||
# p/m chance of getting a red ball
|
||||
# (1 - p/m) chance of not getting a red ball
|
||||
|
||||
# One way (p/m) * ((p-1)/(m-1)) * ((p-2)/(m-2))
|
||||
# (1 - (p/m))
|
||||
|
||||
def p_bernoulli(n, k, m, j):
|
||||
# probabilities = np.zeros((n + 1, n + 1))
|
||||
# probabilities.fill(-1)
|
||||
# # if n == k:
|
||||
# # return 1.0
|
||||
# # if k > p:
|
||||
# # return 0.0
|
||||
# stack = [(0,0)]
|
||||
# while len(stack) > 0:
|
||||
# (a, b) = stack.pop()
|
||||
# if a + b == n:
|
||||
# probabilities[a][b] = 1 if a == k else 0
|
||||
# elif a > j:
|
||||
# probabilities[a][b] = 0
|
||||
# elif b > (m - j):
|
||||
# probabilities[a][b] = 0
|
||||
# else:
|
||||
# p_left = probabilities[a + 1][b]
|
||||
# p_right = probabilities[a][b + 1]
|
||||
# if p_left >= 0 and p_right >= 0:
|
||||
# p = (j - a) / (m - a - b)
|
||||
# probabilities[a][b] = p_left * p + p_right * (1 - p)
|
||||
# else:
|
||||
# stack.append((a, b))
|
||||
# if p_left < 0:
|
||||
# stack.append((a + 1, b))
|
||||
# if p_right < 0:
|
||||
# stack.append((a, b + 1))
|
||||
# return probabilities[0][0]
|
||||
|
||||
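# In this version the exact without-replacement recursion is commented out above;
# the code below approximates it with a binomial draw at p = j / m, building up
# comb(n, k) * p**k * (1 - p)**(n - k) while interleaving the factors to keep the
# running product from overflowing.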
p = j / m
|
||||
P = 1.0
|
||||
p_k = 0
|
||||
p_nk = 0
|
||||
for i in range(1, k + 1):
|
||||
P *= (n + 1 - i) / i
|
||||
while P > 1.0 and p_k < k:
|
||||
P *= p
|
||||
p_k += 1
|
||||
while P > 1.0 and p_nk < (n - k):
|
||||
P *= (1 - p)
|
||||
p_nk += 1
|
||||
while p_k < k:
|
||||
P *= p
|
||||
p_k += 1
|
||||
while (p_nk < (n - k)):
|
||||
P *= (1 - p)
|
||||
p_nk += 1
|
||||
return P
|
||||
|
||||
def average_index(x):
|
||||
total = 0
|
||||
for k in range(0, len(x)):
|
||||
total += k * x[k]
|
||||
return total / np.sum(x)
|
||||
|
||||
def compute_cumulative_probability(N, bases, p_n):
|
||||
# p_n = np.zeros(N)
|
||||
# p_n.fill(0.5)
|
||||
states = [[]]
|
||||
flips = set()
|
||||
for i in range(1, len(bases)):
|
||||
# (base, _) = bases[i]
|
||||
(_, flip) = bases[i]
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for k in range(0, N - 1):
|
||||
# p_forward += base[k + 1] * next_p[k]
|
||||
# p_backward += base[k] * next_p[k + 1]
|
||||
if flip in flips:
|
||||
# p_n[flip] -= p_forward
|
||||
# p_n[flip] += p_backward
|
||||
flips.remove(flip)
|
||||
else:
|
||||
# p_n[flip] += p_forward
|
||||
# p_n[flip] -= p_backward
|
||||
flips.add(flip)
|
||||
states.append(flips.copy())
|
||||
# np.clip(p_n, 0, 1, p_n)
|
||||
# print('Contribution probabilities', p_n)
|
||||
|
||||
min_p_n = np.min(p_n)
|
||||
max_p_n = np.max(p_n)
|
||||
|
||||
|
||||
p_k = np.zeros(N)
|
||||
for k in range(0, N):
|
||||
stack = [(k, len(bases) - 1)]
|
||||
probabilities = np.zeros((N, len(bases)))
|
||||
probabilities.fill(-1)
|
||||
while len(stack) > 0:
|
||||
(i, base_index) = stack.pop()
|
||||
(base, flip) = bases[base_index]
|
||||
if base_index == 0:
|
||||
probabilities[i, 0] = base[i]
|
||||
else:
|
||||
left = i - 1
|
||||
right = i + 1
|
||||
state = states[base_index - 1]
|
||||
p_flip = max(min(p_n[flip] + 0.5, 1.0), 0)
|
||||
if flip in state:
|
||||
p_flip = 1 - p_flip
|
||||
p_left = probabilities[left, base_index - 1] if left >= 0 else 0
|
||||
p_right = probabilities[right, base_index - 1] if right < N else 0
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
probabilities[i, base_index] = base[i] * p_left * (1 - p_flip) + base[i] * p_right * p_flip
|
||||
else:
|
||||
stack.append((i, base_index))
|
||||
if p_left < 0:
|
||||
stack.append((left, base_index - 1))
|
||||
if p_right < 0:
|
||||
stack.append((right, base_index - 1))
|
||||
p_k[k] = probabilities[k][-1]
|
||||
np.divide(p_k, np.sum(p_k), p_k)
|
||||
return p_k
|
||||
|
||||
# 8, 32, 2^5
|
||||
# 10, 64, 2^6
|
||||
# 12, 128, 2^7
|
||||
# 14, 256, 2^8
|
||||
# 16, 512, 2^9
|
||||
# 18, 1024, 2^10
|
||||
# 20, 2048, 2^11
|
||||
# 22, 4096, 2^12
|
||||
def main():
|
||||
N = 16
|
||||
sample_size = 128
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(xor(x))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
# confusion = confusion_probabilities(N, samples)
|
||||
# print(confusion)
|
||||
# return
|
||||
|
||||
# for i in range(0, 2**N):
|
||||
# x = decode(i, N)
|
||||
# y = int(xor(x))
|
||||
# samples.append(Point(x,y))
|
||||
|
||||
base = np.zeros(N)
|
||||
current = np.zeros(N)
|
||||
cumulative_probability = np.ones(N)
|
||||
flip_likelihood = np.zeros(N)
|
||||
cumulative_deltas = np.zeros(N)
|
||||
direction = -1
|
||||
flips = set()
|
||||
bases = []
|
||||
last_flip = -1
|
||||
|
||||
for _ in range(0, 2 ** N):
|
||||
lowest_err = -1
|
||||
use_flip = -1
|
||||
for flip in range(-1, N):
|
||||
coherent_distances = np.zeros(N+1)
|
||||
incoherent_distances = np.zeros(N+1)
|
||||
all_coherent = True
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
|
||||
if is_coherent:
|
||||
coherent_distances[distance] += 1
|
||||
else:
|
||||
incoherent_distances[distance] += 1
|
||||
all_coherent = False
|
||||
if all_coherent:
|
||||
print('Flip and halt', flip)
|
||||
return
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
# est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances))
|
||||
# print(est_incoherence)
|
||||
|
||||
probability = np.ones(N)
|
||||
# np.divide(probability, np.sum(probability), probability)
|
||||
for j in range(1, N + 1):
|
||||
n = coherent_distances[j] + incoherent_distances[j]
|
||||
if n == 0:
|
||||
continue
|
||||
for k in range(0, N):
|
||||
a = incoherent_distances[j]
|
||||
t = math.comb(N, j) * (2 ** N)
|
||||
p = dist[k][j - 1] * (2 ** N)
|
||||
prob = p_bernoulli(int(n), int(a), t, p)
|
||||
probability[k] *= prob
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
|
||||
if flip < 0:
|
||||
np.copyto(base, probability)
|
||||
else:
|
||||
np.copyto(current, probability)
|
||||
|
||||
|
||||
# print(k, i, min_true_value, max_true_value)
|
||||
|
||||
# confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative
|
||||
# err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence
|
||||
# denom += 1
|
||||
# print(flip, k, err)
|
||||
# err /= denom
|
||||
# if flip < 0:
|
||||
# base[k] = probability
|
||||
# else:
|
||||
# current[k] = probability
|
||||
|
||||
if flip >= 0:
|
||||
if np.sum(current) == 0:
|
||||
continue
|
||||
np.divide(current, np.sum(current), current)
|
||||
# print(current)
|
||||
# temp = np.roll(cumulative_probability, -1)
|
||||
# temp[-1] = 1.0
|
||||
# np.multiply(current, temp, current)
|
||||
# np.divide(current, np.sum(current), current)
|
||||
p_forward = 0
|
||||
p_backward = 0
|
||||
for i in range(1, N):
|
||||
p_forward += base[i] * current[i - 1]
|
||||
for i in range(0, N - 1):
|
||||
p_backward += base[i] * current[i + 1]
|
||||
scale = 0.01
|
||||
if flip in flips:
|
||||
flip_likelihood[flip] += scale * p_backward
|
||||
flip_likelihood[flip] -= scale * p_forward
|
||||
else:
|
||||
flip_likelihood[flip] -= scale * p_backward
|
||||
flip_likelihood[flip] += scale * p_forward
|
||||
delta = p_forward - p_backward
|
||||
print(flip, current, p_forward, p_backward)
|
||||
base_index = average_index(base)
|
||||
current_index = average_index(current)
|
||||
err = abs(1 - (base_index - current_index))
|
||||
print(base_index, current_index, err)
|
||||
|
||||
# base_index = average_index(cumulative_probability)
|
||||
# new_index = average_index(current)
|
||||
# if isnan(new_index):
|
||||
# continue
|
||||
# np.divide(current, np.sum(current), current)
|
||||
# np.subtract(1, current, current)
|
||||
# print(flip,p_forward,p_backward,current)
|
||||
if delta > 0 and (use_flip < 0 or delta > lowest_err):
|
||||
use_flip = flip
|
||||
lowest_err = delta
|
||||
|
||||
# cumulative_deltas[flip] += 0
|
||||
|
||||
# for k in range(0, N - 1):
|
||||
# value = current[k] * cumulative_probability[k + 1]
|
||||
# if use_flip < 0 or value > lowest_err:
|
||||
# use_flip = flip
|
||||
# lowest_err = value
|
||||
# print(flip, highest_value)
|
||||
else:
|
||||
# p_next = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# P = 0.0
|
||||
# for j in range(0, N):
|
||||
# if i == j:
|
||||
# continue
|
||||
# P += base[i] * (1 - base[j])
|
||||
# p_next[i] = P
|
||||
# base = p_next
|
||||
|
||||
# base[0] = 0
|
||||
np.divide(base, np.sum(base), base)
|
||||
bases.append((base.copy(), last_flip))
|
||||
# bases.insert(0, base.copy())
|
||||
# cumulative_probability = compute_cumulative_probability(N, bases)
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for i in range(1, N):
|
||||
# p_forward += cumulative_probability[i] * base[i - 1]
|
||||
# for i in range(0, N - 1):
|
||||
# p_backward += cumulative_probability[i] * base[i + 1]
|
||||
print('Base', base)
|
||||
# # # np.subtract(1, base, base)
|
||||
# # # print(cumulative_probability)
|
||||
# shift_left = np.roll(cumulative_probability, -1)
|
||||
# shift_left[-1] = 0.0
|
||||
# # # # print('Shift Left', p_forward, shift_left)
|
||||
# shift_right = np.roll(cumulative_probability, 1)
|
||||
# shift_right[0] = 0.0
|
||||
# # # # print('Shift Right', p_backward, shift_right)
|
||||
# p_next = np.add(np.multiply(shift_left, 0.5), np.multiply(shift_right, 0.5))
|
||||
# p_next[0] = 0
|
||||
# np.divide(p_next, np.sum(p_next), p_next)
|
||||
# # # # print('Next', p_next)
|
||||
# # # # # print(cumulative_probability)
|
||||
# # # # # print(base)
|
||||
# np.multiply(base, p_next, cumulative_probability)
|
||||
# cumulative_probability[0] = 0
|
||||
# # # # # np.multiply(cumulative_probability, shift_right, cumulative_probability)
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
cumulative_probability = compute_cumulative_probability(N, bases, flip_likelihood)
|
||||
print('Cumulative', cumulative_probability)
|
||||
print('Likelihood', flip_likelihood)
|
||||
|
||||
# cumulative_probability[0] = 0
|
||||
# use_flip = -1
|
||||
# if direction < 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] < 0:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# direction = 1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# else:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] > 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# direction = -1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# if direction < 0:
|
||||
# cumulative_probability[0] = 0
|
||||
# else:
|
||||
# cumulative_probability[-1] = 0
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
# print(cumulative_deltas)
|
||||
|
||||
# use_flip = -1
|
||||
# highest_p = 0
|
||||
# for i in range(0, N):
|
||||
# p = flip_likelihood[i]
|
||||
# if i in flips:
|
||||
# p = -p
|
||||
# if use_flip < 0 or p > highest_p:
|
||||
# use_flip = i
|
||||
# highest_p = p
|
||||
# if not use_flip in flips and highest_p < 0 or use_flip in flips and highest_p > 0:
|
||||
# flip_likelihood[use_flip] *= -1.0
|
||||
|
||||
if use_flip < 0:
|
||||
return
|
||||
last_flip = use_flip
|
||||
if use_flip in flips:
|
||||
flips.remove(use_flip)
|
||||
else:
|
||||
flips.add(use_flip)
|
||||
print('Flip', use_flip, lowest_err)
|
||||
print(flips)
|
||||
cumulative_deltas[use_flip] = -cumulative_deltas[use_flip]
|
||||
for p in samples:
|
||||
if p.x[use_flip]:
|
||||
p.y ^= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
791
mutations25.py
Normal file
@ -0,0 +1,791 @@
|
||||
from cmath import isnan
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ','.join([str(int(x)) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def id(self):
|
||||
return ','.join(sorted([self.a.id(), self.b.id()]))
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a.x, b.x))
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor(x):
|
||||
# return sum(x) % 2
|
||||
half = int(len(x) * 3 / 4)
|
||||
return sum(x[:half]) % 2
|
||||
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent():
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent() and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent:
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 1st row (n+1 choose k+1) * (1-(k mod 2))
|
||||
# pseudopascal to compute the follow-on rows
|
||||
# assuming solvability, we want to maximize the probability that our current state and our state with
|
||||
# a particular single flip are one order apart in the correct direction
|
||||
|
||||
|
||||
|
||||
# 2, 0
|
||||
# 2, 2, 0
|
||||
# 2, 4, 2, 0
|
||||
# 2, 6, 6, 2, 0
|
||||
# 2, 8,12, 8, 2, 0
|
||||
# 2,10,20,20,10, 2, 0
|
||||
|
||||
# 3,-9,19,-33,51,-73,99
|
||||
# 3,-6,10,-14,18,-22,26
|
||||
# 3,-3, 4, -4, 4, -4, 4
|
||||
# 3, 0, 1, 0, 0, 0, 0
|
||||
# 3, 3, 1, 1, 0, 0, 0
|
||||
# 3, 6, 4, 2, 1, 0, 0
|
||||
# 3, 9,10, 6, 3, 1, 0
|
||||
|
||||
# 4, 0, 4, 0
|
||||
# 4, 4, 4, 4, 0
|
||||
# 4, 8, 8, 8, 4, 0
|
||||
# 4,12,16,16,12, 4, 0
|
||||
|
||||
# 5, 0,10, 0, 1
|
||||
# 5, 5,10,10, 1, 1
|
||||
# 5,
|
||||
# 5,
|
||||
|
||||
|
||||
|
||||
# 3
|
||||
#
|
||||
# @1 [1, 2, 1]
|
||||
# @2 [2, 2, 0]
|
||||
# @3 [3, 0, 1]
|
||||
|
||||
# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
|
||||
#
|
||||
# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
|
||||
# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
|
||||
# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
|
||||
# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -
|
||||
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -
|
||||
|
||||
# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
|
||||
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
|
||||
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
|
||||
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
|
||||
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]
|
||||
|
||||
# 6
|
||||
#
|
||||
# @1 [1, 5, 10, 10, 5, 1]
|
||||
# @2 [2, 8, 12, 8, 2, 0]
|
||||
# @3 [3, 9, 10, 6, 3, 1]
|
||||
# @4 [4, 8, 8, 8, 4, 0]
|
||||
# @5 [5, 5, 10, 10, 1, 1]
|
||||
# @6 [6, 0, 20, 0, 6, 0]
|
||||
|
||||
# last row, 1 if odd, 0 if even
|
||||
# second to last, subtract 2 on odds, add 2 on evens
|
||||
|
||||
def compute_pseudopascal(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
return dist
|
||||
|
||||
def compute_distributions(N):
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
denom = math.comb(N, j+1)
|
||||
dist[i][j] /= denom
|
||||
return dist
|
||||
|
||||
def compute_pyramids(N):
|
||||
num_orders = max(int(N / 2), 1)
|
||||
pyramids = np.zeros((num_orders, N, N)).astype(np.int32)
|
||||
# 1st order can be filled in as multiplication and forms the base case
|
||||
for i in range(0, N):
|
||||
for j in range(0, i + 1):
|
||||
pyramids[0][i][j] = (i - j + 1) * (j + 1)
|
||||
for order in range(1, num_orders):
|
||||
offset = order * 2
|
||||
|
||||
# fill in the LHS and diagonal
|
||||
for i in range(0, N - offset):
|
||||
value = math.comb(2 * (order + 1) + i - 1, i)
|
||||
pyramids[order][i + offset][0] = value
|
||||
# mirror
|
||||
pyramids[order][i + offset][i + offset] = value
|
||||
|
||||
# accumulate along the diagonals
|
||||
for i in range(1, N):
|
||||
value = pyramids[order][i][0]
|
||||
acc = value
|
||||
for j in range(1, N - i):
|
||||
value += acc
|
||||
pyramids[order][i + j][j] = value
|
||||
acc += pyramids[order - 1][i + j - 1][j - 1]
|
||||
|
||||
return pyramids
|
||||
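# compute_pyramids(N) returns an integer array of shape (max(N // 2, 1), N, N),
# one lower-triangular "pyramid" per order. A small sketch of the first-order
# slab, which is just the multiplication table (i - j + 1) * (j + 1); the helper
# name is illustrative and the values are traced from the code as written:
def _example_first_order_pyramid():
    pyramids = compute_pyramids(5)
    # pyramids[0][3][:4] == [4, 6, 6, 4]
    # pyramids[0][4][:5] == [5, 8, 9, 8, 5]
    return pyramids[0]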
|
||||
def get_total_band_count(distance, band_distance, N):
|
||||
if band_distance % 2 == 1:
|
||||
return 0
|
||||
order = int(band_distance / 2) - 1
|
||||
if order < 0:
|
||||
return 0
|
||||
if distance < order + 1:
|
||||
return 0
|
||||
if distance > N - order - 1:
|
||||
return 0
|
||||
order_root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2
|
||||
scale = math.comb(N - (order + 1) * 2, distance - order - 1)
|
||||
value = math.comb(2 * (order + 1) + N - 2 * (order + 1), N - 2 * (order + 1))
|
||||
return order_root * scale * value
|
||||
|
||||
def get_incoherent_band_count(pyramids, distance, band_distance, k, N):
|
||||
if k == 0 or k == N or band_distance % 2 == 1:
|
||||
return 0
|
||||
order = int(band_distance / 2) - 1
|
||||
if order < 0:
|
||||
return 0
|
||||
if distance < order + 1:
|
||||
return 0
|
||||
if distance > N - order - 1:
|
||||
return 0
|
||||
order_root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2
|
||||
scale = math.comb(N - (order + 1) * 2, distance - order - 1)
|
||||
value = pyramids[order][N - 2][k - 1]
|
||||
return order_root * scale * value
|
||||
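# A hedged usage sketch for the two band-count helpers above; the helper name is
# illustrative and the concrete numbers come from tracing the code as written
# for N = 4, distance = 1, band_distance = 2 (odd band distances return 0):
def _example_band_counts():
    N = 4
    pyramids = compute_pyramids(N + 1)  # main() below also builds pyramids with N + 1
    total = get_total_band_count(1, 2, N)  # 2 * comb(2, 0) * comb(4, 2) == 12.0
    incoherent = get_incoherent_band_count(pyramids, 1, 2, 1, N)  # 2 * 1 * pyramids[0][2][0] == 6.0
    return total, incoherent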
|
||||
def confusion_probabilities(N, samples):
|
||||
sample_sizes = np.zeros(N)
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
if i == j:
|
||||
continue
|
||||
distance = hamming_distance(a, b)
|
||||
sample_sizes[distance - 1] += 1
|
||||
|
||||
confusion = np.zeros((N, N))
|
||||
dist = compute_pseudopascal(N)
|
||||
np.multiply(dist, 2 ** N, dist)
|
||||
# These are the probabilities that we might mix up any two orders given a particular sample size
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
probability = 1.0
|
||||
for k in range(0, N):
|
||||
full_size = math.comb(N, k+1) * (2 ** N)
|
||||
sample_size = sample_sizes[k]
|
||||
num_unknowns = full_size - sample_size
|
||||
i_incoherent = dist[i][k]
|
||||
# Worst case, we sample only the coherent points,
|
||||
i_min = max(i_incoherent - num_unknowns, 0) / full_size
|
||||
i_max = min(sample_size, i_incoherent) / full_size
|
||||
u = (i_min + i_max) / 2  # midpoint of the feasible incoherence range
|
||||
s = (i_max - i_min) / 2
|
||||
probability *= raised_cosine(dist[j][k] / full_size, u, s)
|
||||
confusion[i][j] = probability
|
||||
return confusion
|
||||
|
||||
def raised_cosine(x, u, s):
|
||||
if x < (u - s):
|
||||
return 0
|
||||
if x > (u + s):
|
||||
return 0
|
||||
return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s))
|
||||
|
||||
# Probability of getting k red balls after drawing n from a bag with m total balls and j red balls in it
|
||||
# (n choose k) * p^k * (1-p)^(n-k)
|
||||
|
||||
# p/m chance of getting a red ball
|
||||
# (1 - p/m) chance of not getting a red ball
|
||||
|
||||
# One way (p/m) * ((p-1)/(m-1)) * ((p-2)/(m-2))
|
||||
# (1 - (p/m))
|
||||
|
||||
cache = {}
|
||||
hits = 0
|
||||
misses = 0
|
||||
def p_bernoulli(n, k, m, j):
|
||||
global hits, misses
|
||||
key = (n, k, m, j)
|
||||
if key in cache:
|
||||
hits += 1
|
||||
return cache[key]
|
||||
misses += 1
|
||||
probabilities = np.zeros((n + 1, n + 1))
|
||||
probabilities.fill(-1)
|
||||
stack = [(0,0)]
|
||||
while len(stack) > 0:
|
||||
(a, b) = stack.pop()
|
||||
if a + b == n:
|
||||
probabilities[a][b] = 1 if a == k else 0
|
||||
elif a > j:
|
||||
probabilities[a][b] = 0
|
||||
elif b > (m - j):
|
||||
probabilities[a][b] = 0
|
||||
else:
|
||||
p_left = probabilities[a + 1][b]
|
||||
p_right = probabilities[a][b + 1]
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
p = (j - a) / (m - a - b)
|
||||
probabilities[a][b] = p_left * p + p_right * (1 - p)
|
||||
else:
|
||||
stack.append((a, b))
|
||||
if p_left < 0:
|
||||
stack.append((a + 1, b))
|
||||
if p_right < 0:
|
||||
stack.append((a, b + 1))
|
||||
cache[key] = probabilities[0][0]
|
||||
# if len(cache) % 100 == 0:
|
||||
# print('Cache size: ', len(cache), math.floor(10000 * hits / (hits + misses)) / 100, '%')
|
||||
return probabilities[0][0]
|
||||
|
||||
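# Note: everything below this point is unreachable because the function returns
# above; it is the older binomial approximation, kept for reference.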
p = j / m
|
||||
if n == k:
|
||||
return 1.0
|
||||
if k > p:
|
||||
return 0.0
|
||||
P = 1.0
|
||||
p_k = 0
|
||||
p_nk = 0
|
||||
for i in range(1, k + 1):
|
||||
P *= (n + 1 - i) / i
|
||||
while P > 1.0 and p_k < k:
|
||||
P *= p
|
||||
p_k += 1
|
||||
while P > 1.0 and p_nk < (n - k):
|
||||
P *= (1 - p)
|
||||
p_nk += 1
|
||||
while p_k < k:
|
||||
P *= p
|
||||
p_k += 1
|
||||
while (p_nk < (n - k)):
|
||||
P *= (1 - p)
|
||||
p_nk += 1
|
||||
return P
|
||||
|
||||
def average_index(x):
|
||||
total = 0
|
||||
for k in range(0, len(x)):
|
||||
total += k * x[k]
|
||||
return total / np.sum(x)
|
||||
|
||||
def compute_cumulative_probability(N, bases, p_n):
|
||||
# p_n = np.zeros(N)
|
||||
# p_n.fill(0.5)
|
||||
states = [[]]
|
||||
flips = set()
|
||||
for i in range(1, len(bases)):
|
||||
# (base, _) = bases[i]
|
||||
(_, flip) = bases[i]
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for k in range(0, N - 1):
|
||||
# p_forward += base[k + 1] * next_p[k]
|
||||
# p_backward += base[k] * next_p[k + 1]
|
||||
if flip in flips:
|
||||
# p_n[flip] -= p_forward
|
||||
# p_n[flip] += p_backward
|
||||
flips.remove(flip)
|
||||
else:
|
||||
# p_n[flip] += p_forward
|
||||
# p_n[flip] -= p_backward
|
||||
flips.add(flip)
|
||||
states.append(flips.copy())
|
||||
# np.clip(p_n, 0, 1, p_n)
|
||||
# print('Contribution probabilities', p_n)
|
||||
|
||||
min_p_n = np.min(p_n)
|
||||
max_p_n = np.max(p_n)
|
||||
|
||||
|
||||
p_k = np.zeros(N)
|
||||
for k in range(0, N):
|
||||
stack = [(k, len(bases) - 1)]
|
||||
probabilities = np.zeros((N, len(bases)))
|
||||
probabilities.fill(-1)
|
||||
while len(stack) > 0:
|
||||
(i, base_index) = stack.pop()
|
||||
(base, flip) = bases[base_index]
|
||||
if base_index == 0:
|
||||
probabilities[i, 0] = base[i]
|
||||
else:
|
||||
left = i - 1
|
||||
right = i + 1
|
||||
state = states[base_index - 1]
|
||||
p_flip = max(min(p_n[flip] + 0.5, 1.0), 0)
|
||||
if flip in state:
|
||||
p_flip = 1 - p_flip
|
||||
p_left = probabilities[left, base_index - 1] if left >= 0 else 0
|
||||
p_right = probabilities[right, base_index - 1] if right < N else 0
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
probabilities[i, base_index] = base[i] * p_left * (1 - p_flip) + base[i] * p_right * p_flip
|
||||
else:
|
||||
stack.append((i, base_index))
|
||||
if p_left < 0:
|
||||
stack.append((left, base_index - 1))
|
||||
if p_right < 0:
|
||||
stack.append((right, base_index - 1))
|
||||
p_k[k] = probabilities[k][-1]
|
||||
np.divide(p_k, np.sum(p_k), p_k)
|
||||
return p_k
|
||||
|
||||
# 8, 32, 2^5
|
||||
# 10, 64, 2^6
|
||||
# 12, 128, 2^7
|
||||
# 14, 256, 2^8
|
||||
# 16, 512, 2^9
|
||||
# 18, 1024, 2^10
|
||||
# 20, 2048, 2^11
|
||||
# 22, 4096, 2^12
|
||||
def main():
|
||||
N = 10
|
||||
sample_size = 32
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
dist = compute_pseudopascal(N)
|
||||
pyramids = compute_pyramids(N + 1)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(xor(x))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
# confusion = confusion_probabilities(N, samples)
|
||||
# print(confusion)
|
||||
# return
|
||||
|
||||
# for i in range(0, 2**N):
|
||||
# x = decode(i, N)
|
||||
# y = int(xor(x))
|
||||
# samples.append(Point(x,y))
|
||||
|
||||
base = np.zeros(N)
|
||||
current = np.zeros(N)
|
||||
cumulative_probability = np.ones(N)
|
||||
flip_likelihood = np.zeros(N)
|
||||
cumulative_deltas = np.zeros(N)
|
||||
direction = -1
|
||||
flips = set()
|
||||
bases = []
|
||||
last_flip = -1
|
||||
max_base_index = -1
|
||||
scores = np.zeros(N)
|
||||
indices = []
|
||||
|
||||
for _ in range(0, 2 ** N):
|
||||
lowest_err = -1
|
||||
use_flip = -1
|
||||
for flip in range(-1, N):
|
||||
coherent_distances = np.zeros(N+1)
|
||||
incoherent_distances = np.zeros(N+1)
|
||||
probability = np.ones(N)
|
||||
all_coherent = True
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
bands = [[] for _ in range(0, N + 1)]
|
||||
for j in range(0, len(samples)):
|
||||
if i == j:
|
||||
continue
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
bands[distance].append(b)
|
||||
is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
|
||||
if is_coherent:
|
||||
coherent_distances[distance] += 1
|
||||
else:
|
||||
incoherent_distances[distance] += 1
|
||||
all_coherent = False
|
||||
for distance in range(0, N + 1):
|
||||
band = bands[distance]
|
||||
if len(band) < 2:
|
||||
continue
|
||||
coherent_bands = np.zeros(N + 1)
|
||||
incoherent_bands = np.zeros(N + 1)
|
||||
for j in range(0, len(band)):
|
||||
c = band[j]
|
||||
for k in range(0, len(band)):
|
||||
if j == k:
|
||||
continue
|
||||
d = band[k]
|
||||
band_distance = hamming_distance(c, d)
|
||||
is_coherent = ((flip < 0 or c.x[flip] == d.x[flip]) and c.y == d.y) or ((flip >= 0 and c.x[flip] != d.x[flip]) and c.y != d.y)
|
||||
if is_coherent:
|
||||
coherent_bands[band_distance] += 1
|
||||
else:
|
||||
incoherent_bands[band_distance] += 1
|
||||
for band_distance in range(1, N + 1):
|
||||
n = coherent_bands[band_distance] + incoherent_bands[band_distance]
|
||||
if n == 0:
|
||||
continue
|
||||
t = get_total_band_count(distance, band_distance, N)
|
||||
if t == 0:
|
||||
continue
|
||||
a = incoherent_bands[band_distance]
|
||||
for k in range(0, N):
|
||||
p = get_incoherent_band_count(pyramids, distance, band_distance, k + 1, N)
|
||||
prob = p_bernoulli(int(n), int(a), t, p)
|
||||
# if prob == 0 and k == 5:
|
||||
# p = get_incoherent_band_count(pyramids, distance, band_distance, k, N)
|
||||
# print('test')
|
||||
probability[k] *= prob
|
||||
if np.sum(probability) == 0:
|
||||
print('Warning: probability vector collapsed to zero')
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
|
||||
if all_coherent:
|
||||
print('Flip and halt', flip)
|
||||
return
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
# est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances))
|
||||
# print(est_incoherence)
|
||||
# np.divide(probability, np.sum(probability), probability)
|
||||
for j in range(1, N + 1):
|
||||
n = coherent_distances[j] + incoherent_distances[j]
|
||||
if n == 0:
|
||||
continue
|
||||
t = math.comb(N, j) * (2 ** N)
|
||||
if t == 0:
|
||||
continue
|
||||
a = incoherent_distances[j]
|
||||
for k in range(0, N):
|
||||
p = dist[k][j - 1] * (2 ** N)
|
||||
prob = p_bernoulli(int(n), int(a), t, p)
|
||||
probability[k] *= prob
|
||||
if np.sum(probability) == 0:
|
||||
print('Warning: probability vector collapsed to zero')
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
|
||||
if flip < 0:
|
||||
np.copyto(base, probability)
|
||||
else:
|
||||
np.copyto(current, probability)
|
||||
|
||||
|
||||
# print(k, i, min_true_value, max_true_value)
|
||||
|
||||
# confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative
|
||||
# err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence
|
||||
# denom += 1
|
||||
# print(flip, k, err)
|
||||
# err /= denom
|
||||
# if flip < 0:
|
||||
# base[k] = probability
|
||||
# else:
|
||||
# current[k] = probability
|
||||
|
||||
if flip >= 0:
|
||||
if np.sum(current) == 0:
|
||||
continue
|
||||
np.divide(current, np.sum(current), current)
|
||||
|
||||
# print(current)
|
||||
# temp = np.roll(cumulative_probability, -1)
|
||||
# temp[-1] = 1.0
|
||||
# np.multiply(current, temp, current)
|
||||
# np.divide(current, np.sum(current), current)
|
||||
p_forward = 0
|
||||
p_backward = 0
|
||||
for i in range(1, N):
|
||||
p_forward += base[i] * current[i - 1]
|
||||
for i in range(0, N - 1):
|
||||
p_backward += base[i] * current[i + 1]
|
||||
scores[flip] += p_forward - p_backward
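# Added commentary (not in the original commit): base is the estimated distribution over
# the order k for the current labels, current is the same distribution after XOR-ing every
# label by bit `flip`; p_forward is the probability mass on current sitting one order lower
# than base, p_backward on it sitting one order higher, so scores[flip] accumulates the net
# evidence that flipping this bit moves the sample toward a lower-order function.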
|
||||
|
||||
scale = 0.01
|
||||
if flip in flips:
|
||||
flip_likelihood[flip] += scale * p_backward
|
||||
flip_likelihood[flip] -= scale * p_forward
|
||||
else:
|
||||
flip_likelihood[flip] -= scale * p_backward
|
||||
flip_likelihood[flip] += scale * p_forward
|
||||
delta = p_forward - p_backward
|
||||
# print(flip, current, p_forward, p_backward)
|
||||
base_index = average_index(cumulative_probability)
|
||||
current_index = average_index(current)
|
||||
err = abs(1 - (base_index - current_index))
|
||||
# print(base_index, current_index, err)
|
||||
|
||||
# base_index = average_index(cumulative_probability)
|
||||
# new_index = average_index(current)
|
||||
# if isnan(new_index):
|
||||
# continue
|
||||
# np.divide(current, np.sum(current), current)
|
||||
# np.subtract(1, current, current)
|
||||
# print(flip,p_forward,p_backward,current)
|
||||
if use_flip < 0 or delta > lowest_err:
|
||||
use_flip = flip
|
||||
lowest_err = delta
|
||||
|
||||
# cumulative_deltas[flip] += 0
|
||||
|
||||
# for k in range(0, N - 1):
|
||||
# value = current[k] * cumulative_probability[k + 1]
|
||||
# if use_flip < 0 or value > lowest_err:
|
||||
# use_flip = flip
|
||||
# lowest_err = value
|
||||
# print(flip, highest_value)
|
||||
else:
|
||||
# p_next = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# P = 0.0
|
||||
# for j in range(0, N):
|
||||
# if i == j:
|
||||
# continue
|
||||
# P += base[i] * (1 - base[j])
|
||||
# p_next[i] = P
|
||||
# base = p_next
|
||||
|
||||
# base[0] = 0
|
||||
np.divide(base, np.sum(base), base)
|
||||
max_base_index = np.argmax(base)
|
||||
bases.append((base.copy(), last_flip))
|
||||
# bases.insert(0, base.copy())
|
||||
# cumulative_probability = compute_cumulative_probability(N, bases)
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for i in range(1, N):
|
||||
# p_forward += cumulative_probability[i] * base[i - 1]
|
||||
# for i in range(0, N - 1):
|
||||
# p_backward += cumulative_probability[i] * base[i + 1]
|
||||
print('Base', base)
|
||||
# # np.subtract(1, base, base)
|
||||
# # print(cumulative_probability)
|
||||
# shift_left = np.roll(cumulative_probability, -len(indices))
|
||||
# shift_left[-1] = 0.0
|
||||
# # # # print('Shift Left', p_forward, shift_left)
|
||||
# shift_right = np.roll(cumulative_probability, 1)
|
||||
# shift_right[0] = 0.0
|
||||
# # # # print('Shift Right', p_backward, shift_right)
|
||||
# p_next = np.add(np.multiply(shift_left, 0.5), np.multiply(shift_right, 0.5))
|
||||
# np.divide(p_next, np.sum(p_next), p_next)
|
||||
# # # # # print('Next', p_next)
|
||||
# # # # # # print(cumulative_probability)
|
||||
# # # # # # print(base)
|
||||
# np.multiply(base, p_next, cumulative_probability)
|
||||
# cumulative_probability[0] = 0
|
||||
# # # # # np.multiply(cumulative_probability, shift_right, cumulative_probability)
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
# cumulative_probability = compute_cumulative_probability(N, bases, flip_likelihood)
|
||||
# print('Cumulative', cumulative_probability)
|
||||
# print('Likelihood', flip_likelihood)
|
||||
|
||||
# cumulative_probability[0] = 0
|
||||
# use_flip = -1
|
||||
# if direction < 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] < 0:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# direction = 1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# else:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] > 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# direction = -1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# if direction < 0:
|
||||
# cumulative_probability[0] = 0
|
||||
# else:
|
||||
# cumulative_probability[-1] = 0
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
# print(cumulative_deltas)
|
||||
|
||||
# use_flip = -1
|
||||
# highest_p = 0
|
||||
# for i in range(0, N):
|
||||
# p = flip_likelihood[i]
|
||||
# if i in flips:
|
||||
# p = -p
|
||||
# if use_flip < 0 or p > highest_p:
|
||||
# use_flip = i
|
||||
# highest_p = p
|
||||
# if not use_flip in flips and highest_p < 0 or use_flip in flips and highest_p > 0:
|
||||
# flip_likelihood[use_flip] *= -1.0
|
||||
print(scores)
|
||||
|
||||
indices = sorted(range(len(scores)), key=lambda i: scores[i])[-(max_base_index + 1):]
|
||||
print(indices)
|
||||
|
||||
for flip in indices:
|
||||
scores[flip] *= -1.0
|
||||
if flip in flips:
|
||||
flips.remove(flip)
|
||||
else:
|
||||
flips.add(flip)
|
||||
for p in samples:
|
||||
if p.x[flip]:
|
||||
p.y ^= 1
|
||||
print(flips)
|
||||
|
||||
# if use_flip < 0:
|
||||
# return
|
||||
# last_flip = use_flip
|
||||
# if use_flip in flips:
|
||||
# flips.remove(use_flip)
|
||||
# else:
|
||||
# flips.add(use_flip)
|
||||
# print('Flip', use_flip, lowest_err)
|
||||
# print(flips)
|
||||
# cumulative_deltas[use_flip] = -cumulative_deltas[use_flip]
|
||||
# for p in samples:
|
||||
# if p.x[use_flip]:
|
||||
# p.y ^= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
741
mutations26.py
Normal file
@ -0,0 +1,741 @@
|
||||
from cmath import isnan
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ','.join([str(int(x)) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def id(self):
|
||||
return ','.join(sorted([self.a.id(), self.b.id()]))
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
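# Added commentary (not in the original commit): decode(x, N) expands the integer x into its
# N low-order bits, least-significant bit first, while encode(v) above packs bits into bytes
# most-significant bit first; the two operate on different representations (int vs bytearray)
# and are not inverses of each other.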
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a.x, b.x))
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor_n(x, n):
|
||||
return sum(x[:n]) % 2
|
||||
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent():
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent() and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent():
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 1st row (n+1 choose k+1) * (1-(k mod 2))
|
||||
# pseudopascal to compute the follow-on rows
|
||||
# assuming solvability, we want to maximize the probability that our current state and our state with
|
||||
# a particular single flip are one order apart in the correct direction
|
||||
|
||||
|
||||
|
||||
# 2, 0
|
||||
# 2, 2, 0
|
||||
# 2, 4, 2, 0
|
||||
# 2, 6, 6, 2, 0
|
||||
# 2, 8,12, 8, 2, 0
|
||||
# 2,10,20,20,10, 2, 0
|
||||
|
||||
# 3,-9,19,-33,51,-73,99
|
||||
# 3,-6,10,-14,18,-22,26
|
||||
# 3,-3, 4, -4, 4, -4, 4
|
||||
# 3, 0, 1, 0, 0, 0, 0
|
||||
# 3, 3, 1, 1, 0, 0, 0
|
||||
# 3, 6, 4, 2, 1, 0, 0
|
||||
# 3, 9,10, 6, 3, 1, 0
|
||||
|
||||
# 4, 0, 4, 0
|
||||
# 4, 4, 4, 4, 0
|
||||
# 4, 8, 8, 8, 4, 0
|
||||
# 4,12,16,16,12, 4, 0
|
||||
|
||||
# 5, 0,10, 0, 1
|
||||
# 5, 5,10,10, 1, 1
|
||||
# 5,
|
||||
# 5,
|
||||
|
||||
|
||||
|
||||
# 3
|
||||
#
|
||||
# @1 [1, 2, 1]
|
||||
# @2 [2, 2, 0]
|
||||
# @3 [3, 0, 1]
|
||||
|
||||
# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
|
||||
#
|
||||
# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
|
||||
# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
|
||||
# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
|
||||
# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -
|
||||
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -
|
||||
|
||||
# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
|
||||
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
|
||||
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
|
||||
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
|
||||
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]
|
||||
|
||||
# 6
|
||||
#
|
||||
# @1 [1, 5, 10, 10, 5, 1]
|
||||
# @2 [2, 8, 12, 8, 2, 0]
|
||||
# @3 [3, 9, 10, 6, 3, 1]
|
||||
# @4 [4, 8, 8, 8, 4, 0]
|
||||
# @5 [5, 5, 10, 10, 1, 1]
|
||||
# @6 [6, 0, 20, 0, 6, 0]
|
||||
|
||||
# last row, 1 if odd, 0 if even
|
||||
# second to last, subtract 2 on odds, add 2 on evens
|
||||
|
||||
def compute_pseudopascal(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
return dist
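# Illustrative check (not part of the original commit): for small N the rows of
# compute_pseudopascal reproduce the tables in the comments above, e.g.
#
#   >>> compute_pseudopascal(3)
#   array([[1., 2., 1.],
#          [2., 2., 0.],
#          [3., 0., 1.]])
#
# matching the "@1 [1, 2, 1] / @2 [2, 2, 0] / @3 [3, 0, 1]" rows listed for N = 3: row k
# counts, per Hamming distance, how many points disagree with the origin's label when the
# target is an order-(k+1) parity.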
|
||||
|
||||
def compute_distributions(N):
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
denom = math.comb(N, j+1)
|
||||
dist[i][j] /= denom
|
||||
return dist
|
||||
|
||||
def confusion_probabilities(N, samples):
|
||||
sample_sizes = np.zeros(N)
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
if i == j:
|
||||
continue
|
||||
distance = hamming_distance(a, b)
|
||||
sample_sizes[distance - 1] += 1
|
||||
|
||||
confusion = np.zeros((N, N))
|
||||
dist = compute_pseudopascal(N)
|
||||
np.multiply(dist, 2 ** N, dist)
|
||||
# These are the probabilities that we might mix up any two orders given a particular sample size
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
probability = 1.0
|
||||
for k in range(0, N):
|
||||
full_size = math.comb(N, k+1) * (2 ** N)
|
||||
sample_size = sample_sizes[k]
|
||||
num_unknowns = full_size - sample_size
|
||||
i_incoherent = dist[i][k]
|
||||
# Worst case, we sample only the coherent points,
|
||||
i_min = max(i_incoherent - num_unknowns, 0) / full_size
|
||||
i_max = min(sample_size, i_incoherent) / full_size
|
||||
u = (i_min + i_max) / 2
|
||||
s = (i_max - i_min) / 2
|
||||
probability *= raised_cosine(dist[j][k] / full_size, u, s)
|
||||
confusion[i][j] = probability
|
||||
return confusion
|
||||
|
||||
def raised_cosine(x, u, s):
|
||||
if x < (u - s):
|
||||
return 0
|
||||
if x > (u + s):
|
||||
return 0
|
||||
return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s))
|
||||
|
||||
# Probability of getting k red balls after drawing n from a bag with m total balls and j red balls in it
|
||||
# (n choose k) * p^k * (1-p)^(n-k)
|
||||
|
||||
# p/m chance of getting a red ball
|
||||
# (1 - p/m) chance of not getting a red ball
|
||||
|
||||
# One way (p/m) * ((p-1)/(m-1)) * ((p-2)/(m-2))
|
||||
# (1 - (p/m))
|
||||
|
||||
def p_bernoulli(n, k, m, j):
|
||||
# probabilities = np.zeros((n + 1, n + 1))
|
||||
# probabilities.fill(-1)
|
||||
# # if n == k:
|
||||
# # return 1.0
|
||||
# # if k > p:
|
||||
# # return 0.0
|
||||
# stack = [(0,0)]
|
||||
# while len(stack) > 0:
|
||||
# (a, b) = stack.pop()
|
||||
# if a + b == n:
|
||||
# probabilities[a][b] = 1 if a == k else 0
|
||||
# elif a > j:
|
||||
# probabilities[a][b] = 0
|
||||
# elif b > (m - j):
|
||||
# probabilities[a][b] = 0
|
||||
# else:
|
||||
# p_left = probabilities[a + 1][b]
|
||||
# p_right = probabilities[a][b + 1]
|
||||
# if p_left >= 0 and p_right >= 0:
|
||||
# p = (j - a) / (m - a - b)
|
||||
# probabilities[a][b] = p_left * p + p_right * (1 - p)
|
||||
# else:
|
||||
# stack.append((a, b))
|
||||
# if p_left < 0:
|
||||
# stack.append((a + 1, b))
|
||||
# if p_right < 0:
|
||||
# stack.append((a, b + 1))
|
||||
# return probabilities[0][0]
|
||||
|
||||
p = j / m
|
||||
P = 1.0
|
||||
p_k = 0
|
||||
p_nk = 0
|
||||
for i in range(1, k + 1):
|
||||
P *= (n + 1 - i) / i
|
||||
while P > 1.0 and p_k < k:
|
||||
P *= p
|
||||
p_k += 1
|
||||
while P > 1.0 and p_nk < (n - k):
|
||||
P *= (1 - p)
|
||||
p_nk += 1
|
||||
while p_k < k:
|
||||
P *= p
|
||||
p_k += 1
|
||||
while (p_nk < (n - k)):
|
||||
P *= (1 - p)
|
||||
p_nk += 1
|
||||
return P
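# Illustrative sketch (not part of the original commit): p_bernoulli(n, k, m, j) evaluates
# the binomial pmf described above, C(n, k) * p^k * (1-p)^(n-k) with p = j / m, interleaving
# the multiplications to keep the running product from overflowing or underflowing. A direct
# reference version for comparison:
def p_bernoulli_reference(n, k, m, j):
    p = j / m
    return math.comb(n, k) * (p ** k) * ((1 - p) ** (n - k))
# e.g. p_bernoulli(10, 3, 100, 40) and p_bernoulli_reference(10, 3, 100, 40) both give
# roughly 0.215.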
|
||||
|
||||
def average_index(x):
|
||||
total = 0
|
||||
for k in range(0, len(x)):
|
||||
total += k * x[k]
|
||||
return total / np.sum(x)
|
||||
|
||||
def compute_cumulative_probability(N, bases, p_n):
|
||||
# p_n = np.zeros(N)
|
||||
# p_n.fill(0.5)
|
||||
states = [[]]
|
||||
flips = set()
|
||||
for i in range(1, len(bases)):
|
||||
# (base, _) = bases[i]
|
||||
(_, flip) = bases[i]
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for k in range(0, N - 1):
|
||||
# p_forward += base[k + 1] * next_p[k]
|
||||
# p_backward += base[k] * next_p[k + 1]
|
||||
if flip in flips:
|
||||
# p_n[flip] -= p_forward
|
||||
# p_n[flip] += p_backward
|
||||
flips.remove(flip)
|
||||
else:
|
||||
# p_n[flip] += p_forward
|
||||
# p_n[flip] -= p_backward
|
||||
flips.add(flip)
|
||||
states.append(flips.copy())
|
||||
# np.clip(p_n, 0, 1, p_n)
|
||||
# print('Contribution probabilities', p_n)
|
||||
|
||||
min_p_n = np.min(p_n)
|
||||
max_p_n = np.max(p_n)
|
||||
|
||||
|
||||
p_k = np.zeros(N)
|
||||
for k in range(0, N):
|
||||
stack = [(k, len(bases) - 1)]
|
||||
probabilities = np.zeros((N, len(bases)))
|
||||
probabilities.fill(-1)
|
||||
while len(stack) > 0:
|
||||
(i, base_index) = stack.pop()
|
||||
(base, flip) = bases[base_index]
|
||||
if base_index == 0:
|
||||
probabilities[i, 0] = base[i]
|
||||
else:
|
||||
left = i - 1
|
||||
right = i + 1
|
||||
state = states[base_index - 1]
|
||||
p_flip = max(min(p_n[flip] + 0.5, 1.0), 0)
|
||||
if flip in state:
|
||||
p_flip = 1 - p_flip
|
||||
p_left = probabilities[left, base_index - 1] if left >= 0 else 0
|
||||
p_right = probabilities[right, base_index - 1] if right < N else 0
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
probabilities[i, base_index] = base[i] * p_left * (1 - p_flip) + base[i] * p_right * p_flip
|
||||
else:
|
||||
stack.append((i, base_index))
|
||||
if p_left < 0:
|
||||
stack.append((left, base_index - 1))
|
||||
if p_right < 0:
|
||||
stack.append((right, base_index - 1))
|
||||
p_k[k] = probabilities[k][-1]
|
||||
np.divide(p_k, np.sum(p_k), p_k)
|
||||
return p_k
|
||||
|
||||
# 8, 32, 2^5
|
||||
# 10, 64, 2^6
|
||||
# 12, 128, 2^7
|
||||
# 14, 256, 2^8
|
||||
# 16, 512, 2^9
|
||||
# 18, 1024, 2^10
|
||||
# 20, 2048, 2^11
|
||||
# 22, 4096, 2^12
|
||||
def main():
|
||||
N = 16
|
||||
sample_size = 32
|
||||
e_bits = 2
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(xor_n(x, e_bits))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
|
||||
chords = [{} for _ in range(0, len(samples))]
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(i + 1, len(samples)):
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
if distance not in chords[i]:
|
||||
chords[i][distance] = []
|
||||
chords[i][distance].append(j)
|
||||
if distance not in chords[j]:
|
||||
chords[j][distance] = []
|
||||
chords[j][distance].append(i)
|
||||
|
||||
probability = np.zeros((N, N))
|
||||
scalars = np.ones(N)
|
||||
for i in range(0, len(samples)):
|
||||
origin = samples[i]
|
||||
for (distance, points) in chords[i].items():
|
||||
n = len(points)
|
||||
t = math.comb(N, distance)
|
||||
a = sum([0 if origin.y == samples[index].y else 1 for index in points])
|
||||
for k in range(1, N - 1):
|
||||
p = dist[k][distance - 1]
|
||||
prob_at_k = p_bernoulli(n, a, t, p)
|
||||
for flip in range(0, N):
|
||||
a_flip = sum([0 if origin.y == samples[index].y and origin.x[flip] == samples[index].x[flip] or origin.y != samples[index].y and origin.x[flip] != samples[index].x[flip] else 1 for index in points])
|
||||
p_forward = dist[k - 1][distance - 1]
|
||||
p_backward = dist[k + 1][distance - 1]
|
||||
prob_at_k_forward = p_bernoulli(n, a_flip, t, p_forward)
|
||||
prob_at_k_backward = p_bernoulli(n, a_flip, t, p_backward)
|
||||
# prob_at_k_backward = 0
|
||||
probability[k][flip] += (n / t) * prob_at_k * (prob_at_k_forward - prob_at_k_backward)
|
||||
# probability[k][flip] *= prob_at_k * prob_at_k_forward
|
||||
# scalars[k] *= np.max(probability[k])
|
||||
# np.divide(probability[k], np.max(probability[k]), probability[k])
|
||||
|
||||
# print(scalars)
|
||||
print(probability)
|
||||
return
|
||||
|
||||
coherent_distances = np.zeros(N + 1)
|
||||
incoherent_distances = np.zeros(N + 1)
|
||||
total_distances = np.zeros(N + 1)
|
||||
for i in range(0, len(samples)):
|
||||
coherent_distances.fill(0)
|
||||
incoherent_distances.fill(0)
|
||||
total_distances.fill(0)
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
is_coherent = a.y == b.y
|
||||
total_distances[distance] += 1
|
||||
if is_coherent:
|
||||
coherent_distances[distance] += 1
|
||||
else:
|
||||
incoherent_distances[distance] += 1
|
||||
print(total_distances)
|
||||
print(incoherent_distances)
|
||||
print()
|
||||
for d in range(1, N + 1):
|
||||
n = coherent_distances[d] + incoherent_distances[d]
|
||||
if n == 0:
|
||||
continue
|
||||
local_probability = np.ones(N)
|
||||
for k in range(0, N):
|
||||
a = incoherent_distances[d]
|
||||
t = math.comb(N, d)
|
||||
p = dist[k][d - 1]
|
||||
prob = p_bernoulli(int(n), int(a), t, p)
|
||||
local_probability[k] = prob
|
||||
probability[i][k] *= prob
|
||||
print(local_probability)
|
||||
np.divide(probability[i], np.sum(probability[i]), probability[i])
|
||||
print()
|
||||
print(probability)
|
||||
total_probability = np.ones(N)
|
||||
for i in range(0, len(samples)):
|
||||
np.multiply(probability[i], total_probability, total_probability)
|
||||
np.divide(total_probability, np.sum(total_probability), total_probability)
|
||||
print(total_probability)
|
||||
|
||||
return
|
||||
|
||||
|
||||
# confusion = confusion_probabilities(N, samples)
|
||||
# print(confusion)
|
||||
# return
|
||||
|
||||
# for i in range(0, 2**N):
|
||||
# x = decode(i, N)
|
||||
# y = int(xor(x))
|
||||
# samples.append(Point(x,y))
|
||||
|
||||
base = np.zeros(N)
|
||||
current = np.zeros(N)
|
||||
cumulative_probability = np.ones(N)
|
||||
flip_likelihood = np.zeros(N)
|
||||
cumulative_deltas = np.zeros(N)
|
||||
direction = -1
|
||||
flips = set()
|
||||
bases = []
|
||||
last_flip = -1
|
||||
|
||||
for _ in range(0, 2 ** N):
|
||||
lowest_err = -1
|
||||
use_flip = -1
|
||||
for flip in range(-1, N):
|
||||
coherent_distances = np.zeros(N+1)
|
||||
incoherent_distances = np.zeros(N+1)
|
||||
all_coherent = True
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
|
||||
if is_coherent:
|
||||
coherent_distances[distance] += 1
|
||||
else:
|
||||
incoherent_distances[distance] += 1
|
||||
all_coherent = False
|
||||
if all_coherent:
|
||||
print('Flip and halt', flip)
|
||||
return
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
# est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances))
|
||||
# print(est_incoherence)
|
||||
|
||||
probability = np.ones(N)
|
||||
# np.divide(probability, np.sum(probability), probability)
|
||||
for j in range(1, N + 1):
|
||||
n = coherent_distances[j] + incoherent_distances[j]
|
||||
if n == 0:
|
||||
continue
|
||||
for k in range(0, N):
|
||||
a = incoherent_distances[j]
|
||||
t = math.comb(N, j) * (2 ** N)
|
||||
p = dist[k][j - 1] * (2 ** N)
|
||||
prob = p_bernoulli(int(n), int(a), t, p)
|
||||
probability[k] *= prob
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
|
||||
if flip < 0:
|
||||
np.copyto(base, probability)
|
||||
else:
|
||||
np.copyto(current, probability)
|
||||
|
||||
|
||||
# print(k, i, min_true_value, max_true_value)
|
||||
|
||||
# confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative
|
||||
# err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence
|
||||
# denom += 1
|
||||
# print(flip, k, err)
|
||||
# err /= denom
|
||||
# if flip < 0:
|
||||
# base[k] = probability
|
||||
# else:
|
||||
# current[k] = probability
|
||||
|
||||
if flip >= 0:
|
||||
if np.sum(current) == 0:
|
||||
continue
|
||||
np.divide(current, np.sum(current), current)
|
||||
# print(current)
|
||||
# temp = np.roll(cumulative_probability, -1)
|
||||
# temp[-1] = 1.0
|
||||
# np.multiply(current, temp, current)
|
||||
# np.divide(current, np.sum(current), current)
|
||||
p_forward = 0
|
||||
p_backward = 0
|
||||
for i in range(1, N):
|
||||
p_forward += base[i] * current[i - 1]
|
||||
for i in range(0, N - 1):
|
||||
p_backward += base[i] * current[i + 1]
|
||||
scale = 0.01
|
||||
if flip in flips:
|
||||
flip_likelihood[flip] += scale * p_backward
|
||||
flip_likelihood[flip] -= scale * p_forward
|
||||
else:
|
||||
flip_likelihood[flip] -= scale * p_backward
|
||||
flip_likelihood[flip] += scale * p_forward
|
||||
delta = p_forward - p_backward
|
||||
print(flip, current, p_forward, p_backward)
|
||||
base_index = average_index(base)
|
||||
current_index = average_index(current)
|
||||
err = abs(1 - (base_index - current_index))
|
||||
print(base_index, current_index, err)
|
||||
|
||||
# base_index = average_index(cumulative_probability)
|
||||
# new_index = average_index(current)
|
||||
# if isnan(new_index):
|
||||
# continue
|
||||
# np.divide(current, np.sum(current), current)
|
||||
# np.subtract(1, current, current)
|
||||
# print(flip,p_forward,p_backward,current)
|
||||
if delta > 0 and (use_flip < 0 or delta > lowest_err):
|
||||
use_flip = flip
|
||||
lowest_err = delta
|
||||
|
||||
# cumulative_deltas[flip] += 0
|
||||
|
||||
# for k in range(0, N - 1):
|
||||
# value = current[k] * cumulative_probability[k + 1]
|
||||
# if use_flip < 0 or value > lowest_err:
|
||||
# use_flip = flip
|
||||
# lowest_err = value
|
||||
# print(flip, highest_value)
|
||||
else:
|
||||
# p_next = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# P = 0.0
|
||||
# for j in range(0, N):
|
||||
# if i == j:
|
||||
# continue
|
||||
# P += base[i] * (1 - base[j])
|
||||
# p_next[i] = P
|
||||
# base = p_next
|
||||
|
||||
# base[0] = 0
|
||||
np.divide(base, np.sum(base), base)
|
||||
bases.append((base.copy(), last_flip))
|
||||
# bases.insert(0, base.copy())
|
||||
# cumulative_probability = compute_cumulative_probability(N, bases)
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for i in range(1, N):
|
||||
# p_forward += cumulative_probability[i] * base[i - 1]
|
||||
# for i in range(0, N - 1):
|
||||
# p_backward += cumulative_probability[i] * base[i + 1]
|
||||
print('Base', base)
|
||||
# # # np.subtract(1, base, base)
|
||||
# # # print(cumulative_probability)
|
||||
# shift_left = np.roll(cumulative_probability, -1)
|
||||
# shift_left[-1] = 0.0
|
||||
# # # # print('Shift Left', p_forward, shift_left)
|
||||
# shift_right = np.roll(cumulative_probability, 1)
|
||||
# shift_right[0] = 0.0
|
||||
# # # # print('Shift Right', p_backward, shift_right)
|
||||
# p_next = np.add(np.multiply(shift_left, 0.5), np.multiply(shift_right, 0.5))
|
||||
# p_next[0] = 0
|
||||
# np.divide(p_next, np.sum(p_next), p_next)
|
||||
# # # # print('Next', p_next)
|
||||
# # # # # print(cumulative_probability)
|
||||
# # # # # print(base)
|
||||
# np.multiply(base, p_next, cumulative_probability)
|
||||
# cumulative_probability[0] = 0
|
||||
# # # # # np.multiply(cumulative_probability, shift_right, cumulative_probability)
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
cumulative_probability = compute_cumulative_probability(N, bases, flip_likelihood)
|
||||
print('Cumulative', cumulative_probability)
|
||||
print('Likelihood', flip_likelihood)
|
||||
|
||||
# cumulative_probability[0] = 0
|
||||
# use_flip = -1
|
||||
# if direction < 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] < 0:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# direction = 1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# else:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] > 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# direction = -1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# if direction < 0:
|
||||
# cumulative_probability[0] = 0
|
||||
# else:
|
||||
# cumulative_probability[-1] = 0
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
# print(cumulative_deltas)
|
||||
|
||||
# use_flip = -1
|
||||
# highest_p = 0
|
||||
# for i in range(0, N):
|
||||
# p = flip_likelihood[i]
|
||||
# if i in flips:
|
||||
# p = -p
|
||||
# if use_flip < 0 or p > highest_p:
|
||||
# use_flip = i
|
||||
# highest_p = p
|
||||
# if not use_flip in flips and highest_p < 0 or use_flip in flips and highest_p > 0:
|
||||
# flip_likelihood[use_flip] *= -1.0
|
||||
|
||||
if use_flip < 0:
|
||||
return
|
||||
last_flip = use_flip
|
||||
if use_flip in flips:
|
||||
flips.remove(use_flip)
|
||||
else:
|
||||
flips.add(use_flip)
|
||||
print('Flip', use_flip, lowest_err)
|
||||
print(flips)
|
||||
cumulative_deltas[use_flip] = -cumulative_deltas[use_flip]
|
||||
for p in samples:
|
||||
if p.x[use_flip]:
|
||||
p.y ^= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
541
mutations3.py
Normal file
@ -0,0 +1,541 @@
|
||||
import hashlib
|
||||
import math
|
||||
from matplotlib import offsetbox
|
||||
import numpy as np
|
||||
import random
|
||||
from struct import pack, pack_into, unpack_from
|
||||
import secrets
|
||||
|
||||
from numpy import hamming
|
||||
|
||||
N = 32
|
||||
M = 2
|
||||
|
||||
def bit_at_index(buffer, index):
|
||||
offset = (index >> 3) % len(buffer)
|
||||
return buffer[offset] & (1 << (index & 0b111)) != 0
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def encode_f(f, buffer, offset=0):
|
||||
(inverted, flips, child) = f
|
||||
pack_into('I', buffer, offset, inverted)
|
||||
offset += 4
|
||||
for index in flips:
|
||||
pack_into('I', buffer, offset, 0)
|
||||
offset += 4
|
||||
pack_into('I', buffer, offset, index)
|
||||
offset += 4
|
||||
if child is None:
|
||||
pack_into('I', buffer, offset, 1)
|
||||
offset += 4
|
||||
return offset
|
||||
(inverted, left, right) = child
|
||||
pack_into('I', buffer, offset, 2 if not inverted else 3)
|
||||
offset += 4
|
||||
offset = encode_f(left, buffer, offset)
|
||||
offset = encode_f(right, buffer, offset)
|
||||
return offset
|
||||
|
||||
def generate_random_branch(p_mutation):
|
||||
global N
|
||||
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
inverted = random.randint(0, 1)
|
||||
indices = set()
|
||||
children = []
|
||||
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in indices]
|
||||
if len(available_indices) == 1:
|
||||
indices.add(available_indices[0])
|
||||
continue
|
||||
indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_add_children)
|
||||
right = generate_random_branch(p_add_children)
|
||||
children.append((child_inverted, left, right))
|
||||
return (inverted, indices, children)
|
||||
|
||||
def mutate_f(f, p_mutation):
|
||||
global N
|
||||
(inverted, indices, children) = f
|
||||
mutated_indices = set(indices)
|
||||
mutated_children = children[:]
|
||||
|
||||
p_invert = p_mutation * random.random()
|
||||
p_drop_indices = p_mutation * random.random()
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_drop_children = p_mutation * random.random()
|
||||
p_mutate_child = p_mutation * random.random()
|
||||
p_clone_child = p_mutation * random.random()
|
||||
p_invert_child = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
# randomly invert
|
||||
if random.random() < p_invert:
|
||||
inverted ^= 1
|
||||
# randomly drop indices
|
||||
while random.random() < p_drop_indices and len(mutated_indices) > 0:
|
||||
mutated_indices.pop()
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(mutated_indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in mutated_indices]
|
||||
if len(available_indices) == 1:
|
||||
mutated_indices.add(available_indices[0])
|
||||
continue
|
||||
mutated_indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly drop children
|
||||
while random.random() < p_drop_children and len(mutated_children) > 0:
|
||||
if len(mutated_children) == 1:
|
||||
del mutated_children[0]
|
||||
break
|
||||
del mutated_children[random.randint(0, len(mutated_children) - 1)]
|
||||
# randomly clone children
|
||||
while random.random() < p_clone_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
clone = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
mutated_children.append(clone)
|
||||
# randomly mutate children
|
||||
while random.random() < p_mutate_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
mutated_children[index] = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_mutation)
|
||||
right = generate_random_branch(p_mutation)
|
||||
mutated_children.append((child_inverted, left, right))
|
||||
return (inverted, mutated_indices, mutated_children)
|
||||
|
||||
def generate_program(model, output_var='output'):
|
||||
global N, M
|
||||
(constant, indices, child) = model
|
||||
|
||||
statement = 'multiply(' + np.array2string(indices, separator=',') + ', x, temp)\n\t'
|
||||
statement += output_var + '=' + str(constant) + '+sum(temp)\n\t'
|
||||
|
||||
if not child is None:
|
||||
left_output = output_var + '0'
|
||||
right_output = output_var + '1'
|
||||
(left, right) = child
|
||||
statement += generate_program(left, left_output)
|
||||
statement += generate_program(right, right_output)
|
||||
statement += output_var + '+=' + left_output + '*' + right_output + '\n\t'
|
||||
statement += output_var + '%=' + str(M) + '\n\t'
|
||||
return statement
|
||||
|
||||
def compile(model):
|
||||
program = 'def f(x, temp):\n\t' + generate_program(model) + 'return output'
|
||||
scope = {'multiply': np.multiply, 'sum': np.sum}
|
||||
exec(program, scope)
|
||||
return scope['f']
|
||||
|
||||
def evaluate(model, x, value = 0):
|
||||
(inverted, indices, children) = model
|
||||
for i in indices:
|
||||
if bit_at_index(x, i) != 0:
|
||||
value ^= 1
|
||||
for child in children:
|
||||
(child_inverted, left, right) = child
|
||||
left = evaluate(left, x)
|
||||
right = evaluate(right, x)
|
||||
if left & right != child_inverted:
|
||||
value ^= 1
|
||||
if inverted:
|
||||
value ^= 1
|
||||
return value
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for n in x:
|
||||
num_one_bits += count_one_bits(n)
|
||||
return num_one_bits % 2
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n))
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
return inputs
|
||||
|
||||
def update_sample(sample, index):
|
||||
global N
|
||||
for j in range(0, N):
|
||||
sample[index][j] = random.randint(0, 1)
|
||||
|
||||
def coherence(inputs, outputs, scratch):
|
||||
coherences = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
y_b = outputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
weight = 1.0 / (2 ** distance)
|
||||
denominator += weight
|
||||
if y_a == y_b:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
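# Illustrative sketch (not part of the original commit): coherence() scores how strongly
# nearby inputs (small Hamming distance) agree on their outputs, weighting each pair by
# 2 ** -distance; identical outputs everywhere score exactly 1.0, e.g.
#
#   >>> xs = random_sample(4, N)
#   >>> coherence(xs, np.zeros(4), np.zeros(N))
#   1.0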
|
||||
|
||||
def build_coherence_models(inputs, scratch):
|
||||
coherence_models = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
distances = [hamming_distance(x_a, inputs[j], scratch) for j in range(0, len(inputs))]
|
||||
indices = sorted(range(len(distances)), key=lambda i: distances[i])
|
||||
lowest = -1
|
||||
denominator = 0
|
||||
components = []
|
||||
for index in range(0, len(indices)):
|
||||
j = indices[index]
|
||||
if distances[j] == 0:
|
||||
continue
|
||||
if lowest < 0:
|
||||
lowest = distances[j]
|
||||
distance = distances[j] - lowest
|
||||
if distance >= 8:
|
||||
break
|
||||
weight = 2 ** -distance
|
||||
denominator += weight
|
||||
components.append((weight, j))
|
||||
coherence_models.append((denominator, components))
|
||||
return coherence_models
|
||||
|
||||
def fast_coherence(coherence_models, outputs):
|
||||
coherences = []
|
||||
for i in range(0, len(coherence_models)):
|
||||
(denominator, components) = coherence_models[i]
|
||||
numerator = 0
|
||||
for component in components:
|
||||
(weight, j) = component
|
||||
if outputs[i] == outputs[j]:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
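# Added commentary (not in the original commit): fast_coherence(build_coherence_models(inputs,
# scratch), outputs) approximates coherence(inputs, outputs, scratch). build_coherence_models
# keeps, for each point, only the neighbours within Hamming distance lowest + 8 of its nearest
# neighbour and precomputes their relative 2 ** -distance weights, so re-scoring a candidate's
# outputs in the main loop avoids recomputing any Hamming distances.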
|
||||
|
||||
def score(f, sample, distances):
|
||||
return coherence([(x, f(x) ^ y) for (x, y) in sample], distances)
|
||||
|
||||
def compute_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
a = inputs[i]
|
||||
for j in range(i, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def update_distances(inputs, distances, i, scratch):
|
||||
a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def clone_model(model, p_mutation):
|
||||
global N, M
|
||||
|
||||
clone = model[:]
|
||||
p_insert_node = p_mutation
|
||||
|
||||
i = 0
|
||||
while i < len(clone):
|
||||
(bias, op, indices, (p_modify, p_bias, p_index, p_insert)) = clone[i]
|
||||
|
||||
# if random.random() < p_modify:
|
||||
# p_modify += 0.01
|
||||
p_add_index = p_index
|
||||
indices = indices.copy()
|
||||
if random.random() < p_bias:
|
||||
p_bias += 0.001
|
||||
bias += random.randint(0, M - 1)
|
||||
bias %= M
|
||||
else:
|
||||
p_bias -= 0.001
|
||||
for absolute_index in range(0, N + i):
|
||||
relative_index = N - absolute_index - 1
|
||||
if random.random() < p_add_index:
|
||||
p_index += 0.001
|
||||
if relative_index in indices:
|
||||
indices.remove(relative_index)
|
||||
else:
|
||||
indices.add(relative_index)
|
||||
else:
|
||||
p_index -= 0.001
|
||||
# else:
|
||||
# p_modify -= 0.01
|
||||
|
||||
if random.random() < p_insert:
|
||||
p_insert += 0.001
|
||||
clone.insert(i, random_node(i, p_mutation))
|
||||
for j in range(i + 1, len(clone)):
|
||||
(bias, op, indices, p) = clone[j]
|
||||
modified_indices = set()
|
||||
for index in indices:
|
||||
if index >= 0:
|
||||
modified_indices.add(index)
|
||||
continue
|
||||
absolute_index = j + index
|
||||
if absolute_index == i:
|
||||
if random.random() > 0.5:
|
||||
modified_indices.add(index)
|
||||
else:
|
||||
modified_indices.add(index - 1)
|
||||
continue
|
||||
if absolute_index < i:
|
||||
modified_indices.add(index - 1)
|
||||
else:
|
||||
modified_indices.add(index)
|
||||
clone[j] = (bias, op, modified_indices, p)
|
||||
i += 1
|
||||
else:
|
||||
p_insert -= 0.001
|
||||
|
||||
p_modify = min(max(0.001, p_modify), 0.999)
|
||||
p_bias = min(max(0.001, p_bias), 0.999)
|
||||
p_index = min(max(0.001, p_index), 0.999)
|
||||
p_insert = min(max(0.001, p_insert), 0.999)
|
||||
clone[i] = (bias, op, indices, (p_modify, p_bias, p_index, p_insert))
|
||||
i += 1
|
||||
|
||||
if random.random() < p_insert_node:
|
||||
i = len(clone)
|
||||
clone.insert(i, random_node(i, p_mutation))
|
||||
for j in range(i + 1, len(clone)):
|
||||
(bias, op, indices, p) = clone[j]
|
||||
modified_indices = set()
|
||||
for index in indices:
|
||||
if index < N:
|
||||
modified_indices.add(index)
|
||||
continue
|
||||
shifted_index = index - N
|
||||
if shifted_index == i:
|
||||
if random.randint(0, 1) == 0:
|
||||
modified_indices.add(index)
|
||||
else:
|
||||
modified_indices.add(index + 1)
|
||||
if shifted_index > i:
|
||||
modified_indices.add(index + 1)
|
||||
else:
|
||||
modified_indices.add(index)
|
||||
clone[j] = (bias, op, modified_indices, p)
|
||||
return clone
|
||||
|
||||
def random_node(i, p_mutation):
|
||||
global N, M
|
||||
bias = random.randint(0, M - 1)
|
||||
op = random.randint(0, 1)
|
||||
p_modify = 0.5
|
||||
p_bias = 0.01
|
||||
p_index = 0.5
|
||||
p_insert = 0.01
|
||||
max_index = N + i - 1
|
||||
indices = set()
|
||||
indices.add(N - 1 - random.randint(0, max_index))
|
||||
|
||||
for index in range(0, max_index + 1):
|
||||
if random.random() < p_index:
|
||||
indices.add(N - 1 - index)
|
||||
return (bias, op, indices, (p_modify, p_bias, p_index, p_insert))
|
||||
|
||||
def null_candidate():
|
||||
global N
|
||||
return []
|
||||
|
||||
def eval_model(model, buffer, x):
|
||||
global N, M
|
||||
for i in range(0, len(model)):
|
||||
(bias, op, indices, _) = model[i]
|
||||
value = op
|
||||
for index in indices:
|
||||
if op == 1:
|
||||
value *= x[index] if index >= 0 else buffer[i + index]
|
||||
value %= M
|
||||
else:
|
||||
value += x[index] if index >= 0 else buffer[i + index]
|
||||
value %= M
|
||||
value += bias
|
||||
value %= M
|
||||
if i == len(model) - 1:
|
||||
return value
|
||||
else:
|
||||
buffer[i] = value
|
||||
return 0
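# Illustrative sketch (not part of the original commit): each node is (bias, op, indices, p),
# where op 0 sums and op 1 multiplies, non-negative indices read input bits, negative indices
# read earlier node outputs from buffer, and everything is reduced mod M. With M == 2 a single
# additive node over bits 0 and 1 computes their XOR, e.g.
#
#   >>> eval_model([(0, 0, {0, 1}, (0.5, 0.01, 0.5, 0.01))], np.zeros(1), [1, 0])
#   1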
|
||||
|
||||
def size(model):
|
||||
return len(model)
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
epochs = 10000
|
||||
num_survivors = 10
|
||||
num_offspring = 10
|
||||
num_candidates = num_survivors + num_survivors * num_offspring
|
||||
sample_size = 64
|
||||
eval_size = 100
|
||||
max_nodes = 65536
|
||||
p_mutation = 0.5
|
||||
g = sha
|
||||
current_generation = [null_candidate() for _ in range(0, num_candidates)]
|
||||
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
output_equality = np.zeros((sample_size, sample_size))
|
||||
inputs = random_sample(sample_size, N)
|
||||
scratch = np.zeros(N,)
|
||||
# compute_distances(inputs, distances, scratch)
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((sample_size,))
|
||||
output_xor = np.zeros((sample_size,))
|
||||
ones = np.ones((sample_size,))
|
||||
numerators = np.zeros((sample_size,))
|
||||
denominators = np.zeros((sample_size,))
|
||||
coherences = np.zeros((sample_size,))
|
||||
np.matmul(ones, distances, denominators)
|
||||
scores = np.zeros((num_candidates,))
|
||||
eval_buffer = np.zeros((max_nodes,))
|
||||
max_score = 0
|
||||
last_score = 0
|
||||
streak = 0
|
||||
|
||||
coherence_models = build_coherence_models(inputs, scratch)
|
||||
|
||||
for epoch in range(0, epochs):
|
||||
for i in range(0, num_candidates):
|
||||
candidate = current_generation[i]
|
||||
for j in range(0, sample_size):
|
||||
outputs[j] = eval_model(candidate, eval_buffer, inputs[j])
|
||||
np.subtract(outputs, expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
# for p in range(0, sample_size):
|
||||
# for q in range(0, sample_size):
|
||||
# m = int(output_xor[p])
|
||||
# n = int(output_xor[q])
|
||||
# distance = abs(m - n)
|
||||
# if distance > M / 2:
|
||||
# distance = M - distance
|
||||
# distance /= (M / 2)
|
||||
# distance **= 2
|
||||
# output_equality[p][q] = distance
|
||||
# # output_equality[p][q] = 1 if m == n else 0
|
||||
# np.multiply(output_equality, distances, output_equality)
|
||||
# np.matmul(ones, output_equality, numerators)
|
||||
# np.divide(numerators, denominators, coherences)
|
||||
# score = np.average(coherences)
|
||||
score = fast_coherence(coherence_models, output_xor)
|
||||
# if random.random() < 0.1:
|
||||
# check = coherence(inputs, output_xor, scratch)
|
||||
# if check - score > 1e-3:
|
||||
# print('not equal')
|
||||
scores[i] = score
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: (scores[i], -size(current_generation[i])))[-num_survivors:]
|
||||
survivors = [current_generation[index] for index in top_n]
|
||||
|
||||
# f = lambda x: evaluate(current_generation[0], x)
|
||||
# correct = 0
|
||||
# for i in range(0, eval_size):
|
||||
# x = random_input()
|
||||
# if f(x) == g(x):
|
||||
# correct += 1
|
||||
|
||||
top_score = scores[top_n[-1]]
|
||||
print(epoch, top_score, size(survivors[-1]))
|
||||
if top_score <= max_score:
|
||||
p_mutation += 0.001
|
||||
else:
|
||||
p_mutation = 0.5
|
||||
max_score = top_score
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
current_generation[i] = survivors[i]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
candidate = survivors[i]
|
||||
for j in range(0, num_offspring):
|
||||
index = num_survivors + j * num_survivors + i
|
||||
current_generation[index] = clone_model(candidate, random.random())
|
||||
|
||||
# inputs = random_sample(sample_size, N)
|
||||
# coherence_models = build_coherence_models(inputs, scratch)
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
|
||||
|
||||
# # while random.random() < 0.5:
|
||||
# if last_score == top_score:
|
||||
# streak += 1
|
||||
# else:
|
||||
# streak = 0
|
||||
# if streak >= 4:
|
||||
# inputs = random_sample(sample_size, N)
|
||||
# coherence_models = build_coherence_models(inputs, scratch)
|
||||
# # compute_distances(inputs, distances, scratch)
|
||||
# # np.matmul(ones, distances, denominators)
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# streak = 0
|
||||
# expected_outputs = np.zeros((sample_size,))
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# index = random.randint(0, sample_size - 1)
|
||||
# update_sample(inputs, index)
|
||||
# expected_outputs[index] = g(inputs[index])
|
||||
# update_distances(inputs, distances, index, scratch)
|
||||
# np.matmul(ones, distances, denominators)
|
||||
last_score = top_score
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
591
mutations4.py
Normal file
@ -0,0 +1,591 @@
|
||||
import hashlib
|
||||
import math
|
||||
from matplotlib import offsetbox
|
||||
import numpy as np
|
||||
import random
|
||||
from struct import pack, pack_into, unpack_from
|
||||
import secrets
|
||||
|
||||
from numpy import hamming
|
||||
|
||||
N = 32
|
||||
M = 2
|
||||
|
||||
def bit_at_index(buffer, index):
|
||||
offset = (index >> 3) % len(buffer)
|
||||
return buffer[offset] & (1 << (index & 0b111)) != 0
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def encode_f(f, buffer, offset=0):
|
||||
(inverted, flips, child) = f
|
||||
pack_into('I', buffer, offset, inverted)
|
||||
offset += 4
|
||||
for index in flips:
|
||||
pack_into('I', buffer, offset, 0)
|
||||
offset += 4
|
||||
pack_into('I', buffer, offset, index)
|
||||
offset += 4
|
||||
if child is None:
|
||||
pack_into('I', buffer, offset, 1)
|
||||
offset += 4
|
||||
return offset
|
||||
(inverted, left, right) = child
|
||||
pack_into('I', buffer, offset, 2 if not inverted else 3)
|
||||
offset += 4
|
||||
offset = encode_f(left, buffer, offset)
|
||||
offset = encode_f(right, buffer, offset)
|
||||
return offset
|
||||
|
||||
def generate_random_branch(p_mutation):
|
||||
global N
|
||||
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
inverted = random.randint(0, 1)
|
||||
indices = set()
|
||||
children = []
|
||||
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in indices]
|
||||
if len(available_indices) == 1:
|
||||
indices.add(available_indices[0])
|
||||
continue
|
||||
indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_add_children)
|
||||
right = generate_random_branch(p_add_children)
|
||||
children.append((child_inverted, left, right))
|
||||
return (inverted, indices, children)
|
||||
|
||||
def mutate_f(f, p_mutation):
|
||||
global N
|
||||
(inverted, indices, children) = f
|
||||
mutated_indices = set(indices)
|
||||
mutated_children = children[:]
|
||||
|
||||
p_invert = p_mutation * random.random()
|
||||
p_drop_indices = p_mutation * random.random()
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_drop_children = p_mutation * random.random()
|
||||
p_mutate_child = p_mutation * random.random()
|
||||
p_clone_child = p_mutation * random.random()
|
||||
p_invert_child = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
# randomly invert
|
||||
if random.random() < p_invert:
|
||||
inverted ^= 1
|
||||
# randomly drop indices
|
||||
while random.random() < p_drop_indices and len(mutated_indices) > 0:
|
||||
mutated_indices.pop()
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(mutated_indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in mutated_indices]
|
||||
if len(available_indices) == 1:
|
||||
mutated_indices.add(available_indices[0])
|
||||
continue
|
||||
mutated_indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly drop children
|
||||
while random.random() < p_drop_children and len(mutated_children) > 0:
|
||||
if len(mutated_children) == 1:
|
||||
del mutated_children[0]
|
||||
break
|
||||
del mutated_children[random.randint(0, len(mutated_children) - 1)]
|
||||
# randomly clone children
|
||||
while random.random() < p_clone_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
clone = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
mutated_children.append(clone)
|
||||
# randomly mutate children
|
||||
while random.random() < p_mutate_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
mutated_children[index] = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_mutation)
|
||||
right = generate_random_branch(p_mutation)
|
||||
mutated_children.append((child_inverted, left, right))
|
||||
return (inverted, mutated_indices, mutated_children)
|
||||
|
||||
def decode_f(buffer, mutate = False, offset = 0, skip_invert = False):
|
||||
global N
|
||||
inverted = 0
|
||||
if not skip_invert:
|
||||
[inverted] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
# random invert
|
||||
if mutate and random.random() < 0.01:
|
||||
inverted ^= 1
|
||||
inverted &= 0b1
|
||||
flips = set()
|
||||
# random add flip
|
||||
while mutate and random.random() < 0.5 and len(flips) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in flips]
|
||||
if len(available_indices) == 1:
|
||||
flips.add(available_indices[0])
|
||||
continue
|
||||
flips.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
while offset < len(buffer):
|
||||
# random create branch
|
||||
if mutate and random.random() < 0.01:
|
||||
gate_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(0.01)  # generate_random_branch requires a mutation probability; 0.01 matches the local rates and is an assumed value
|
||||
(offset, right) = decode_f(buffer, mutate, offset, True)
|
||||
return (offset, (inverted, flips, (gate_inverted, left, right)))
|
||||
[opcode] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
opcode &= 0b11
|
||||
if opcode == 0:
|
||||
[index] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
# random skip flip
|
||||
if mutate and random.random() < 0.01:
|
||||
continue
|
||||
if index in flips:
|
||||
flips.remove(index)
|
||||
else:
|
||||
flips.add(index)
|
||||
elif opcode == 1:
|
||||
return (offset, (inverted, flips, None))
|
||||
else:
|
||||
(offset, left) = decode_f(buffer, mutate, offset)
|
||||
(offset, right) = decode_f(buffer, mutate, offset)
|
||||
gate_inverted = 0 if opcode == 2 else 1
|
||||
# random invert
|
||||
if mutate and random.random() < 0.01:
|
||||
gate_inverted ^= 1
|
||||
# random skip branch
|
||||
if mutate and random.random() < 0.01:
|
||||
return (offset, (inverted, flips, None))
|
||||
return (offset, (inverted, flips, (gate_inverted, left, right)))
|
||||
return (offset, (inverted, [], None))
|
||||
|
||||
def generate_program(model, output_var='output'):
|
||||
global N, M
|
||||
(constant, indices, child) = model
|
||||
|
||||
statement = 'multiply(' + np.array2string(indices, separator=',') + ', x, temp)\n\t'
|
||||
statement += output_var + '=' + str(constant) + '+sum(temp)\n\t'
|
||||
|
||||
if not child is None:
|
||||
left_output = output_var + '0'
|
||||
right_output = output_var + '1'
|
||||
(left, right) = child
|
||||
statement += generate_program(left, left_output)
|
||||
statement += generate_program(right, right_output)
|
||||
statement += output_var + '+=' + left_output + '*' + right_output + '\n\t'
|
||||
statement += output_var + '%=' + str(M) + '\n\t'
|
||||
return statement
|
||||
|
||||
def compile(model):
|
||||
program = 'def f(x, temp):\n\t' + generate_program(model) + 'return output'
|
||||
scope = {'multiply': np.multiply, 'sum': np.sum}
|
||||
exec(program, scope)
|
||||
return scope['f']
|
||||
|
||||
def evaluate(model, x, value = 0):
|
||||
(inverted, indices, children) = model
|
||||
for i in indices:
|
||||
if bit_at_index(x, i) != 0:
|
||||
value ^= 1
|
||||
for child in children:
|
||||
(child_inverted, left, right) = child
|
||||
left = evaluate(left, x)
|
||||
right = evaluate(right, x)
|
||||
if left & right != child_inverted:
|
||||
value ^= 1
|
||||
if inverted:
|
||||
value ^= 1
|
||||
return value
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for n in x:
|
||||
num_one_bits += count_one_bits(n)
|
||||
return num_one_bits % 2
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n))
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
return inputs
|
||||
|
||||
def update_sample(sample, index):
|
||||
global N
|
||||
for j in range(0, N):
|
||||
sample[index][j] = random.randint(0, 1)
|
||||
|
||||
def coherence(inputs, outputs, scratch):
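# Coherence score: for each sample, the 2^-Hamming-distance-weighted fraction of
# the other samples that produce the same output; returns the mean over all samples.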
|
||||
coherences = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
y_b = outputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
weight = 1.0 / (2 ** distance)
|
||||
denominator += weight
|
||||
if y_a == y_b:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def build_coherence_models(inputs, scratch):
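# Precomputes, per input, the neighbor weights (2^-(distance gap to the nearest
# other input), truncated once the gap reaches 8) so fast_coherence can rescore
# candidates without recomputing Hamming distances every epoch.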
|
||||
coherence_models = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
distances = [hamming_distance(x_a, inputs[j], scratch) for j in range(0, len(inputs))]
|
||||
indices = sorted(range(len(distances)), key=lambda i: distances[i])
|
||||
lowest = -1
|
||||
denominator = 0
|
||||
components = []
|
||||
for index in range(0, len(indices)):
|
||||
j = indices[index]
|
||||
if distances[j] == 0:
|
||||
continue
|
||||
if lowest < 0:
|
||||
lowest = distances[j]
|
||||
distance = distances[j] - lowest
|
||||
if distance >= 8:
|
||||
break
|
||||
weight = 2 ** -distance
|
||||
denominator += weight
|
||||
components.append((weight, j))
|
||||
coherence_models.append((denominator, components))
|
||||
return coherence_models
|
||||
|
||||
def fast_coherence(coherence_models, outputs):
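# Same agreement score as coherence(), computed from the precomputed neighbor
# weights; only the outputs vary between calls.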
|
||||
coherences = []
|
||||
for i in range(0, len(coherence_models)):
|
||||
(denominator, components) = coherence_models[i]
|
||||
numerator = 0
|
||||
for component in components:
|
||||
(weight, j) = component
|
||||
if outputs[i] == outputs[j]:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def score(f, sample, scratch):
    inputs = [x for (x, y) in sample]
    residuals = [f(x) ^ y for (x, y) in sample]
    return coherence(inputs, residuals, scratch)
|
||||
|
||||
def compute_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
a = inputs[i]
|
||||
for j in range(i, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def update_distances(inputs, distances, i, scratch):
|
||||
a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def clone_model(model, p_mutation):
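# Mutation operator: copies the node list, perturbing each node's bias and index
# set with per-node probabilities that are themselves nudged up or down, and
# occasionally inserts a new random node, shifting downstream index references
# past the insertion point.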
|
||||
global N, M
|
||||
|
||||
clone = model[:]
|
||||
p_insert_node = p_mutation * random.random()
|
||||
|
||||
i = 0
|
||||
while i < len(clone):
|
||||
(bias, op, indices, (p_modify, p_bias, p_index)) = clone[i]
|
||||
p_modify_node = p_modify
|
||||
|
||||
if random.random() < p_modify_node:
|
||||
p_modify += 0.01
|
||||
p_add_index = p_index
|
||||
p_modify_bias = p_bias
|
||||
indices = indices.copy()
|
||||
if random.random() < p_modify_bias:
|
||||
p_bias += 0.01
|
||||
bias += random.randint(0, M - 1)
|
||||
bias %= M
|
||||
else:
|
||||
p_bias -= 0.01
|
||||
for index in range(0, N + i):
|
||||
if random.random() < p_add_index:
|
||||
p_index += 0.01
|
||||
if index in indices:
|
||||
indices.remove(index)
|
||||
else:
|
||||
indices.add(index)
|
||||
else:
|
||||
p_index -= 0.01
|
||||
else:
|
||||
p_modify -= 0.01
|
||||
|
||||
p_modify = min(max(0.01, p_modify), 0.99)
|
||||
p_bias = min(max(0.01, p_bias), 0.99)
|
||||
p_index = min(max(0.01, p_index), 0.99)
|
||||
clone[i] = (bias, op, indices, (p_modify, p_bias, p_index))
|
||||
i += 1
|
||||
|
||||
if random.random() < p_insert_node:
|
||||
i = random.randint(0, len(clone))
|
||||
clone.insert(i, random_node(N + i - 1, p_mutation))
|
||||
for j in range(i + 1, len(clone)):
|
||||
(bias, op, indices, p) = clone[j]
|
||||
modified_indices = set()
|
||||
for index in indices:
|
||||
if index < N:
|
||||
modified_indices.add(index)
|
||||
continue
|
||||
shifted_index = index - N
|
||||
if shifted_index == i:
|
||||
if random.randint(0, 1) == 0:
|
||||
modified_indices.add(index)
|
||||
else:
|
||||
modified_indices.add(index + 1)
|
||||
if shifted_index > i:
|
||||
modified_indices.add(index + 1)
|
||||
else:
|
||||
modified_indices.add(index)
|
||||
clone[j] = (bias, op, modified_indices, p)
|
||||
return clone
|
||||
|
||||
def random_node(max_index, p_mutation):
|
||||
global N
|
||||
bias = random.randint(0, M - 1)
|
||||
op = random.randint(0, 1)
|
||||
p_modify = random.random()
|
||||
p_bias = random.random()
|
||||
p_index = random.random()
|
||||
indices = set()
|
||||
indices.add(random.randint(0, max_index))
|
||||
|
||||
p_add_index = p_mutation * random.random()
|
||||
for index in range(0, max_index):
|
||||
if random.random() < p_add_index:
|
||||
indices.add(index)
|
||||
return (bias, op, indices, (p_modify, p_bias, p_index))
|
||||
|
||||
def null_candidate():
|
||||
global N
|
||||
return []
|
||||
|
||||
def encode_tree(tree_model):
|
||||
stack = [tree_model]
|
||||
node_indices = {}
|
||||
index = 0
|
||||
while len(stack) > 0:
|
||||
node = stack.pop()
|
||||
node_indices[node] = index
|
||||
index += 1
|
||||
(p, bias, value) = node
|
||||
if isinstance(value, int):
|
||||
continue
|
||||
(left, right) = value
|
||||
stack.append(left)
|
||||
stack.append(right)
|
||||
length = index
|
||||
|
||||
stack = [tree_model]
|
||||
serialized_model = []
|
||||
    # NOTE: this loop was left unfinished in the original commit. A minimal
    # completion (an assumption, not the author's code): serialize each node as
    # (p, bias, payload), where payload is the leaf value or the pair of child
    # indices taken from node_indices.
    while len(stack) > 0:
        node = stack.pop()
        (p, bias, value) = node
        if isinstance(value, int):
            serialized_model.insert(0, (p, bias, value))
            continue
        (left, right) = value
        serialized_model.insert(0, (p, bias, (node_indices[left], node_indices[right])))
        stack.append(left)
        stack.append(right)
    return serialized_model
|
||||
|
||||
def eval_model(model, buffer, x):
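# Evaluates the flat node-list model on input x (mod M): node i sums (op=0) or
# multiplies (op=1) its referenced values, where indices below N read input bits
# and larger indices read earlier node outputs from `buffer`; the bias is then
# added and the last node's value is the model output.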
|
||||
global N, M
|
||||
for i in range(0, len(model)):
|
||||
(bias, op, indices, _) = model[i]
|
||||
value = op
|
||||
for index in indices:
|
||||
if index >= N + i:
|
||||
print('This should not happen')
|
||||
if op == 1:
|
||||
value *= x[index] if index < N else buffer[index - N]
|
||||
value %= M
|
||||
else:
|
||||
value += x[index] if index < N else buffer[index - N]
|
||||
value %= M
|
||||
value += bias
|
||||
value %= M
|
||||
if i == len(model) - 1:
|
||||
return value
|
||||
else:
|
||||
buffer[i] = value
|
||||
return 0
|
||||
|
||||
def size(model):
|
||||
return len(model)
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
epochs = 10000
|
||||
num_survivors = 100
|
||||
num_offspring = 10
|
||||
num_candidates = num_survivors + num_survivors * num_offspring
|
||||
sample_size = 64
|
||||
eval_size = 100
|
||||
max_nodes = 65536
|
||||
p_mutation = 0.5
|
||||
g = sha
|
||||
current_generation = [null_candidate() for _ in range(0, num_candidates)]
|
||||
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
output_equality = np.zeros((sample_size, sample_size))
|
||||
inputs = random_sample(sample_size, N)
|
||||
scratch = np.zeros(N,)
|
||||
# compute_distances(inputs, distances, scratch)
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((sample_size,))
|
||||
output_xor = np.zeros((sample_size,))
|
||||
ones = np.ones((sample_size,))
|
||||
numerators = np.zeros((sample_size,))
|
||||
denominators = np.zeros((sample_size,))
|
||||
coherences = np.zeros((sample_size,))
|
||||
np.matmul(ones, distances, denominators)
|
||||
scores = np.zeros((num_candidates,))
|
||||
eval_buffer = np.zeros((max_nodes,))
|
||||
max_score = 0
|
||||
last_score = 0
|
||||
streak = 0
|
||||
|
||||
coherence_models = build_coherence_models(inputs, scratch)
|
||||
|
||||
for epoch in range(0, epochs):
|
||||
for i in range(0, num_candidates):
|
||||
candidate = current_generation[i]
|
||||
for j in range(0, sample_size):
|
||||
outputs[j] = eval_model(candidate, eval_buffer, inputs[j])
|
||||
np.subtract(outputs, expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
# for p in range(0, sample_size):
|
||||
# for q in range(0, sample_size):
|
||||
# m = int(output_xor[p])
|
||||
# n = int(output_xor[q])
|
||||
# distance = abs(m - n)
|
||||
# if distance > M / 2:
|
||||
# distance = M - distance
|
||||
# distance /= (M / 2)
|
||||
# distance **= 2
|
||||
# output_equality[p][q] = distance
|
||||
# # output_equality[p][q] = 1 if m == n else 0
|
||||
# np.multiply(output_equality, distances, output_equality)
|
||||
# np.matmul(ones, output_equality, numerators)
|
||||
# np.divide(numerators, denominators, coherences)
|
||||
# score = np.average(coherences)
|
||||
score = fast_coherence(coherence_models, output_xor)
|
||||
# if random.random() < 0.1:
|
||||
# check = coherence(inputs, output_xor, scratch)
|
||||
# if check - score > 1e-3:
|
||||
# print('not equal')
|
||||
scores[i] = score
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
survivors = [current_generation[index] for index in top_n]
|
||||
|
||||
# f = lambda x: evaluate(current_generation[0], x)
|
||||
# correct = 0
|
||||
# for i in range(0, eval_size):
|
||||
# x = random_input()
|
||||
# if f(x) == g(x):
|
||||
# correct += 1
|
||||
|
||||
top_score = scores[top_n[-1]]
|
||||
print(epoch, top_score, size(survivors[-1]))
|
||||
if top_score <= max_score:
|
||||
p_mutation += 0.01
|
||||
else:
|
||||
p_mutation = 0.5
|
||||
max_score = top_score
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
current_generation[i] = survivors[i]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
candidate = survivors[i]
|
||||
for j in range(0, num_offspring):
|
||||
index = num_survivors + j * num_survivors + i
|
||||
current_generation[index] = clone_model(candidate, random.random() * 0.1)
|
||||
|
||||
inputs = random_sample(sample_size, N)
|
||||
coherence_models = build_coherence_models(inputs, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
|
||||
# while random.random() < 0.5:
|
||||
# if last_score == top_score:
|
||||
# streak += 1
|
||||
# else:
|
||||
# streak = 0
|
||||
# if streak >= 4:
|
||||
# inputs = random_sample(sample_size, N)
|
||||
# coherence_models = build_coherence_models(inputs, scratch)
|
||||
# # compute_distances(inputs, distances, scratch)
|
||||
# # np.matmul(ones, distances, denominators)
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# streak = 0
|
||||
# expected_outputs = np.zeros((sample_size,))
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# index = random.randint(0, sample_size - 1)
|
||||
# update_sample(inputs, index)
|
||||
# expected_outputs[index] = g(inputs[index])
|
||||
# update_distances(inputs, distances, index, scratch)
|
||||
# np.matmul(ones, distances, denominators)
|
||||
last_score = top_score
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
417
mutations5.py
Normal file
417
mutations5.py
Normal file
@ -0,0 +1,417 @@
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
|
||||
class Candidate:
|
||||
def __init__(self):
|
||||
global N
|
||||
self.bias = 0
|
||||
self.offsets = np.zeros((N,)).astype(np.int32)
|
||||
self.has_child = 0
|
||||
self.left = None
|
||||
self.right = None
|
||||
|
||||
def addOffset(self, x):
|
||||
self.offsets[x] = 1
|
||||
return self
|
||||
|
||||
def setChild(self, left, right):
|
||||
self.has_child = 1
|
||||
self.left = left
|
||||
self.right = right
|
||||
return self
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self):
|
||||
global N, M
|
||||
self.p_bias = np.zeros(2,)
|
||||
self.p_bias.fill(0.5)
|
||||
self.p_offsets = np.zeros((2,N))
|
||||
self.p_offsets.fill(0.5)
|
||||
self.p_has_child = 0
|
||||
|
||||
self.bias_coherences = np.zeros((2, M,))
|
||||
self.bias_coherences.fill(0.5)
|
||||
self.offset_coherences = np.zeros((2, M, N))
|
||||
self.offset_coherences.fill(0.5)
|
||||
self.has_child_coherences = np.zeros((2,))
|
||||
self.has_child_coherences.fill(0.5)
|
||||
|
||||
self.uncertainty = np.zeros((2,))
|
||||
self.totals = np.zeros((2,))
|
||||
|
||||
self.left = None
|
||||
self.right = None
|
||||
self.parent = None
|
||||
self.depth = 1
|
||||
|
||||
def reset_uncertainty(self):
|
||||
if self.totals[0] == 0 and self.totals[1] == 0:
|
||||
return
|
||||
self.uncertainty.fill(0)
|
||||
self.totals.fill(0)
|
||||
if not self.left is None:
|
||||
self.left.reset_uncertainty()
|
||||
if not self.right is None:
|
||||
self.right.reset_uncertainty()
|
||||
|
||||
def min_p_has_child(self):
|
||||
without_child = self.uncertainty[0] / self.totals[0] if self.totals[0] > 0 else 0
|
||||
with_child = self.uncertainty[1] / self.totals[1] if self.totals[1] > 0 else 0
|
||||
|
||||
if without_child == 0 and with_child == 0:
|
||||
return 0.5
|
||||
return without_child / (without_child + with_child)
|
||||
|
||||
def confidence(self):
|
||||
global N
|
||||
total = (2 * self.p_bias[0] - 1) ** 2
|
||||
for i in range(0, N):
|
||||
total += (2 * self.p_offsets[0][i] - 1) ** 2
|
||||
return total / (N + 1)
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
global N
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
test_candidate = Candidate().addOffset(0).addOffset(1).setChild(
|
||||
Candidate().addOffset(2), Candidate().addOffset(3).setChild(
|
||||
Candidate().addOffset(4), Candidate().addOffset(5)
|
||||
))
|
||||
|
||||
def eval_test_candidate(x):
|
||||
global test_candidate
|
||||
return evaluate_candidate(test_candidate, x)
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(inputs, outputs, scratch):
|
||||
coherences = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
y_b = outputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
weight = 1.0 / (2 ** distance)
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n))
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
return inputs
|
||||
|
||||
def evaluate_candidate(candidate, x):
|
||||
global N, M
|
||||
value = candidate.bias
|
||||
for i in range(0, N):
|
||||
value += x[i] * candidate.offsets[i]
|
||||
value %= M
|
||||
if candidate.has_child == 0:
|
||||
return value
|
||||
left = evaluate_candidate(candidate.left, x)
|
||||
right = evaluate_candidate(candidate.right, x)
|
||||
value += left * right
|
||||
value %= M
|
||||
return value
|
||||
|
||||
def evaluate(probabilities, candidate, x, z, update_uncertainty = True):
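# Evaluates `candidate` on x against the target z, tallying in `probabilities`
# how often the node's output misses the target, bucketed by whether the node has
# a child term (index 1) or not (index 0); these counts drive min_p_has_child().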
|
||||
global N, M
|
||||
value = candidate.bias
|
||||
for i in range(0, N):
|
||||
value += x[i] * candidate.offsets[i]
|
||||
value %= M
|
||||
if candidate.has_child == 0:
|
||||
if update_uncertainty:
|
||||
if value != z:
|
||||
probabilities.uncertainty[0] += 1
|
||||
probabilities.totals[0] += 1
|
||||
return value
|
||||
e = (value - z) % M
|
||||
left = evaluate(probabilities.left, candidate.left, x, e, False)
|
||||
right = evaluate(probabilities.right, candidate.right, x, e, False)
|
||||
if update_uncertainty:
|
||||
if e == 0:
|
||||
if left == 1 and right == 1:
|
||||
evaluate(probabilities.left, candidate.left, x, e)
|
||||
evaluate(probabilities.right, candidate.right, x, e)
|
||||
if left == 0:
|
||||
evaluate(probabilities.left, candidate.left, x, e)
|
||||
if right == 0:
|
||||
evaluate(probabilities.right, candidate.right, x, e)
|
||||
elif e == 1:
|
||||
if left == 1 and right == 1:
|
||||
evaluate(probabilities.left, candidate.left, x, e)
|
||||
evaluate(probabilities.right, candidate.right, x, e)
|
||||
if left == 0:
|
||||
evaluate(probabilities.left, candidate.left, x, e)
|
||||
if right == 0:
|
||||
evaluate(probabilities.right, candidate.right, x, e)
|
||||
value += left * right
|
||||
value %= M
|
||||
if update_uncertainty:
|
||||
if value != z:
|
||||
probabilities.uncertainty[1] += 1
|
||||
probabilities.totals[1] += 1
|
||||
return value
|
||||
|
||||
def update_probabilities(probabilities, candidates, scores, depth = 1):
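# Exponentially averages the best coherence seen for each parameter setting (bias
# value, each offset bit, has_child), then nudges the sampling probabilities toward
# the better-scoring setting. Once a node's distribution is confident enough it is
# frozen (rounded) and p_has_child is set to 1 so a child level can grow; the
# update then recurses into the left/right child probability nodes.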
|
||||
global N, M
|
||||
num_candidates = len(candidates)
|
||||
min_p_has_child = probabilities.min_p_has_child()
|
||||
|
||||
for z in range(0, 2):
|
||||
for i in range(0, M):
|
||||
bias_i_max = 0
|
||||
for k in range(0, num_candidates):
|
||||
candidate = candidates[k]
|
||||
if candidate is None:
|
||||
continue
|
||||
if candidate.bias != i:
|
||||
continue
|
||||
if candidate.has_child != z:
|
||||
continue
|
||||
bias_i_max = max(bias_i_max, scores[k])
|
||||
if bias_i_max == 0:
|
||||
continue
|
||||
probabilities.bias_coherences[z][i] = 0.9 * probabilities.bias_coherences[z][i] + 0.1 * bias_i_max
|
||||
|
||||
for z in range(0, 2):
|
||||
for i in range(0, M):
|
||||
for j in range(0, N):
|
||||
offset_ij_max = 0
|
||||
for k in range(0, num_candidates):
|
||||
candidate = candidates[k]
|
||||
if candidate is None:
|
||||
continue
|
||||
if candidate.offsets[j] != i:
|
||||
continue
|
||||
if candidate.has_child != z:
|
||||
continue
|
||||
offset_ij_max = max(offset_ij_max, scores[k])
|
||||
if offset_ij_max == 0:
|
||||
continue
|
||||
probabilities.offset_coherences[z][i][j] = 0.9 * probabilities.offset_coherences[z][i][j] + 0.1 * offset_ij_max
|
||||
|
||||
for i in range(0, 2):
|
||||
has_child_i_max = 0
|
||||
for k in range(0, num_candidates):
|
||||
candidate = candidates[k]
|
||||
if candidate is None:
|
||||
continue
|
||||
if candidate.has_child != i:
|
||||
continue
|
||||
has_child_i_max = max(has_child_i_max, scores[k])
|
||||
if has_child_i_max == 0:
|
||||
continue
|
||||
probabilities.has_child_coherences[i] = 0.9 * probabilities.has_child_coherences[i] + 0.1 * has_child_i_max
|
||||
|
||||
|
||||
for z in range(0, 2):
|
||||
# direction = 1 if z == 0 and probabilities.has_child_coherences[0] > probabilities.has_child_coherences[1] or z == 1 and probabilities.has_child_coherences[1] > probabilities.has_child_coherences[0] else -1
|
||||
direction = 1
|
||||
p_bias_next = clamp(probabilities.p_bias[z] + direction * (probabilities.bias_coherences[z][1] - probabilities.bias_coherences[z][0]), 0, 1)
|
||||
# if z == 0 and probabilities.has_child_coherences[0] < probabilities.has_child_coherences[1] or z == 1 and probabilities.has_child_coherences[0] > probabilities.has_child_coherences[1]:
|
||||
# p_bias_next = 0.5
|
||||
probabilities.p_bias[z] = 0.9 * probabilities.p_bias[z] + 0.1 * p_bias_next
|
||||
for j in range(0, N):
|
||||
p_offset_next = clamp(probabilities.p_offsets[z][j] + direction * (probabilities.offset_coherences[z][1][j] - probabilities.offset_coherences[z][0][j]), 0, 1)
|
||||
# if z == 0 and probabilities.has_child_coherences[0] < probabilities.has_child_coherences[1] or z == 1 and probabilities.has_child_coherences[0] > probabilities.has_child_coherences[1]:
|
||||
# p_offset_next = 0.5
|
||||
probabilities.p_offsets[z][j] = 0.9 * probabilities.p_offsets[z][j] + 0.1 * p_offset_next
|
||||
|
||||
# direction = 1 if probabilities.parent is None or probabilities.parent.has_child_coherences[1] > probabilities.parent.has_child_coherences[0] else -1
|
||||
direction = 1
|
||||
# p_has_child_next = clamp(probabilities.p_has_child + direction * (probabilities.has_child_coherences[1] - probabilities.has_child_coherences[0]), probabilities.min_p_has_child(), 1)
|
||||
# probabilities.p_has_child = 0.9 * probabilities.p_has_child + 0.1 *
|
||||
if probabilities.confidence() > 0.9 and probabilities.p_has_child == 0:
|
||||
probabilities.p_bias[0] = round(probabilities.p_bias[0])
|
||||
for i in range(0, N):
|
||||
probabilities.p_offsets[0][i] = round(probabilities.p_offsets[0][i])
|
||||
probabilities.p_has_child = 1
|
||||
|
||||
# if probabilities.has_child_coherences[0] > probabilities.has_child_coherences[1]:
|
||||
# return
|
||||
|
||||
p_left = probabilities.left
|
||||
p_right = probabilities.right
|
||||
if not p_left is None:
|
||||
left = [candidate.left if not candidate is None and candidate.has_child else None for candidate in candidates]
|
||||
if any(x is not None for x in left):
|
||||
update_probabilities(p_left, left, scores, depth + 1)
|
||||
if not p_right is None:
|
||||
right = [candidate.right if not candidate is None and candidate.has_child else None for candidate in candidates]
|
||||
if any(x is not None for x in right):
|
||||
update_probabilities(p_right, right, scores, depth + 1)
|
||||
|
||||
|
||||
def create_candidate(probabilities, candidate):
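# Samples a concrete candidate from the probability tree (bias, offsets, and
# whether to attach a child, capped at depth 4), lazily growing the matching child
# Probabilities nodes and recursing into them; returns a count of newly added children.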
|
||||
global N
|
||||
new_children = 0
|
||||
z = 1 if random.random() < probabilities.p_has_child and probabilities.depth <= 4 else 0
|
||||
candidate.bias = 1 if random.random() < probabilities.p_bias[0] else 0
|
||||
for i in range(0, N):
|
||||
candidate.offsets[i] = 1 if random.random() < probabilities.p_offsets[0][i] else 0
|
||||
if not z:
|
||||
candidate.has_child = 0
|
||||
return new_children
|
||||
if probabilities.p_has_child < 1:
|
||||
new_children += 1
|
||||
candidate.has_child = 1
|
||||
if candidate.left is None:
|
||||
candidate.left = Candidate()
|
||||
if candidate.right is None:
|
||||
candidate.right = Candidate()
|
||||
depth = probabilities.depth + 1
|
||||
if probabilities.left is None:
|
||||
probabilities.left = Probabilities()
|
||||
probabilities.left.parent = probabilities
|
||||
probabilities.left.depth = depth
|
||||
# probabilities.left.p_has_child = 2 ** -depth
|
||||
if probabilities.right is None:
|
||||
probabilities.right = Probabilities()
|
||||
probabilities.right.parent = probabilities
|
||||
probabilities.right.depth = depth
|
||||
# probabilities.right.p_has_child = 2 ** -depth
|
||||
new_children += create_candidate(probabilities.left, candidate.left)
|
||||
new_children += create_candidate(probabilities.right, candidate.right)
|
||||
return new_children
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
global N
|
||||
dest.bias = src.bias
|
||||
for i in range(0, N):
|
||||
dest.offsets[i] = src.offsets[i]
|
||||
has_child = src.has_child
|
||||
dest.has_child = has_child
|
||||
if not has_child:
|
||||
return
|
||||
if dest.left is None:
|
||||
dest.left = Candidate()
|
||||
if dest.right is None:
|
||||
dest.right = Candidate()
|
||||
copy_candidate(src.left, dest.left)
|
||||
copy_candidate(src.right, dest.right)
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities, depth=0):
|
||||
global M
|
||||
if depth == 0:
|
||||
print('=====================')
|
||||
left = probabilities.left
|
||||
right = probabilities.right
|
||||
if left is None:
|
||||
print('None')
|
||||
else:
|
||||
print_probabilities(left, depth + 1)
|
||||
if right is None:
|
||||
print('None')
|
||||
else:
|
||||
print_probabilities(right, depth + 1)
|
||||
for z in range(0, 2):
|
||||
# for i in range(0, M):
|
||||
# print(z, i, p(probabilities.bias_coherences[z][i]), p_a(probabilities.offset_coherences[z][i]), p(probabilities.has_child_coherences[i]))
|
||||
print(depth, z, p(probabilities.p_bias[z]), p_a(probabilities.p_offsets[z]), p(probabilities.p_has_child), p(probabilities.confidence()))
|
||||
if depth == 0:
|
||||
print('=====================')
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 10
|
||||
epochs = 1000
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros(N,)
|
||||
g = eval_test_candidate
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
inputs = random_sample(sample_size, N)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((sample_size,))
|
||||
probabilities = Probabilities()
|
||||
candidates = [Candidate() for _ in range(0, num_candidates + num_survivors)]
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
|
||||
while True:
|
||||
max_new_children = 0
|
||||
min_new_children = 1e6
|
||||
probabilities.reset_uncertainty()
|
||||
for i in range(0, len(candidates)):
|
||||
candidate = candidates[i]
|
||||
if i < num_candidates:
|
||||
create_candidate(probabilities, candidate)
|
||||
for j in range(0, sample_size):
|
||||
outputs[j] = evaluate(probabilities, candidate, inputs[j], expected_outputs[j])
|
||||
np.subtract(outputs, expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
scores[i] = coherence(inputs, output_xor, scratch)
|
||||
update_probabilities(probabilities, candidates, scores)
|
||||
print_probabilities(probabilities)
|
||||
print(np.max(scores))
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
copy_candidate(candidates[src_index], candidates[dest_index])
|
||||
|
||||
inputs = random_sample(sample_size, N)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
488
mutations6.py
Normal file
488
mutations6.py
Normal file
@ -0,0 +1,488 @@
|
||||
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
|
||||
def timeit(f):
|
||||
def timed(*args, **kw):
|
||||
ts = time.time()
|
||||
result = f(*args, **kw)
|
||||
te = time.time()
|
||||
|
||||
print('func:%r took: %2.4f sec' % (f.__name__, te-ts))
|
||||
return result
|
||||
return timed
|
||||
|
||||
def vec_to_int(bias, x):
|
||||
global N
|
||||
z = bias
|
||||
for i in range(0, N):
|
||||
z <<= 1
|
||||
z |= x[i]
|
||||
return z
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = 2 ** layer
|
||||
self.bias = np.zeros((self.node_count,)).astype(np.int32)
|
||||
self.offsets = np.zeros((self.node_count, N)).astype(np.int32)
|
||||
|
||||
def normalize(self):
|
||||
global N
|
||||
if self.node_count < 2:
|
||||
return
|
||||
# pairs of two must be in order
|
||||
for i in range(0, self.node_count, 2):
|
||||
left_id = vec_to_int(self.bias[i], self.offsets[i])
|
||||
right_id = vec_to_int(self.bias[i + 1], self.offsets[i + 1])
|
||||
if left_id > right_id:
|
||||
temp = self.bias[i]
|
||||
self.bias[i] = self.bias[i + 1]
|
||||
self.bias[i + 1] = temp
|
||||
for j in range(0, N):
|
||||
temp = self.offsets[i][j]
|
||||
self.offsets[i][j] = self.offsets[i + 1][j]
|
||||
self.offsets[i + 1][j] = temp
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = 2 ** layer
|
||||
self.p_bias = np.zeros((self.node_count,))
|
||||
self.p_bias.fill(0.5)
|
||||
self.p_offsets = np.zeros((self.node_count, N))
|
||||
self.p_offsets.fill(0.5)
|
||||
|
||||
self.bias_coherences = np.zeros((2, self.node_count,))
|
||||
self.bias_coherences.fill(0.5)
|
||||
self.offset_coherences = np.zeros((2, self.node_count, N))
|
||||
self.offset_coherences.fill(0.5)
|
||||
|
||||
def inertia(self):
|
||||
global N
|
||||
total = 0
|
||||
for i in range(0, self.node_count):
|
||||
if self.p_bias[i] > 1e-2 and self.p_bias[i] < (1 - 1e-2):
|
||||
total += abs(self.bias_coherences[1][i] - self.bias_coherences[0][i])
|
||||
for j in range(0, N):
|
||||
if self.p_offsets[i][j] > 1e-2 and self.p_offsets[i][j] < (1 - 1e-2):
|
||||
total += abs(self.offset_coherences[1][i][j] - self.offset_coherences[0][i][j])
|
||||
return total
|
||||
|
||||
def has_converged(self):
|
||||
global N
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, N):
|
||||
if self.p_offsets[i][j] > 1e-2 and self.p_offsets[i][j] < 1 - 1e-2:
|
||||
return False
|
||||
return True
|
||||
|
||||
def confidence(self):
|
||||
global N
|
||||
total = 0
|
||||
for i in range(0, self.node_count):
|
||||
total += (2 * self.p_bias[i] - 1) ** 2
|
||||
for j in range(0, N):
|
||||
total += (2 * self.p_offsets[i][j] - 1) ** 2
|
||||
return total / ((N + 1) * self.node_count)
|
||||
|
||||
def flatten(self):
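# Rounds the learned probabilities into a concrete Candidate; a node is forced to
# all-zero when its sibling is (near) zero everywhere, since the pair's product
# then contributes nothing.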
|
||||
candidate = Candidate(self.layer)
|
||||
for i in range(0, self.node_count):
|
||||
force_zero = True
|
||||
if self.node_count > 1:
|
||||
k = i ^ 0b1
|
||||
if self.p_bias[k] > 1e-2:
|
||||
force_zero = False
|
||||
if force_zero:
|
||||
for j in range(0, N):
|
||||
if self.p_offsets[k][j] > 1e-2:
|
||||
force_zero = False
|
||||
break
|
||||
else:
|
||||
force_zero = False
|
||||
|
||||
candidate.bias[i] = 1 if not force_zero and self.p_bias[i] >= (1 - 1e-2) else 0
|
||||
for j in range(0, N):
|
||||
candidate.offsets[i][j] = 1 if not force_zero and self.p_offsets[i][j] >= (1 - 1e-2) else 0
|
||||
return candidate
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
global N
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
# 00100111 x4
|
||||
# 00000110 x1
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
def test_fn(x):
|
||||
# 0 1
|
||||
# 2 | 3
|
||||
# 4 | 5 | 6 | 7
|
||||
# | | 0 | 7 | | | |
|
||||
return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))
|
||||
|
||||
def candidate_fn(x):
|
||||
return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2])
|
||||
|
||||
def true_fn(x):
|
||||
return x[0] ^ x[1] ^ (x[3] * x[2])
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(outputs, distances):
|
||||
coherences = []
|
||||
for i in range(0, len(outputs)):
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(outputs)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = outputs[j]
|
||||
weight = distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n)).astype(np.int32)
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
return inputs
|
||||
|
||||
def populate_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def populate_layers_scratch(layers, x, layers_scratch, compute_scratch):
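# Base pass: evaluates the already-frozen layers on x with the new bottom layer's
# contribution treated as all zeros, caching each layer's node values so
# evaluate_cached can patch in a candidate's effect incrementally.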
|
||||
layers_scratch[0].fill(0)
|
||||
for i in range(1, len(layers_scratch)):
|
||||
scratch = layers_scratch[i]
|
||||
layer = layers[i - 1]
|
||||
for j in range(0, layer.node_count):
|
||||
value = layer.bias[j]
|
||||
np.multiply(layer.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
value ^= left * right
|
||||
scratch[j] = value
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
def evaluate_cached(candidate, x, layers_scratch, layers_scratch_base, compute_scratch):
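# Incremental evaluation: computes layer 0 for this candidate only, then walks up
# the cached base values (from populate_layers_scratch), XOR-toggling just the
# nodes whose child product differs from the base instead of re-evaluating every layer.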
|
||||
global N
|
||||
maybe_evaluate = set()
|
||||
for j in range(0, candidate.node_count, 2):
|
||||
value = candidate.bias[j]
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
layers_scratch[0][j] = value
|
||||
if candidate.node_count > 1:
|
||||
value = candidate.bias[j + 1]
|
||||
np.multiply(candidate.offsets[j + 1], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
layers_scratch[0][j + 1] = value
|
||||
if layers_scratch[0][j] == 1 and layers_scratch[0][j + 1] == 1:
|
||||
maybe_evaluate.add(int(j / 2))
|
||||
|
||||
for i in range(1, len(layers_scratch)):
|
||||
np.copyto(layers_scratch[i], layers_scratch_base[i])
|
||||
maybe_evaluate_next = set()
|
||||
for j in maybe_evaluate:
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
child_value = left * right
|
||||
left_base = layers_scratch_base[i - 1][j * 2]
|
||||
right_base = layers_scratch_base[i - 1][j * 2 + 1]
|
||||
child_base_value = left_base * right_base
|
||||
if child_value != child_base_value:
|
||||
layers_scratch[i][j] ^= 1
|
||||
maybe_evaluate_next.add(int(j / 2))
|
||||
maybe_evaluate = maybe_evaluate_next
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
def evaluate(layers, candidate, x, layers_scratch, compute_scratch):
|
||||
global N
|
||||
for i in range(0, len(layers_scratch)):
|
||||
scratch = layers_scratch[i]
|
||||
if i == 0:
|
||||
for j in range(0, candidate.node_count):
|
||||
value = candidate.bias[j]
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
scratch[j] = value
|
||||
else:
|
||||
layer = layers[i - 1]
|
||||
for j in range(0, layer.node_count):
|
||||
value = layer.bias[j]
|
||||
np.multiply(layer.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
value ^= left * right
|
||||
scratch[j] = value
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
@timeit
|
||||
def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch):
|
||||
global M, N
|
||||
scores.fill(0)
|
||||
unique_candidates = {}
|
||||
for j in range(0, num_candidates):
|
||||
create_candidate(probabilities, candidates[j])
|
||||
unique_candidates[candidate_str(candidates[j])] = j
|
||||
|
||||
for i in range(0, sample_size):
|
||||
populate_layers_scratch(layers, inputs[i], layers_scratch_base, int_scratch)
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
outputs[j][i] = evaluate_cached(candidate, inputs[i], layers_scratch, layers_scratch_base, int_scratch)
|
||||
# if outputs[j][i] != evaluate(layers, candidate, inputs[i], layers_scratch, int_scratch):
|
||||
# print('Uh-oh')
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
scores[j] = coherence(output_xor, distances)
|
||||
|
||||
@timeit
|
||||
def update_probabilities(probabilities, candidates, scores):
|
||||
global N
|
||||
num_candidates = len(candidates)
|
||||
|
||||
for i in range(0, 2):
|
||||
for j in range(0, probabilities.node_count):
|
||||
bias_max = 0
|
||||
bias_sum = 0
|
||||
bias_count = 0
|
||||
for p in range(0, num_candidates):
|
||||
candidate = candidates[p]
|
||||
if candidate.bias[j] != i:
|
||||
continue
|
||||
if scores[p] == 0:
|
||||
continue
|
||||
bias_max = max(bias_max, scores[p])
|
||||
bias_sum += scores[p]
|
||||
bias_count += 1
|
||||
if bias_max == 0:
|
||||
continue
|
||||
# weight = bias_count / num_candidates
|
||||
weight = 0.1
|
||||
bias_avg = bias_sum / bias_count
|
||||
probabilities.bias_coherences[i][j] = (1.0 - weight) * probabilities.bias_coherences[i][j] + weight * bias_max
|
||||
# probabilities.bias_coherences[i][j] = bias_max
|
||||
|
||||
for i in range(0, 2):
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N):
|
||||
offset_max = 0
|
||||
offset_sum = 0
|
||||
offset_count = 0
|
||||
for p in range(0, num_candidates):
|
||||
candidate = candidates[p]
|
||||
if candidate.offsets[j][k] != i:
|
||||
continue
|
||||
if scores[p] == 0:
|
||||
continue
|
||||
offset_max = max(offset_max, scores[p])
|
||||
offset_sum += scores[p]
|
||||
offset_count += 1
|
||||
if offset_max == 0:
|
||||
continue
|
||||
# weight = offset_count / num_candidates
|
||||
weight = 0.1
|
||||
offset_avg = offset_sum / offset_count
|
||||
probabilities.offset_coherences[i][j][k] = (1.0 - weight) * probabilities.offset_coherences[i][j][k] + weight * offset_max
|
||||
# probabilities.offset_coherences[i][j][k] = offset_max
|
||||
|
||||
for j in range(0, probabilities.node_count):
|
||||
base_delta = probabilities.bias_coherences[1][j] - probabilities.bias_coherences[0][j]
|
||||
delta = base_delta
|
||||
q = j ^ 0b1
|
||||
if probabilities.node_count > 1:
|
||||
q_delta = probabilities.bias_coherences[1][q] - probabilities.bias_coherences[0][q]
|
||||
if base_delta > 0 and q_delta > 0:
|
||||
delta -= 0.5 * q_delta
|
||||
|
||||
p_bias_next = clamp(probabilities.p_bias[j] + delta, 0, 1)
|
||||
probabilities.p_bias[j] = 0.9 * probabilities.p_bias[j] + 0.1 * p_bias_next
|
||||
for k in range(0, N):
|
||||
base_delta = probabilities.offset_coherences[1][j][k] - probabilities.offset_coherences[0][j][k]
|
||||
delta = base_delta
|
||||
if probabilities.node_count > 1:
|
||||
q_delta = probabilities.offset_coherences[1][q][k] - probabilities.offset_coherences[0][q][k]
|
||||
if base_delta > 0 and q_delta > 0:
|
||||
delta -= 0.5 * q_delta
|
||||
|
||||
p_offset_next = clamp(probabilities.p_offsets[j][k] + delta, 0, 1)
|
||||
probabilities.p_offsets[j][k] = 0.9 * probabilities.p_offsets[j][k] + 0.1 * p_offset_next
|
||||
|
||||
def create_candidate(probabilities, candidate):
|
||||
global N
|
||||
for i in range(0, probabilities.node_count):
|
||||
candidate.bias[i] = 1 if random.random() < probabilities.p_bias[i] else 0
|
||||
# candidate.bias[i] = 0
|
||||
for j in range(0, N):
|
||||
candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0
|
||||
# candidate.normalize()
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
global N
|
||||
for i in range(0, src.node_count):
|
||||
dest.bias[i] = src.bias[i]
|
||||
for i in range(0, src.node_count):
|
||||
for j in range(0, N):
|
||||
dest.offsets[i][j] = src.offsets[i][j]
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities):
|
||||
print('=====================')
|
||||
for i in range(0, probabilities.node_count):
|
||||
print(i, p(probabilities.p_bias[i]), p_a(probabilities.p_offsets[i]))
|
||||
print('=====================')
|
||||
|
||||
def candidate_str(candidate):
|
||||
global N
|
||||
build_str = ''
|
||||
for i in range(0, candidate.node_count):
|
||||
build_str += str(candidate.bias[i])
|
||||
for j in range(0, N):
|
||||
build_str += str(candidate.offsets[i][j])
|
||||
return build_str
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 8
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros((N,))
|
||||
int_scratch = np.zeros((N,)).astype(np.int32)
|
||||
g = sha
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
inputs = random_sample(sample_size, N)
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((num_candidates + num_survivors, sample_size,))
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
|
||||
layers = []
|
||||
layers_scratch = [np.zeros(1, ).astype(np.int32)]
|
||||
layers_scratch_base = [np.zeros(1, ).astype(np.int32)]
|
||||
layer = 0
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# outputs[0][i] = candidate_fn(inputs[i])
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
# print(score)
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# outputs[0][i] = true_fn(inputs[i])
|
||||
|
||||
# np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
# np.mod(output_xor, M, output_xor)
|
||||
# score = coherence(output_xor, distances)
|
||||
# print(score)
|
||||
# return
|
||||
|
||||
while score < 1:
|
||||
probabilities = Probabilities(layer)
|
||||
candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)]
|
||||
inertia = 1
|
||||
while inertia > 1e-2:
|
||||
compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch)
|
||||
update_probabilities(probabilities, candidates, scores)
|
||||
inertia = 0.9 * inertia + 0.1 * probabilities.inertia()
|
||||
|
||||
print_probabilities(probabilities)
|
||||
for candidate in layers:
|
||||
print(candidate.bias, candidate.offsets)
|
||||
print(np.max(scores), probabilities.inertia(), inertia)
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
src = candidates[src_index]
|
||||
dest = candidates[dest_index]
|
||||
candidates[dest_index] = src
|
||||
candidates[src_index] = dest
|
||||
|
||||
inputs = random_sample(sample_size, N)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
|
||||
candidate = probabilities.flatten()
|
||||
for j in range(0, sample_size):
|
||||
outputs[0][j] = evaluate(layers, candidate, inputs[j], layers_scratch, int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
layers.insert(0, candidate)
|
||||
layer += 1
|
||||
layers_scratch.insert(0, np.zeros(2 ** layer,).astype(np.int32))
|
||||
layers_scratch_base.insert(0, np.zeros(2 ** layer,).astype(np.int32))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
455
mutations7.py
Normal file
455
mutations7.py
Normal file
@ -0,0 +1,455 @@
|
||||
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
|
||||
def vec_to_int(x):
|
||||
global N
|
||||
z = 0
|
||||
for i in range(0, N + 1):
|
||||
z <<= 1
|
||||
z |= x[i]
|
||||
return z
|
||||
|
||||
def timeit(f):
|
||||
def timed(*args, **kw):
|
||||
ts = time.time()
|
||||
result = f(*args, **kw)
|
||||
te = time.time()
|
||||
|
||||
print('func:%r took: %2.4f sec' % (f.__name__, te-ts))
|
||||
return result
|
||||
return timed
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = 2 ** layer
|
||||
self.offsets = np.zeros((self.node_count, N + 1)).astype(np.int32)
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = 2 ** layer
|
||||
self.p_offsets = np.zeros((self.node_count, N + 1))
|
||||
self.p_offsets.fill(0.5)
|
||||
self.offset_coherences = np.zeros((2, self.node_count, N + 1, 2, self.node_count, N + 1))
|
||||
self.offset_coherences.fill(-1)
|
||||
|
||||
def inertia(self):
|
||||
global N
|
||||
total = 0
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, N + 1):
|
||||
if self.p_offsets[i][j] > 1e-2 and self.p_offsets[i][j] < (1 - 1e-2):
|
||||
total += abs(self.offset_coherences[1][i][j][1][i][j] - self.offset_coherences[0][i][j][0][i][j])
|
||||
return total
|
||||
|
||||
def flatten(self):
|
||||
candidate = Candidate(self.layer)
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, N + 1):
|
||||
candidate.offsets[i][j] = 1 if self.p_offsets[i][j] >= 0.5 else 0
|
||||
if self.node_count > 1:
|
||||
for i in range(0, self.node_count):
|
||||
if not candidate.offsets[i].any():
|
||||
q = i ^ 0b1
|
||||
candidate.offsets[q].fill(0)
|
||||
return candidate
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
global N
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
# 00100111 x4
|
||||
# 00000110 x1
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
def test_fn(x):
|
||||
# 0 1
|
||||
# 2 | 3
|
||||
# 4 | 5 | 6 | 7
|
||||
# | | 0 | 7 | | | |
|
||||
return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))
|
||||
|
||||
def candidate_fn(x):
|
||||
return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2])
|
||||
|
||||
def true_fn(x):
|
||||
return x[0] ^ x[1] ^ (x[3] * x[2])
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(outputs, distances):
|
||||
coherences = []
|
||||
for i in range(0, len(outputs)):
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(outputs)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = outputs[j]
|
||||
weight = distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n + 1)).astype(np.int32)
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
inputs[i][n] = 1
|
||||
return inputs
|
||||
|
||||
def populate_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def populate_layers_scratch(layers, x, layers_scratch, compute_scratch):
|
||||
layers_scratch[0].fill(0)
|
||||
for i in range(1, len(layers_scratch)):
|
||||
scratch = layers_scratch[i]
|
||||
layer = layers[i - 1]
|
||||
for j in range(0, layer.node_count):
|
||||
value = 0
|
||||
np.multiply(layer.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
value ^= left * right
|
||||
scratch[j] = value
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
def evaluate_cached(candidate, x, layers_scratch, layers_scratch_base, compute_scratch):
|
||||
global N
|
||||
maybe_evaluate = set()
|
||||
for j in range(0, candidate.node_count, 2):
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
layers_scratch[0][j] = value
|
||||
if candidate.node_count > 1:
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j + 1], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
layers_scratch[0][j + 1] = value
|
||||
if layers_scratch[0][j] == 1 and layers_scratch[0][j + 1] == 1:
|
||||
maybe_evaluate.add(int(j / 2))
|
||||
|
||||
for i in range(1, len(layers_scratch)):
|
||||
np.copyto(layers_scratch[i], layers_scratch_base[i])
|
||||
maybe_evaluate_next = set()
|
||||
for j in maybe_evaluate:
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
child_value = left * right
|
||||
left_base = layers_scratch_base[i - 1][j * 2]
|
||||
right_base = layers_scratch_base[i - 1][j * 2 + 1]
|
||||
child_base_value = left_base * right_base
|
||||
if child_value != child_base_value:
|
||||
layers_scratch[i][j] ^= 1
|
||||
maybe_evaluate_next.add(int(j / 2))
|
||||
maybe_evaluate = maybe_evaluate_next
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
def evaluate(layers, candidate, x, layers_scratch, compute_scratch):
|
||||
global N
|
||||
for i in range(0, len(layers_scratch)):
|
||||
scratch = layers_scratch[i]
|
||||
if i == 0:
|
||||
for j in range(0, candidate.node_count):
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
scratch[j] = value
|
||||
else:
|
||||
layer = layers[i - 1]
|
||||
for j in range(0, layer.node_count):
|
||||
value = 0
|
||||
np.multiply(layer.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
value ^= left * right
|
||||
scratch[j] = value
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
@timeit
|
||||
def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch):
|
||||
global M, N
|
||||
scores.fill(0)
|
||||
unique_candidates = {}
|
||||
for j in range(0, num_candidates):
|
||||
create_candidate(probabilities, candidates[j])
|
||||
unique_candidates[candidate_str(candidates[j])] = j
|
||||
|
||||
for i in range(0, sample_size):
|
||||
populate_layers_scratch(layers, inputs[i], layers_scratch_base, int_scratch)
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
outputs[j][i] = evaluate_cached(candidate, inputs[i], layers_scratch, layers_scratch_base, int_scratch)
|
||||
# if outputs[j][i] != evaluate(layers, candidate, inputs[i], layers_scratch, int_scratch):
|
||||
# print('Uh-oh')
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
scores[j] = coherence(output_xor, distances)
|
||||
|
||||
@timeit
|
||||
def update_probabilities(probabilities, candidates, inputs, scores):
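# Pairwise credit assignment: records, for every joint setting of two offset bits,
# the best score observed, then moves each bit's probability by the advantage of
# setting it to 1 versus 0, weighted by the current probability of the other bit's
# setting; returns the total probability movement as the 'inertia' signal.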
|
||||
global N
|
||||
num_candidates = len(candidates)
|
||||
|
||||
variance = np.zeros((N + 1,))
|
||||
for x in inputs:
|
||||
variance += x
|
||||
|
||||
probabilities.offset_coherences.fill(-1)
|
||||
for p in range(0, num_candidates):
|
||||
candidate = candidates[p]
|
||||
score = scores[p]
|
||||
if score == 0:
|
||||
continue
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
i = candidate.offsets[j][k]
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
l = candidate.offsets[m][n]
|
||||
probabilities.offset_coherences[i][j][k][l][m][n] = max(score, probabilities.offset_coherences[i][j][k][l][m][n])
|
||||
|
||||
# for i in range(0, 2):
|
||||
# for j in range(0, probabilities.node_count):
|
||||
# for k in range(0, N + 1):
|
||||
# for l in range(0, 2):
|
||||
# for m in range(0, probabilities.node_count):
|
||||
# for n in range(0, N + 1):
|
||||
# offset_max = 0
|
||||
# offset_sum = 0
|
||||
# offset_count = 0
|
||||
# for p in range(0, num_candidates):
|
||||
# candidate = candidates[p]
|
||||
# if candidate.offsets[j][k] != i:
|
||||
# continue
|
||||
# if candidate.offsets[m][n] != l:
|
||||
# continue
|
||||
# if scores[p] == 0:
|
||||
# continue
|
||||
# offset_max = max(offset_max, scores[p])
|
||||
# offset_sum += scores[p]
|
||||
# offset_count += 1
|
||||
# if offset_max == 0:
|
||||
# continue
|
||||
# probabilities.offset_coherences[i][j][k][l][m][n] = offset_max
|
||||
|
||||
p_offsets_next = np.zeros((probabilities.node_count, N + 1))
|
||||
inertia = 0
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
delta = 0
|
||||
count = 0
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
if j == m and k == n:
|
||||
continue
|
||||
# confidence = variance[k] * variance[n] / (len(inputs) ** 2)
|
||||
confidence = 1.0
|
||||
p_j1_if_m0 = probabilities.offset_coherences[1][j][k][0][m][n]
|
||||
p_j0_if_m0 = probabilities.offset_coherences[0][j][k][0][m][n]
|
||||
p_j1_if_m1 = probabilities.offset_coherences[1][j][k][1][m][n]
|
||||
p_j0_if_m1 = probabilities.offset_coherences[0][j][k][1][m][n]
|
||||
if p_j1_if_m0 >= 0 and p_j0_if_m0 >= 0:
|
||||
delta_if_m0 = p_j1_if_m0 - p_j0_if_m0
|
||||
delta += delta_if_m0 * (1.0 - probabilities.p_offsets[m][n]) * confidence
|
||||
count += 1
|
||||
if p_j1_if_m1 >= 0 and p_j0_if_m1 >= 0:
|
||||
delta_if_m1 = p_j1_if_m1 - p_j0_if_m1
|
||||
delta += delta_if_m1 * probabilities.p_offsets[m][n] * confidence
|
||||
count += 1
|
||||
if count > 0:
|
||||
delta /= count
|
||||
p_offsets_next[j][k] = clamp(probabilities.p_offsets[j][k] + delta, 0, 1)
|
||||
inertia += abs(p_offsets_next[j][k] - probabilities.p_offsets[j][k])
|
||||
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
p_offset_next = p_offsets_next[j][k]
|
||||
probabilities.p_offsets[j][k] = 0.9 * probabilities.p_offsets[j][k] + 0.1 * p_offset_next
|
||||
|
||||
# if probabilities.node_count > 1:
|
||||
# for j in range(0, probabilities.node_count):
|
||||
# q = j ^ 0b1
|
||||
# for k in range(0, N + 1):
|
||||
# if probabilities.p_offsets[j][k] > 0.5:
|
||||
# probabilities.p_offsets[q][k] = min(probabilities.p_offsets[q][k], 1 - probabilities.p_offsets[j][k])
|
||||
|
||||
return inertia
|
||||
|
||||
def create_candidate(probabilities, candidate):
|
||||
global N
|
||||
for i in range(0, probabilities.node_count):
|
||||
for j in range(0, N + 1):
|
||||
candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
global N
|
||||
for i in range(0, src.node_count):
|
||||
for j in range(0, N + 1):
|
||||
dest.offsets[i][j] = src.offsets[i][j]
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities):
|
||||
print('=====================')
|
||||
for i in range(0, probabilities.node_count):
|
||||
print(i, p_a(probabilities.p_offsets[i]))
|
||||
print('=====================')
|
||||
|
||||
def candidate_str(candidate):
|
||||
global N
|
||||
build_str = ''
|
||||
for i in range(0, candidate.node_count):
|
||||
for j in range(0, N + 1):
|
||||
build_str += str(candidate.offsets[i][j])
|
||||
return build_str
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 8
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros((N + 1,))
|
||||
int_scratch = np.zeros((N + 1,)).astype(np.int32)
|
||||
g = test_fn
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
inputs = random_sample(sample_size, N)
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((num_candidates + num_survivors, sample_size,))
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
|
||||
layers = []
|
||||
layers_scratch = [np.zeros(1, ).astype(np.int32)]
|
||||
layers_scratch_base = [np.zeros(1, ).astype(np.int32)]
|
||||
layer = 0
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# outputs[0][i] = candidate_fn(inputs[i])
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
# print(score)
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# outputs[0][i] = true_fn(inputs[i])
|
||||
|
||||
# np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
# np.mod(output_xor, M, output_xor)
|
||||
# score = coherence(output_xor, distances)
|
||||
# print(score)
|
||||
# return
|
||||
|
||||
while score < 1:
|
||||
probabilities = Probabilities(layer)
|
||||
candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)]
|
||||
inertia = 1
|
||||
while inertia > 0.01:
|
||||
compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch)
|
||||
round_inertia = update_probabilities(probabilities, candidates, inputs, scores)
|
||||
inertia = 0.9 * inertia + 0.1 * round_inertia
|
||||
|
||||
print_probabilities(probabilities)
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
print(np.max(scores), round_inertia, inertia)
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
src = candidates[src_index]
|
||||
dest = candidates[dest_index]
|
||||
candidates[dest_index] = src
|
||||
candidates[src_index] = dest
|
||||
|
||||
inputs = random_sample(sample_size, N)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
|
||||
candidate = probabilities.flatten()
|
||||
for j in range(0, sample_size):
|
||||
outputs[0][j] = evaluate(layers, candidate, inputs[j], layers_scratch, int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
layers.insert(0, candidate)
|
||||
layer += 1
|
||||
layers_scratch.insert(0, np.zeros(2 ** layer,).astype(np.int32))
|
||||
layers_scratch_base.insert(0, np.zeros(2 ** layer,).astype(np.int32))
|
||||
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
451
mutations8.py
Normal file

@ -0,0 +1,451 @@
|
||||
from enum import unique
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
|
||||
def vec_to_int(x):
|
||||
global N
|
||||
z = 0
|
||||
for i in range(0, N + 1):
|
||||
z <<= 1
|
||||
z |= x[i]
|
||||
return z
|
||||
|
||||
def timeit(f):
|
||||
def timed(*args, **kw):
|
||||
ts = time.time()
|
||||
result = f(*args, **kw)
|
||||
te = time.time()
|
||||
|
||||
print('func:%r took: %2.4f sec' % (f.__name__, te-ts))
|
||||
return result
|
||||
return timed
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = 2 ** layer
|
||||
self.offsets = np.zeros((self.node_count, N + 1)).astype(np.int32)
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = 2 ** layer
|
||||
self.p_offsets = np.zeros((self.node_count, N + 1))
|
||||
self.p_offsets.fill(0.5)
|
||||
self.offset_coherences = np.zeros((2, self.node_count, N + 1, 2, self.node_count, N + 1))
|
||||
self.offset_coherences.fill(-1)
|
||||
self.deltas = np.zeros((self.node_count, N + 1, 2, self.node_count, N + 1))
|
||||
|
||||
def inertia(self):
|
||||
global N
|
||||
total = 0
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, N + 1):
|
||||
if self.p_offsets[i][j] > 1e-2 and self.p_offsets[i][j] < (1 - 1e-2):
|
||||
total += abs(self.offset_coherences[1][i][j][1][i][j] - self.offset_coherences[0][i][j][0][i][j])
|
||||
return total
|
||||
|
||||
def flatten(self):
|
||||
candidate = Candidate(self.layer)
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, N + 1):
|
||||
candidate.offsets[i][j] = 1 if self.p_offsets[i][j] >= 0.5 else 0
|
||||
if self.node_count > 1:
|
||||
for i in range(0, self.node_count):
|
||||
if not candidate.offsets[i].any():
|
||||
q = i ^ 0b1
|
||||
candidate.offsets[q].fill(0)
|
||||
return candidate
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
global N
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
# 00100111 x4
|
||||
# 00000110 x1
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
def test_fn(x):
|
||||
# 0 1
|
||||
# 2 | 3
|
||||
# 4 | 5 | 6 | 7
|
||||
# | | 0 | 7 | | | |
|
||||
return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))
|
||||
|
||||
def candidate_fn(x):
|
||||
return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2])
|
||||
|
||||
def true_fn(x):
|
||||
return x[0] ^ x[1] ^ (x[3] * x[2])
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(outputs, distances):
|
||||
coherences = []
|
||||
for i in range(0, len(outputs)):
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(outputs)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = outputs[j]
|
||||
weight = distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
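# In words: for each sample i the loop above takes a distance-weighted vote over all
# other samples j, using the 2^-hamming(x_i, x_j) weights prepared by populate_distances
# below, and measures how much of that weight agrees with output i. The returned
# coherence is the mean of those per-sample ratios and reaches 1.0 only when every
# output in the sample agrees.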
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n + 1)).astype(np.int32)
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
inputs[i][n] = 1
|
||||
return inputs
|
||||
|
||||
def populate_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def populate_layers_scratch(layers, x, layers_scratch, compute_scratch):
|
||||
layers_scratch[0].fill(0)
|
||||
for i in range(1, len(layers_scratch)):
|
||||
scratch = layers_scratch[i]
|
||||
layer = layers[i - 1]
|
||||
for j in range(0, layer.node_count):
|
||||
value = 0
|
||||
np.multiply(layer.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
value ^= left * right
|
||||
scratch[j] = value
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
def evaluate_cached(candidate, x, layers_scratch, layers_scratch_base, compute_scratch):
|
||||
global N
|
||||
maybe_evaluate = set()
|
||||
for j in range(0, candidate.node_count, 2):
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
layers_scratch[0][j] = value
|
||||
if candidate.node_count > 1:
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j + 1], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
layers_scratch[0][j + 1] = value
|
||||
if layers_scratch[0][j] == 1 and layers_scratch[0][j + 1] == 1:
|
||||
maybe_evaluate.add(int(j / 2))
|
||||
|
||||
for i in range(1, len(layers_scratch)):
|
||||
np.copyto(layers_scratch[i], layers_scratch_base[i])
|
||||
maybe_evaluate_next = set()
|
||||
for j in maybe_evaluate:
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
child_value = left * right
|
||||
left_base = layers_scratch_base[i - 1][j * 2]
|
||||
right_base = layers_scratch_base[i - 1][j * 2 + 1]
|
||||
child_base_value = left_base * right_base
|
||||
if child_value != child_base_value:
|
||||
layers_scratch[i][j] ^= 1
|
||||
maybe_evaluate_next.add(int(j / 2))
|
||||
maybe_evaluate = maybe_evaluate_next
|
||||
return layers_scratch[-1][0]
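# Caching note (descriptive only): layers_scratch_base holds the node values of the
# frozen layers evaluated with the candidate layer zeroed out (see
# populate_layers_scratch above). Only parents whose AND-of-children differs from that
# base value are flipped and propagated upward; every other node is copied from the
# base, which is what makes evaluate_cached cheaper than a full evaluate().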
|
||||
|
||||
def evaluate(layers, candidate, x, layers_scratch, compute_scratch):
|
||||
global N
|
||||
for i in range(0, len(layers_scratch)):
|
||||
scratch = layers_scratch[i]
|
||||
if i == 0:
|
||||
for j in range(0, candidate.node_count):
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
scratch[j] = value
|
||||
else:
|
||||
layer = layers[i - 1]
|
||||
for j in range(0, layer.node_count):
|
||||
value = 0
|
||||
np.multiply(layer.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
value ^= left * right
|
||||
scratch[j] = value
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
@timeit
|
||||
def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch):
|
||||
global M, N
|
||||
scores.fill(0)
|
||||
unique_candidates = {}
|
||||
for j in range(0, num_candidates):
|
||||
create_candidate(probabilities, candidates[j])
|
||||
unique_candidates[candidate_str(candidates[j])] = j
|
||||
|
||||
for i in range(0, sample_size):
|
||||
populate_layers_scratch(layers, inputs[i], layers_scratch_base, int_scratch)
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
outputs[j][i] = evaluate_cached(candidate, inputs[i], layers_scratch, layers_scratch_base, int_scratch)
|
||||
# if outputs[j][i] != evaluate(layers, candidate, inputs[i], layers_scratch, int_scratch):
|
||||
# print('Uh-oh')
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
scores[j] = coherence(output_xor, distances)
|
||||
|
||||
@timeit
|
||||
def update_probabilities(probabilities, candidates, inputs, scores, scale):
|
||||
global N
|
||||
num_candidates = len(candidates)
|
||||
|
||||
probabilities.offset_coherences.fill(-1)
|
||||
for p in range(0, num_candidates):
|
||||
candidate = candidates[p]
|
||||
score = scores[p]
|
||||
if score == 0:
|
||||
continue
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
i = candidate.offsets[j][k]
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
l = candidate.offsets[m][n]
|
||||
probabilities.offset_coherences[i][j][k][l][m][n] = max(score, probabilities.offset_coherences[i][j][k][l][m][n])
|
||||
|
||||
# for i in range(0, 2):
|
||||
# for j in range(0, probabilities.node_count):
|
||||
# for k in range(0, N + 1):
|
||||
# for l in range(0, 2):
|
||||
# for m in range(0, probabilities.node_count):
|
||||
# for n in range(0, N + 1):
|
||||
# offset_max = 0
|
||||
# offset_sum = 0
|
||||
# offset_count = 0
|
||||
# for p in range(0, num_candidates):
|
||||
# candidate = candidates[p]
|
||||
# if candidate.offsets[j][k] != i:
|
||||
# continue
|
||||
# if candidate.offsets[m][n] != l:
|
||||
# continue
|
||||
# if scores[p] == 0:
|
||||
# continue
|
||||
# offset_max = max(offset_max, scores[p])
|
||||
# offset_sum += scores[p]
|
||||
# offset_count += 1
|
||||
# if offset_max == 0:
|
||||
# continue
|
||||
# probabilities.offset_coherences[i][j][k][l][m][n] = offset_max
|
||||
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
# if j == m and k == n:
|
||||
# continue
|
||||
p_j1_if_m0 = probabilities.offset_coherences[1][j][k][0][m][n]
|
||||
p_j0_if_m0 = probabilities.offset_coherences[0][j][k][0][m][n]
|
||||
p_j1_if_m1 = probabilities.offset_coherences[1][j][k][1][m][n]
|
||||
p_j0_if_m1 = probabilities.offset_coherences[0][j][k][1][m][n]
|
||||
if p_j1_if_m0 >= 0 and p_j0_if_m0 >= 0:
|
||||
delta_if_m0 = p_j1_if_m0 - p_j0_if_m0
|
||||
probabilities.deltas[j][k][0][m][n] = delta_if_m0
|
||||
if p_j1_if_m1 >= 0 and p_j0_if_m1 >= 0:
|
||||
delta_if_m1 = p_j1_if_m1 - p_j0_if_m1
|
||||
probabilities.deltas[j][k][1][m][n] = delta_if_m1
|
||||
|
||||
p_offsets_next = np.zeros((probabilities.node_count, N + 1))
|
||||
p_offsets_next.fill(0.5)
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
# if j == m and k == n:
|
||||
# continue
|
||||
delta = probabilities.deltas[j][k][1][m][n] * probabilities.p_offsets[m][n] + probabilities.deltas[j][k][0][m][n] * (1 - probabilities.p_offsets[m][n])
|
||||
p_offsets_next[j][k] += delta * scale
|
||||
# if delta > 0 and probabilities.node_count > 1:
|
||||
# q = j ^ 0b1
|
||||
# p_offsets_next[q][k] -= delta * scale
|
||||
|
||||
inertia = 0
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
value = clamp(p_offsets_next[j][k], 0, 1)
|
||||
inertia += abs(probabilities.p_offsets[j][k] - value)
|
||||
probabilities.p_offsets[j][k] = value
|
||||
|
||||
return inertia
|
||||
|
||||
def create_candidate(probabilities, candidate):
|
||||
global N
|
||||
for i in range(0, probabilities.node_count):
|
||||
for j in range(0, N + 1):
|
||||
candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
global N
|
||||
for i in range(0, src.node_count):
|
||||
for j in range(0, N + 1):
|
||||
dest.offsets[i][j] = src.offsets[i][j]
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities):
|
||||
print('=====================')
|
||||
for i in range(0, probabilities.node_count):
|
||||
print(i, p_a(probabilities.p_offsets[i]))
|
||||
print('=====================')
|
||||
|
||||
def candidate_str(candidate):
|
||||
global N
|
||||
build_str = ''
|
||||
for i in range(0, candidate.node_count):
|
||||
for j in range(0, N + 1):
|
||||
build_str += str(candidate.offsets[i][j])
|
||||
return build_str
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 8
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros((N + 1,))
|
||||
int_scratch = np.zeros((N + 1,)).astype(np.int32)
|
||||
g = test_fn
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
inputs = random_sample(sample_size, N)
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((num_candidates + num_survivors, sample_size,))
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
|
||||
layers = []
|
||||
layers_scratch = [np.zeros(1, ).astype(np.int32)]
|
||||
layers_scratch_base = [np.zeros(1, ).astype(np.int32)]
|
||||
layer = 0
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# outputs[0][i] = candidate_fn(inputs[i])
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
# print(score)
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# outputs[0][i] = true_fn(inputs[i])
|
||||
|
||||
# np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
# np.mod(output_xor, M, output_xor)
|
||||
# score = coherence(output_xor, distances)
|
||||
# print(score)
|
||||
# return
|
||||
|
||||
while score < 1:
|
||||
probabilities = Probabilities(layer)
|
||||
candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)]
|
||||
inertia = 1
|
||||
epoch = 1
|
||||
while inertia > 0.001:
|
||||
compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch)
|
||||
round_inertia = update_probabilities(probabilities, candidates, inputs, scores, epoch / 1000.0)
|
||||
inertia = 0.9 * inertia + 0.1 * round_inertia
|
||||
|
||||
print_probabilities(probabilities)
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
print(np.max(scores), round_inertia, inertia)
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
src = candidates[src_index]
|
||||
dest = candidates[dest_index]
|
||||
candidates[dest_index] = src
|
||||
candidates[src_index] = dest
|
||||
|
||||
inputs = random_sample(sample_size, N)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
epoch += 1
|
||||
|
||||
candidate = probabilities.flatten()
|
||||
for j in range(0, sample_size):
|
||||
outputs[0][j] = evaluate(layers, candidate, inputs[j], layers_scratch, int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
layers.insert(0, candidate)
|
||||
layer += 1
|
||||
layers_scratch.insert(0, np.zeros(2 ** layer,).astype(np.int32))
|
||||
layers_scratch_base.insert(0, np.zeros(2 ** layer,).astype(np.int32))
|
||||
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
414
mutations9.py
Normal file
@ -0,0 +1,414 @@
|
||||
from enum import unique
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
|
||||
def vec_to_int(x):
|
||||
global N
|
||||
z = 0
|
||||
for i in range(0, N + 1):
|
||||
z <<= 1
|
||||
z |= x[i]
|
||||
return z
|
||||
|
||||
def timeit(f):
|
||||
def timed(*args, **kw):
|
||||
ts = time.time()
|
||||
result = f(*args, **kw)
|
||||
te = time.time()
|
||||
|
||||
print('func:%r took: %2.4f sec' % (f.__name__, te-ts))
|
||||
return result
|
||||
return timed
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = layer
|
||||
self.offsets = np.zeros((self.node_count, N + 1)).astype(np.int32)
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = layer
|
||||
self.p_offsets = np.zeros((self.node_count, N + 1))
|
||||
self.p_offsets.fill(0.5)
|
||||
self.offset_coherences = np.zeros((2, self.node_count, N + 1, 2, self.node_count, N + 1))
|
||||
self.offset_coherences.fill(-1)
|
||||
self.deltas = np.zeros((self.node_count, N + 1, 2, self.node_count, N + 1))
|
||||
|
||||
def has_converged(self):
|
||||
global N
|
||||
for i in range(0,self.node_count):
|
||||
for j in range(0, N + 1):
|
||||
if self.p_offsets[i][j] > 0.05 and self.p_offsets[i][j] < 0.95:
|
||||
return False
|
||||
return True
|
||||
|
||||
def flatten(self):
|
||||
global N
|
||||
candidate = Candidate(self.layer)
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, N + 1):
|
||||
candidate.offsets[i][j] = 1 if self.p_offsets[i][j] >= 0.95 else 0
|
||||
return candidate
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
global N
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
# 00100111 x4
|
||||
# 00000110 x1
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
|
||||
# 0 ^ 1 ^ (2 ^ (4 * (5 ^ 0 * 7))) * (3 ^ 6 * 7)
|
||||
# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * (5 ^ 0 * 7) ^ 4 * 6 * 7 * (5 ^ 0 * 7)
|
||||
# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * 5 ^ 0 * 3 * 4 * 7 ^ 4 * 5 * 6 * 7 ^ 0 * 4 * 6 * 7
|
||||
|
||||
# 0 ^ 1 ^ 2*3 ^ 2*6*7 ^ 3*4*5 ^ 0*3*4*7 ^ 4*5*6*7 ^ 0*4*6*7
|
||||
def test_fn(x):
|
||||
# 0 1
|
||||
# 2 | 3
|
||||
# 4 | 5 | 6 | 7
|
||||
# | | 0 | 7 | | | |
|
||||
return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))
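
# Optional sanity check (illustrative; the helper name is made up): brute-force all
# 2^8 inputs to confirm that the flattened XOR-of-ANDs form in the comments above
# matches the nested expression returned by test_fn.
def check_test_fn_expansion():
    for v in range(0, 2 ** 8):
        x = [(v >> i) & 1 for i in range(0, 8)]
        expanded = (x[0] ^ x[1]
                    ^ x[2] * x[3] ^ x[2] * x[6] * x[7]
                    ^ x[3] * x[4] * x[5] ^ x[0] * x[3] * x[4] * x[7]
                    ^ x[4] * x[5] * x[6] * x[7] ^ x[0] * x[4] * x[6] * x[7])
        assert expanded == test_fn(x)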
|
||||
|
||||
def candidate_fn(x):
|
||||
return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2])
|
||||
|
||||
def true_fn(x):
|
||||
return x[0] ^ x[1] ^ (x[3] * x[2])
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(outputs, distances):
|
||||
coherences = []
|
||||
for i in range(0, len(outputs)):
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(outputs)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = outputs[j]
|
||||
weight = distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n + 1)).astype(np.int32)
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
inputs[i][n] = 1
|
||||
return inputs
|
||||
|
||||
def populate_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def evaluate(layers, candidate, x, compute_scratch):
|
||||
global N
|
||||
z = evaluate_layers(layers, x, compute_scratch)
|
||||
z ^= evaluate_candidate(candidate, x, compute_scratch)
|
||||
return z
|
||||
|
||||
def evaluate_layers(layers, x, compute_scratch):
|
||||
global N
|
||||
z = 0
|
||||
for layer in layers:
|
||||
z ^= evaluate_candidate(layer, x, compute_scratch)
|
||||
return z
|
||||
|
||||
def evaluate_candidate(candidate, x, compute_scratch):
|
||||
y = 1
|
||||
for j in range(0, candidate.node_count):
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
y &= value
|
||||
return y
|
||||
|
||||
@timeit
|
||||
def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch):
|
||||
global M, N
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch)
|
||||
for j in range(1, num_candidates):
|
||||
np.copyto(outputs[j], outputs[0])
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
base_score = coherence(output_xor, distances)
|
||||
|
||||
scores.fill(0)
|
||||
unique_candidates = {}
|
||||
for j in range(0, num_candidates):
|
||||
create_candidate(probabilities, candidates[j])
|
||||
unique_candidates[candidate_str(candidates[j])] = j
|
||||
|
||||
for i in range(0, sample_size):
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
outputs[j][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
scores[j] = score
|
||||
return base_score
|
||||
|
||||
|
||||
def compute_uplift(candidate, layers, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch):
|
||||
global M, N
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
base_score = coherence(output_xor, distances)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
return (base_score, score)
|
||||
|
||||
@timeit
|
||||
def update_probabilities(probabilities, candidates, inputs, base_score, scores, scale):
|
||||
global N
|
||||
num_candidates = len(candidates)
|
||||
|
||||
probabilities.offset_coherences.fill(-1)
|
||||
for p in range(0, num_candidates):
|
||||
candidate = candidates[p]
|
||||
if scores[p] == 0:
|
||||
continue
|
||||
# score = max(scores[p], base_score)
|
||||
score = scores[p]
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
i = candidate.offsets[j][k]
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
l = candidate.offsets[m][n]
|
||||
probabilities.offset_coherences[i][j][k][l][m][n] = max(score, probabilities.offset_coherences[i][j][k][l][m][n])
|
||||
|
||||
p_offsets_next = np.zeros((probabilities.node_count, N + 1))
|
||||
inertia = 0
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
delta = 0
|
||||
count = 0
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
# if j == m and k == n:
|
||||
# continue
|
||||
p_j1_if_m0 = probabilities.offset_coherences[1][j][k][0][m][n]
|
||||
p_j0_if_m0 = probabilities.offset_coherences[0][j][k][0][m][n]
|
||||
p_j1_if_m1 = probabilities.offset_coherences[1][j][k][1][m][n]
|
||||
p_j0_if_m1 = probabilities.offset_coherences[0][j][k][1][m][n]
|
||||
if p_j1_if_m0 >= 0 and p_j0_if_m0 >= 0:
|
||||
# delta_if_m0 = (p_j1_if_m0 - base_score) - (p_j0_if_m0 - base_score)
|
||||
delta_if_m0 = p_j1_if_m0 - p_j0_if_m0
|
||||
delta += delta_if_m0 * (1.0 - probabilities.p_offsets[m][n]) * scale
|
||||
count += 1
|
||||
if p_j1_if_m1 >= 0 and p_j0_if_m1 >= 0:
|
||||
# delta_if_m1 = (p_j1_if_m1 - base_score) - (p_j0_if_m1 - base_score)
|
||||
delta_if_m1 = p_j1_if_m1 - p_j0_if_m1
|
||||
delta += delta_if_m1 * probabilities.p_offsets[m][n] * scale
|
||||
count += 1
|
||||
if count > 0:
|
||||
delta /= count
|
||||
p_offsets_next[j][k] = clamp(probabilities.p_offsets[j][k] + delta, 0, 1)
|
||||
inertia += abs(p_offsets_next[j][k] - probabilities.p_offsets[j][k])
|
||||
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
p_offset_next = 0.9 * probabilities.p_offsets[j][k] + 0.1 * p_offsets_next[j][k]
|
||||
# if p_offset_next <= 0.05:
|
||||
# p_offset_next = 0.0
|
||||
# elif p_offset_next >= 0.95:
|
||||
# p_offset_next = 1.0
|
||||
probabilities.p_offsets[j][k] = p_offset_next
|
||||
|
||||
return inertia
|
||||
|
||||
def create_candidate(probabilities, candidate):
|
||||
global N
|
||||
for i in range(0, probabilities.node_count):
|
||||
for j in range(0, N + 1):
|
||||
candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
global N
|
||||
for i in range(0, src.node_count):
|
||||
for j in range(0, N + 1):
|
||||
dest.offsets[i][j] = src.offsets[i][j]
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities):
|
||||
print('=====================')
|
||||
for i in range(0, probabilities.node_count):
|
||||
print(i, p_a(probabilities.p_offsets[i]))
|
||||
print('=====================')
|
||||
|
||||
def candidate_str(candidate):
|
||||
global N
|
||||
build_str = ''
|
||||
for i in range(0, candidate.node_count):
|
||||
for j in range(0, N + 1):
|
||||
build_str += str(candidate.offsets[i][j])
|
||||
return build_str
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 1
|
||||
uplift_sample_size = 100
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros((N + 1,))
|
||||
int_scratch = np.zeros((N + 1,)).astype(np.int32)
|
||||
g = test_fn
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
inputs = random_sample(sample_size, N)
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((num_candidates + num_survivors, sample_size,)).astype(np.int32)
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
|
||||
layers = []
|
||||
layer = 1
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
while score < 1:
|
||||
probabilities = Probabilities(layer)
|
||||
candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)]
|
||||
inertia = 1
|
||||
epoch = 1
|
||||
while inertia > 0.001 and epoch < 1000 and not probabilities.has_converged():
|
||||
base_score = compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch)
|
||||
round_inertia = update_probabilities(probabilities, candidates, inputs, base_score, scores, 1 + 0.01 * epoch)
|
||||
inertia = 0.9 * inertia + 0.1 * round_inertia
|
||||
|
||||
print_probabilities(probabilities)
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
max_score = np.max(scores)
|
||||
print(base_score, max_score, round_inertia, inertia)
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
src = candidates[src_index]
|
||||
dest = candidates[dest_index]
|
||||
candidates[dest_index] = src
|
||||
candidates[src_index] = dest
|
||||
|
||||
inputs = random_sample(sample_size, N)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
epoch += 1
|
||||
|
||||
candidate = probabilities.flatten()
|
||||
print(candidate.offsets)
|
||||
for j in range(0, sample_size):
|
||||
outputs[0][j] = evaluate(layers, candidate, inputs[j], int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
average_base_score = 0
|
||||
average_score = 0
|
||||
for i in range(0, uplift_sample_size):
|
||||
inputs = random_sample(sample_size, N)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
(base_score, score) = compute_uplift(candidate, layers, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch)
|
||||
average_base_score += base_score
|
||||
average_score += score
|
||||
average_base_score /= uplift_sample_size
|
||||
average_score /= uplift_sample_size
|
||||
uplift = average_score - average_base_score
|
||||
print(uplift)
|
||||
|
||||
if uplift <= 0:
|
||||
layer += 1
|
||||
continue
|
||||
|
||||
layers.insert(0, candidate)
|
||||
if layer == 1:
|
||||
layer += 1
|
||||
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
269
mutations_cuda.py
Normal file
@ -0,0 +1,269 @@
|
||||
# Sample source code from the Tutorial Introduction in the documentation.
|
||||
|
||||
import hashlib
|
||||
import numpy as np
|
||||
import math
|
||||
import pycuda.driver as cuda
|
||||
from pycuda.driver import Stream
|
||||
import pycuda.autoinit
|
||||
from pycuda.compiler import SourceModule
|
||||
import pycuda.gpuarray as gpuarray
|
||||
import random
|
||||
|
||||
'''
|
||||
a = numpy.random.randn(4,4)
|
||||
|
||||
a = a.astype(numpy.float32)
|
||||
|
||||
a_gpu = cuda.mem_alloc(a.size * a.dtype.itemsize)
|
||||
|
||||
cuda.memcpy_htod(a_gpu, a)
|
||||
|
||||
mod = SourceModule("""
|
||||
__global__ void doublify(float *a)
|
||||
{
|
||||
int idx = threadIdx.x + threadIdx.y*4;
|
||||
a[idx] *= 2;
|
||||
}
|
||||
""")
|
||||
|
||||
func = mod.get_function("doublify")
|
||||
func(a_gpu, block=(4,4,1))
|
||||
|
||||
a_doubled = numpy.empty_like(a)
|
||||
cuda.memcpy_dtoh(a_doubled, a_gpu)
|
||||
print("original array:")
|
||||
print(a)
|
||||
print("doubled with kernel:")
|
||||
print(a_doubled)
|
||||
|
||||
# alternate kernel invocation -------------------------------------------------
|
||||
|
||||
func(cuda.InOut(a), block=(4, 4, 1))
|
||||
print("doubled with InOut:")
|
||||
print(a)
|
||||
|
||||
# part 2 ----------------------------------------------------------------------
|
||||
|
||||
a_gpu = gpuarray.to_gpu(numpy.random.randn(4,4).astype(numpy.float32))
|
||||
a_doubled = (2*a_gpu).get()
|
||||
|
||||
print("original array:")
|
||||
print(a_gpu)
|
||||
print("doubled with gpuarray:")
|
||||
print(a_doubled)
|
||||
'''
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
sample_size = 64
|
||||
|
||||
def encode(v, offset):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if offset + index >= len(v):
|
||||
break
|
||||
x <<= 1
|
||||
x |= int(v[offset + index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)  # hash the accumulated bytes; bytearray(x) would just allocate a zero buffer
|
||||
|
||||
def sha(v, offset):
|
||||
global M
|
||||
x = encode(v, offset)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def create_program_r(model, output_var):
|
||||
global N, M
|
||||
(constant, scalars, child) = model
|
||||
program = 'int ' + output_var + ' = ' + str(constant) + ';\n'
|
||||
scalars_part = ' + '.join([str(scalars[i]) + ' * x[gid * ' + str(N) + ' + ' + str(i) + ']' for i in range(0, len(scalars)) if scalars[i] > 0])
|
||||
if len(scalars_part) > 0:
|
||||
program += output_var + ' += ' + scalars_part + ';\n'
|
||||
if not child is None:
|
||||
left_output = output_var + '0'
|
||||
right_output = output_var + '1'
|
||||
(left, right) = child
|
||||
program += create_program_r(left, left_output)
|
||||
program += create_program_r(right, right_output)
|
||||
program += output_var + ' += ' + left_output + ' * ' + right_output + ';\n'
|
||||
program += output_var + ' %= ' + str(M) + ';\n'
|
||||
return program
|
||||
|
||||
def create_program(model, name, offset):
|
||||
output_var = 'output'
|
||||
program = '__global__ void ' + name + '(const int *x, int *out) {\n'
|
||||
program += 'int gid = threadIdx.x + blockIdx.x * blockDim.x;\n'
|
||||
program += create_program_r(model, output_var)
|
||||
program += 'out[' + str(offset) + ' + gid] = ' + output_var + ';\n'
|
||||
program += '}\n'
|
||||
return program
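# For illustration only: with N = 8 and M = 2, a hypothetical model
# (1, [1, 0, 0, 0, 0, 0, 0, 0], None) passed through create_program(model, 'k0', 0)
# produces source along these lines:
#
#   __global__ void k0(const int *x, int *out) {
#   int gid = threadIdx.x + blockIdx.x * blockDim.x;
#   int output = 1;
#   output += 1 * x[gid * 8 + 0];
#   output %= 2;
#   out[0 + gid] = output;
#   }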
|
||||
|
||||
def distances_program():
|
||||
global N, sample_size
|
||||
program = "__global__ void p(const int *x, double *distances) {\n"
|
||||
program += " int gid = threadIdx.x + blockIdx.x * blockDim.x;\n"
|
||||
program += " int i = gid / " + str(sample_size) + ";\n"
|
||||
program += " int j = gid % " + str(sample_size) + ";\n"
|
||||
program += " if (i == j) {\n"
|
||||
program += " distances[gid] = 0;\n"
|
||||
program += " return;\n"
|
||||
program += " }\n"
|
||||
program += " int distance = 0;\n"
|
||||
program += " for (int k = 0; k < " + str(N) + "; k++) {\n"
|
||||
program += " distance += x[i * " + str(N) + " + k] ^ x[j * " + str(N) + " + k];\n"
|
||||
program += " }\n"
|
||||
program += " distances[gid] = pow((double)2.0, (double)-distance);\n"
|
||||
program += "}\n"
|
||||
return program
|
||||
|
||||
def coherence_program():
|
||||
global sample_size
|
||||
program = "__global__ void p(const int *y, const int *z, const double *distances, double *coherences) {\n"
|
||||
program += " int gid = threadIdx.x + blockIdx.x * blockDim.x;\n"
|
||||
program += " double numerator = 0;\n"
|
||||
program += " double denominator = 0;\n"
|
||||
program += " for (int i = 0; i < " + str(sample_size) + "; i++) {\n"
|
||||
program += " int p = z[i] ^ y[gid * " + str(sample_size) + " + i];\n"
|
||||
program += " for (int j = 0; j < " + str(sample_size) + "; j++) {\n"
|
||||
program += " int q = z[j] ^ y[gid * " + str(sample_size) + " + j];\n"
|
||||
program += " double distance = distances[i * " + str(sample_size) + " + j];\n"
|
||||
program += " denominator += distance;\n"
|
||||
program += " if (p == q) {\n"
|
||||
program += " numerator += distance;\n"
|
||||
program += " }\n"
|
||||
program += " }\n"
|
||||
program += " }\n"
|
||||
program += " coherences[gid] = numerator / denominator;\n"
|
||||
program += "}\n"
|
||||
return program
|
||||
|
||||
def random_sample():
|
||||
global N, sample_size
|
||||
x = np.zeros((N * sample_size,)).astype(np.int32)
|
||||
for i in range(0, len(x)):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def clone_model(model, p_mutation):
|
||||
global N, M
|
||||
|
||||
p_constant = p_mutation * random.random()
|
||||
p_flip = p_mutation * random.random()
|
||||
p_add_child = p_mutation * random.random()
|
||||
p_drop_child = p_mutation * random.random()
|
||||
|
||||
(constant, xors, child) = model
|
||||
if random.random() < p_constant:
|
||||
constant += random.randint(0, M - 1)
|
||||
constant %= M
|
||||
clone_xors = np.zeros((N,))
|
||||
np.copyto(clone_xors, xors)
|
||||
for i in range(0, N):
|
||||
if random.random() < p_flip:
|
||||
offset = 1 if M == 2 else random.randint(1, M - 1)
|
||||
clone_xors[i] += offset
|
||||
clone_xors[i] %= M
|
||||
if child is None:
|
||||
if random.random() < p_add_child:
|
||||
left = random_child(p_mutation)
|
||||
right = random_child(p_mutation)
|
||||
return (constant, clone_xors, (left, right))
|
||||
return (constant, clone_xors, None)
|
||||
if random.random() < p_drop_child:
|
||||
return (constant, clone_xors, None)
|
||||
(left, right) = child
|
||||
clone_left = clone_model(left, p_mutation)
|
||||
clone_right = clone_model(right, p_mutation)
|
||||
return (constant, clone_xors, (clone_left, clone_right))
|
||||
|
||||
def random_child(p_mutation):
|
||||
global N, M
|
||||
constant = random.randint(0, M - 1)
|
||||
xors = np.zeros((N,))
|
||||
|
||||
p_flip = p_mutation * random.random()
|
||||
p_child = p_mutation * random.random()
|
||||
|
||||
index = random.randint(0, N - 1)
|
||||
xors[index] = 1 if M == 2 else random.randint(1, M - 1)
|
||||
for i in range(0, N):
|
||||
if i != index and random.random() < p_flip:
|
||||
xors[i] = 1 if M == 2 else random.randint(1, M - 1)
|
||||
if random.random() < p_child:
|
||||
left = random_child(p_mutation * random.random())
|
||||
right = random_child(p_mutation * random.random())
|
||||
return (constant, xors, (left, right))
|
||||
return (constant, xors, None)
|
||||
|
||||
def null_candidate():
|
||||
global N
|
||||
return (0, np.zeros((N,)), None)
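# Model layout note: a model is a recursive tuple (constant, xors, child). The constant
# and the xors vector define an affine function of the N input bits mod M, and child,
# when present, is a (left, right) pair of sub-models whose product is added in before
# the final reduction mod M. For example, a hypothetical model
# (1, [0, 1, 0, 0, 0, 0, 0, 0], None) evaluates to 1 ^ x[1] when M == 2.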
|
||||
|
||||
def main():
|
||||
global N, M, sample_size
|
||||
epochs = 1000
|
||||
num_survivors = 100
|
||||
num_offspring = 10
|
||||
num_candidates = num_survivors + num_survivors * num_offspring
|
||||
block_size = 1
|
||||
|
||||
x = random_sample()
|
||||
z = np.zeros((sample_size,)).astype(np.int32)
|
||||
coherences = np.zeros((num_candidates,)).astype(np.float64)
|
||||
candidates = [null_candidate() for _ in range(0, num_candidates)]
|
||||
|
||||
for i in range(0, sample_size):
|
||||
z[i] = sha(x, N * i)
|
||||
# print(z)
|
||||
|
||||
x_gpu = cuda.mem_alloc(4 * N * sample_size)
|
||||
cuda.memcpy_htod(x_gpu, x)
|
||||
z_gpu = cuda.mem_alloc(4 * sample_size)
|
||||
cuda.memcpy_htod(z_gpu, z)
|
||||
distances_gpu = cuda.mem_alloc(8 * sample_size * sample_size)
|
||||
coherences_gpu = cuda.mem_alloc(8 * num_candidates)
|
||||
outputs_gpu = cuda.mem_alloc(4 * sample_size * num_candidates)
|
||||
|
||||
distances_kernel = SourceModule(distances_program()).get_function('p')
|
||||
coherence_kernel = SourceModule(coherence_program()).get_function('p')
|
||||
|
||||
distances_kernel(x_gpu, distances_gpu, block=(block_size, 1, 1), grid=(int(sample_size * sample_size / block_size), 1, 1))
|
||||
# distances = np.zeros((sample_size,sample_size)).astype(np.double)
|
||||
# cuda.memcpy_dtoh(distances, distances_gpu)
|
||||
# print(distances)
|
||||
|
||||
for epoch in range(0, epochs):
|
||||
mod = SourceModule('\n'.join([create_program(candidates[i], 'k' + str(i), i * sample_size) for i in range(0, num_candidates)]))
|
||||
stream = Stream()
|
||||
for i in range(0, num_candidates):
|
||||
f = mod.get_function('k' + str(i))
|
||||
f(x_gpu, outputs_gpu, stream=stream, block=(block_size, 1, 1), grid=(int(sample_size / block_size), 1, 1))
|
||||
stream.synchronize()
|
||||
|
||||
# outputs = np.zeros((sample_size * num_candidates,)).astype(np.int32)
|
||||
# cuda.memcpy_dtoh(outputs, outputs_gpu)
|
||||
# print(outputs)
|
||||
|
||||
coherence_kernel(outputs_gpu, z_gpu, distances_gpu, coherences_gpu, block=(block_size, 1, 1), grid=(int(num_candidates / block_size), 1, 1))
|
||||
cuda.memcpy_dtoh(coherences, coherences_gpu)
|
||||
|
||||
top_n = sorted(range(len(coherences)), key=lambda i: coherences[i])[-num_survivors:]
|
||||
survivors = [candidates[index] for index in top_n]
|
||||
print(epoch, coherences[top_n[-1]])
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
candidate = survivors[i]
|
||||
candidates[i] = candidate
|
||||
for j in range(0, num_offspring):
|
||||
index = num_survivors + j * num_survivors + i
|
||||
candidates[index] = clone_model(candidate, random.random())
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
207
mutations_gpu.py
Normal file
@ -0,0 +1,207 @@
|
||||
import hashlib
|
||||
import numpy as np
|
||||
import math
|
||||
import pyopencl as cl
|
||||
import random
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
sample_size = 64
|
||||
|
||||
def encode(v, offset):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[offset + index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)  # hash the accumulated bytes; bytearray(x) would just allocate a zero buffer
|
||||
|
||||
def sha(v, offset):
|
||||
global M
|
||||
x = encode(v, offset)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def create_program_r(model, output_var):
|
||||
global N, M
|
||||
(constant, scalars, child) = model
|
||||
program = 'int ' + output_var + ' = ' + str(constant) + ';\n'
|
||||
scalars_part = ' + '.join([str(scalars[i]) + ' * x[gid * ' + str(N) + ' + ' + str(i) + ']' for i in range(0, len(scalars)) if scalars[i] > 0])
|
||||
if len(scalars_part) > 0:
|
||||
program += output_var + ' += ' + scalars_part + ';\n'
|
||||
if not child is None:
|
||||
left_output = output_var + '0'
|
||||
right_output = output_var + '1'
|
||||
(left, right) = child
|
||||
program += create_program_r(left, left_output)
|
||||
program += create_program_r(right, right_output)
|
||||
program += output_var + ' += ' + left_output + ' * ' + right_output + ';\n'
|
||||
program += output_var + ' %= ' + str(M) + ';\n'
|
||||
return program
|
||||
|
||||
def create_program(model, name, offset):
|
||||
output_var = 'output'
|
||||
program = '__kernel void ' + name + '(__global const int *x, __global int *out) {\n'
|
||||
program += 'int gid = get_global_id(0);\n'
|
||||
program += create_program_r(model, output_var)
|
||||
program += 'out[' + str(offset) + ' + gid] = ' + output_var + ';\n'
|
||||
program += '}\n'
|
||||
return program
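# Same code-generation scheme as the CUDA variant in mutations_cuda.py, emitted as
# OpenCL C: the differences are the __kernel/__global qualifiers and get_global_id(0)
# in place of the CUDA thread-index arithmetic. A hypothetical model
# (0, [1, 1, 0, 0, 0, 0, 0, 0], None) would produce a kernel body along the lines of
#   int output = 0;
#   output += 1 * x[gid * 8 + 0] + 1 * x[gid * 8 + 1];
#   output %= 2;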
|
||||
|
||||
def distances_program():
|
||||
global N, sample_size
|
||||
program = "__kernel void p(__global const int *x, __global float *distances) {\n"
|
||||
program += " int gid = get_global_id(0);\n"
|
||||
program += " int i = gid / " + str(sample_size) + ";\n"
|
||||
program += " int j = gid % " + str(sample_size) + ";\n"
|
||||
program += " float distance = 0;\n"
|
||||
program += " if (i == j) {\n"
|
||||
program += " distances[gid] = distance;\n"
|
||||
program += " return;\n"
|
||||
program += " }\n"
|
||||
program += " for (int k = 0; k < " + str(N) + "; k++) {\n"
|
||||
program += " distance += x[i * " + str(N) + " + k] ^ x[j * " + str(N) + " + k];\n"
|
||||
program += " }\n"
|
||||
program += " distances[gid] = pow(2, -distance);\n"
|
||||
program += "}\n"
|
||||
return program
|
||||
|
||||
def coherence_program():
|
||||
global sample_size
|
||||
program = "__kernel void p(__global const int *y, __global const int *z, __global const float *distances, __global float *coherences) {\n"
|
||||
program += " int gid = get_global_id(0);\n"
|
||||
program += " float numerator = 0;\n"
|
||||
program += " float denominator = 0;\n"
|
||||
program += " for (int i = 0; i < " + str(sample_size) + "; i++) {\n"
|
||||
program += " int p = z[i] ^ y[gid * " + str(sample_size) + " + i];\n"
|
||||
program += " for (int j = 0; j < " + str(sample_size) + "; j++) {\n"
|
||||
program += " int q = z[j] ^ y[gid * " + str(sample_size) + " + j];\n"
|
||||
program += " float distance = distances[i * " + str(sample_size) + " + j];\n"
|
||||
program += " denominator += distance;\n"
|
||||
program += " if (p == q) {\n"
|
||||
program += " numerator += distance;\n"
|
||||
program += " }\n"
|
||||
program += " }\n"
|
||||
program += " }\n"
|
||||
program += " coherences[gid] = numerator / denominator;\n"
|
||||
program += "}\n"
|
||||
return program
|
||||
|
||||
def random_sample():
|
||||
global N, sample_size
|
||||
x = np.zeros((N * sample_size,)).astype(np.int32)
|
||||
for i in range(0, len(x)):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def clone_model(model, p_mutation):
|
||||
global N, M
|
||||
|
||||
p_constant = p_mutation * random.random()
|
||||
p_flip = p_mutation * random.random()
|
||||
p_add_child = p_mutation * random.random()
|
||||
p_drop_child = p_mutation * random.random()
|
||||
|
||||
(constant, xors, child) = model
|
||||
if random.random() < p_constant:
|
||||
constant += random.randint(0, M - 1)
|
||||
constant %= M
|
||||
clone_xors = np.zeros((N,))
|
||||
np.copyto(clone_xors, xors)
|
||||
for i in range(0, N):
|
||||
if random.random() < p_flip:
|
||||
offset = 1 if M == 2 else random.randint(1, M - 1)
|
||||
clone_xors[i] += offset
|
||||
clone_xors[i] %= M
|
||||
if child is None:
|
||||
if random.random() < p_add_child:
|
||||
left = random_child(p_mutation)
|
||||
right = random_child(p_mutation)
|
||||
return (constant, clone_xors, (left, right))
|
||||
return (constant, clone_xors, None)
|
||||
if random.random() < p_drop_child:
|
||||
return (constant, clone_xors, None)
|
||||
(left, right) = child
|
||||
clone_left = clone_model(left, p_mutation)
|
||||
clone_right = clone_model(right, p_mutation)
|
||||
return (constant, clone_xors, (clone_left, clone_right))
|
||||
|
||||
def random_child(p_mutation):
|
||||
global N, M
|
||||
constant = random.randint(0, M - 1)
|
||||
xors = np.zeros((N,))
|
||||
|
||||
p_flip = p_mutation * random.random()
|
||||
p_child = p_mutation * random.random()
|
||||
|
||||
index = random.randint(0, N - 1)
|
||||
xors[index] = 1 if M == 2 else random.randint(1, M - 1)
|
||||
for i in range(0, N):
|
||||
if i != index and random.random() < p_flip:
|
||||
xors[i] = 1 if M == 2 else random.randint(1, M - 1)
|
||||
if random.random() < p_child:
|
||||
left = random_child(p_mutation * random.random())
|
||||
right = random_child(p_mutation * random.random())
|
||||
return (constant, xors, (left, right))
|
||||
return (constant, xors, None)
|
||||
|
||||
def null_candidate():
|
||||
global N
|
||||
return (0, np.zeros((N,)), None)
|
||||
|
||||
def main():
|
||||
global N, M, sample_size
|
||||
epochs = 1000
|
||||
num_survivors = 100
|
||||
num_offspring = 10
|
||||
num_candidates = num_survivors + num_survivors * num_offspring
|
||||
local_work_size = None  # let the OpenCL runtime pick the work-group size; the global sizes used below are not multiples of 512
|
||||
|
||||
x = random_sample()
|
||||
z = np.zeros((sample_size,)).astype(np.int32)
|
||||
coherences = np.zeros((num_candidates,)).astype(np.float32)
|
||||
ctx = cl.create_some_context()
|
||||
queue = cl.CommandQueue(ctx)
|
||||
mf = cl.mem_flags
|
||||
candidates = [null_candidate() for _ in range(0, num_candidates)]
|
||||
|
||||
for i in range(0, sample_size):
|
||||
z[i] = sha(x, N * i)
|
||||
|
||||
x_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=x)
|
||||
z_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=z)
|
||||
distances_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, 4 * sample_size * sample_size)
|
||||
coherences_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, 4 * num_candidates)
|
||||
outputs_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, 4 * sample_size * num_candidates)
|
||||
|
||||
distances_kernel = cl.Program(ctx, distances_program()).build().p
|
||||
coherence_kernel = cl.Program(ctx, coherence_program()).build().p
|
||||
|
||||
distances_kernel(queue, (sample_size * sample_size,), local_work_size, x_gpu, distances_gpu)
|
||||
|
||||
for epoch in range(0, epochs):
|
||||
program = cl.Program(ctx, '\n'.join([create_program(candidates[i], 'k' + '{:0>9}'.format(i), i * sample_size) for i in range(0, num_candidates)])).build()
|
||||
for knl in program.all_kernels():
|
||||
knl(queue, (sample_size,), local_work_size, x_gpu, outputs_gpu)
|
||||
|
||||
coherence_kernel(queue, (num_candidates,), local_work_size, outputs_gpu, z_gpu, distances_gpu, coherences_gpu)
|
||||
cl.enqueue_copy(queue, coherences, coherences_gpu)
|
||||
|
||||
top_n = sorted(range(len(coherences)), key=lambda i: coherences[i])[-num_survivors:]
|
||||
survivors = [candidates[index] for index in top_n]
|
||||
print(epoch, coherences[top_n[-1]])
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
candidate = survivors[i]
|
||||
candidates[i] = candidate
|
||||
for j in range(0, num_offspring):
|
||||
index = num_survivors + j * num_survivors + i
|
||||
candidates[index] = clone_model(candidate, random.random())
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
5
mutations_opencl.py
Normal file
@ -0,0 +1,5 @@
def main():
    print('test')

if __name__ == "__main__":
    main()
29
shifts.py
Normal file
@ -0,0 +1,29 @@
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
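# Worked example: remove_bit(0b1011, 1) keeps bit 0, drops bit 1, and shifts the
# higher bits down by one, giving 0b101 == 5.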
|
||||
|
||||
def main():
|
||||
N = 65
|
||||
mappings = {}
|
||||
for i in range(0, N):
|
||||
n = 0
|
||||
g = remove_bit(i, n)
|
||||
paths_set = set()
|
||||
while g < i:
|
||||
paths_set.add(g)
|
||||
n += 1
|
||||
g = remove_bit(i, n)
|
||||
paths = sorted(list(paths_set))
|
||||
mappings[i] = paths
|
||||
|
||||
visited_set = set()
|
||||
stack = [paths[:]]
|
||||
while len(stack) > 0:
|
||||
for h in stack.pop():
|
||||
if not h in visited_set:
|
||||
visited_set.add(h)
|
||||
stack.append(mappings[h])
|
||||
visited = sorted(list(visited_set))
|
||||
print(i, len(visited))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
142
space_analysis.py
Normal file
@ -0,0 +1,142 @@
|
||||
import numpy as np
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a, b))
|
||||
|
||||
def xor(x, bits):
|
||||
return np.sum(x[:bits]) % 2
|
||||
|
||||
# 2
|
||||
# 4, 4,
|
||||
# 6, 8, 6
|
||||
# 8, 12, 12, 8
|
||||
# 10, 16, 18, 16, 10
|
||||
# 12, 20, 24, 24, 20, 12
|
||||
# 14, 24, 30, 32, 30, 24, 14
|
||||
# 16, 28, 36, 40, 40, 36, 28, 16
|
||||
|
||||
# 1
|
||||
# 2, 2
|
||||
# 3, 4, 3
|
||||
# 4, 6, 6, 4
|
||||
# 5, 8, 9, 8, 5
|
||||
# 6, 10, 12, 12, 10, 6
|
||||
# 7, 12, 15, 16, 15, 12, 7
|
||||
|
||||
# 6, 0, 6
|
||||
# 24, 12, 12, 24
|
||||
# 60, 48, 36, 48, 60
|
||||
# 120, 120, 96, 96, 120, 120
|
||||
# 210, 240, 210, 192, 210, 240, 210
|
||||
# 336, 420, 396, 360, 360, 396, 420, 336
|
||||
# 504, 672, 672, 624, 600, 624, 672, 672, 504
|
||||
|
||||
|
||||
# 1, 0, 1
|
||||
# 4, 2, 2, 4
|
||||
# 10, 8, 6, 8, 10
|
||||
# 20, 20, 16, 16, 20, 20
|
||||
# 35, 40, 35, 32, 35, 40, 35
|
||||
# 56, 70, 66, 60, 60, 66, 70, 56
|
||||
# 84, 112, 112, 104, 100, 104, 112, 112, 84
|
||||
|
||||
#
|
||||
# 20, 0, 20, 0, 20,
|
||||
# 120, 40, 80, 80, 40, 120
|
||||
# 420, 240, 260, 320, 260, 240, 420
|
||||
# 1120, 840, 760, 880, 880, 760, 840, 1120
|
||||
|
||||
# 1, 0, 1, 0, 1
|
||||
# 6, 2, 4, 4, 2, 6
|
||||
# 21, 12, 13, 16, 13, 12, 21
|
||||
# 56, 42, 38, 44, 44, 38, 42, 56
|
||||
|
||||
# 70, 0, 70, 0, 70, 0, 70
|
||||
# 560, 140, 420, 280, 280, 420, 140, 560
|
||||
|
||||
# 252, 0, 252, 0, 252, 0, 252, 0, 252
|
||||
# 2520, 504, 2016, 1008, 1512, 1512, 1008, 2016, 504, 2520
|
||||
|
||||
# 1, 2, 3, 4,
|
||||
# 1, 3, 6, 10
|
||||
# 1, 4, 10, 20
|
||||
# 1, 5, 15, 35
|
||||
# 1, 6,
|
||||
|
||||
# 1, 2, 1
|
||||
# 1, 3, 3, 1
|
||||
# 1, 4, 6, 4, 1
|
||||
# 1, 5, 10, 10, 5, 1
|
||||
# 1, 6, 15, 20, 15, 6, 1
|
||||
|
||||
# 2, 6, 12, 20, 30, 42, 56
|
||||
# 6, 30, 90, 210, 420
|
||||
# 20, 140, 560,
|
||||
# 70
|
||||
|
||||
# 1, 3, 6, 10, 15, 21, 28
|
||||
# 1, 5, 15, 35
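# For reference (not used by the code): on the N-bit hypercube the number of points at
# Hamming distance d from a fixed point is C(N, d); for N = 8 that gives
# 1, 8, 28, 56, 70, 56, 28, 8, 1 for d = 0..8. Several of the sequences above look
# like sums or products of such binomial coefficients.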
|
||||
|
||||
def main():
|
||||
N = 8
|
||||
points = []
|
||||
for i in range(0, 2 ** N):
|
||||
points.append(decode(i, N))
|
||||
|
||||
bands = [[[] for _ in range(0, N + 1)] for _ in range(0, len(points))]
|
||||
for i in range(0, len(points)):
|
||||
a = points[i]
|
||||
for j in range(0, len(points)):
|
||||
if i == j:
|
||||
continue
|
||||
b = points[j]
|
||||
distance = hamming_distance(a, b)
|
||||
bands[i][distance].append(b)
|
||||
|
||||
incoherent_distances = np.zeros((N + 1, N + 1))
|
||||
for k in range(0, N + 1):
|
||||
print(k, '================================')
|
||||
for t in range(0, 1):
|
||||
x_a = points[t]
|
||||
y_a = xor(x_a, k)
|
||||
incoherent_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
total_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
for distance in range(0, N + 1):
|
||||
band = bands[t][distance]  # band list for the current origin t (t is 0 in this loop)
|
||||
for x_b in band:
|
||||
y_b = xor(x_b, k)
|
||||
if y_a != y_b:
|
||||
incoherent_distances[k][distance] += 1
|
||||
|
||||
if len(band) < 2:
|
||||
continue
|
||||
for band_origin in range(0, len(band)):
|
||||
x_p = band[band_origin]
|
||||
y_p = xor(x_p, k)
|
||||
for i in range(0, len(band)):
|
||||
if i == band_origin:
|
||||
continue
|
||||
x_q = band[i]
|
||||
y_q = xor(x_q, k)
|
||||
band_distance = hamming_distance(x_p, x_q)
|
||||
total_bands[distance][band_distance] += 1
|
||||
if y_p != y_q:
|
||||
incoherent_bands[distance][band_distance] += 1
|
||||
print(incoherent_bands)
|
||||
print(total_bands)
|
||||
# print(distance, hamming_distance(x_p, x_q), y_p, y_q)
|
||||
|
||||
print(incoherent_distances)
|
||||
# print(bands)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
255
space_analysis2.py
Normal file
@ -0,0 +1,255 @@
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a, b))
|
||||
|
||||
def xor(x, bits):
|
||||
return np.sum(x[:bits]) % 2
|
||||
|
||||
def compute_pyramids(N):
|
||||
num_orders = max(int(N / 2), 1)
|
||||
pyramids = np.zeros((num_orders, N, N)).astype(np.int32)
|
||||
for i in range(2, N):
|
||||
for j in range(1, i):
|
||||
pyramids[0][i][j] = j
|
||||
for order in range(1, num_orders):
|
||||
# build out the first column
|
||||
acc = 0
|
||||
for i in range(order * 2 + 2, N):
|
||||
acc += pyramids[order - 1][i - 2][1]
|
||||
pyramids[order][i][1] = acc
|
||||
# accumulate the first column and place it on the diagonal(s)
|
||||
for k in range(0, int(order / 2) + 1):
|
||||
acc = 0
|
||||
for i in range(order * 2 + 2, N):
|
||||
acc += pyramids[order][i][1]
|
||||
pyramids[order][i][i - 1 - 2 * k] = acc
|
||||
# for odd, copy the first column to the first diagonal
|
||||
if order % 2 == 1:
|
||||
k += 1
|
||||
for i in range(order * 2 + 2, N):
|
||||
pyramids[order][i][i - 1 - 2 * k] = pyramids[order][i][1]
|
||||
# integrate under the diagonal
|
||||
inset = 1
|
||||
for j in reversed(range(2, N - 2 * k - 2)):
|
||||
acc = pyramids[order][N - inset - 1][j]
|
||||
for i in range(N - inset, N):
|
||||
acc += pyramids[order - 1][i - 2][j]
|
||||
pyramids[order][i][j] = acc
|
||||
if order * 2 + 2 < N - inset:
|
||||
inset += 1
|
||||
return pyramids
|
||||
|
||||
def compute_pyramids_full(N):
|
||||
num_orders = max(int(N / 2), 1)
|
||||
pyramids = np.zeros((num_orders, N, N)).astype(np.int32)
|
||||
# 1st order can be filled in as multiplication and forms the base case
|
||||
for i in range(0, N):
|
||||
for j in range(0, i + 1):
|
||||
pyramids[0][i][j] = (i - j + 1) * (j + 1)
|
||||
for order in range(1, num_orders):
|
||||
offset = order * 2
|
||||
|
||||
# fill in the LHS and diagonal
|
||||
for i in range(0, N - offset):
|
||||
value = math.comb(2 * (order + 1) + i - 1, i)
|
||||
pyramids[order][i + offset][0] = value
|
||||
# mirror
|
||||
pyramids[order][i + offset][i + offset] = value
|
||||
|
||||
# accumulate along the diagonals
|
||||
for i in range(1, N):
|
||||
value = pyramids[order][i][0]
|
||||
acc = value
|
||||
for j in range(1, N - i):
|
||||
value += acc
|
||||
pyramids[order][i + j][j] = value
|
||||
acc += pyramids[order - 1][i + j - 1][j - 1]
|
||||
|
||||
return pyramids
|
||||
|
||||
def get_total_band_count_2(distance, band_distance, N):
|
||||
if band_distance % 2 == 1:
|
||||
return 0
|
||||
order = int(band_distance / 2) - 1
|
||||
if order < 0:
|
||||
return 0
|
||||
if distance < order + 1:
|
||||
return 0
|
||||
if distance > N - order - 1:
|
||||
return 0
|
||||
order_root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2
|
||||
scale = math.comb(N - (order + 1) * 2, distance - order - 1)
|
||||
value = math.comb(2 * (order + 1) + N - 2 * (order + 1), N - 2 * (order + 1))
|
||||
return order_root * scale * value
|
||||
|
||||
def get_incoherent_band_count_2(pyramids, distance, band_distance, k, N):
|
||||
if k == 0 or k == N or band_distance % 2 == 1:
|
||||
return 0
|
||||
order = int(band_distance / 2) - 1
|
||||
if order < 0:
|
||||
return 0
|
||||
if distance < order + 1:
|
||||
return 0
|
||||
if distance > N - order - 1:
|
||||
return 0
|
||||
order_root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2
|
||||
scale = math.comb(N - (order + 1) * 2, distance - order - 1)
|
||||
value = pyramids[order][N - 2][k - 1]
|
||||
return order_root * scale * value
|
||||
|
||||
# pyramid = pyramids[order]
|
||||
# offset = (N - 1 - order) - distance
|
||||
# multiplier = pyramid[2 * order + offset][2 * order + 1 + offset]
|
||||
# row = N - offset
|
||||
# column = k
|
||||
# value = pyramid[row][column]
|
||||
# return multiplier * value
|
||||
|
||||
def get_incoherent_band_count(pyramids, distance, band_distance, k, N):
|
||||
if k == 0 or k == N or band_distance % 2 == 1:
|
||||
return 0
|
||||
order = int(band_distance / 2) - 1
|
||||
if order < 0:
|
||||
return 0
|
||||
if distance < order + 1:
|
||||
return 0
|
||||
if distance > N - order - 1:
|
||||
return 0
|
||||
if distance < k:
|
||||
distance = N - distance
|
||||
k = N - k
|
||||
pyramid = pyramids[order]
|
||||
offset = (N - 1 - order) - distance
|
||||
multiplier = pyramid[2 * order + 2 + offset][2 * order + 1 + offset]
|
||||
row = N - offset
|
||||
column = k
|
||||
value = pyramid[row][column]
|
||||
return multiplier * value
|
||||
|
||||
def get_total_band_count(pyramids, distance, band_distance, N):
|
||||
if band_distance % 2 == 1:
|
||||
return 0
|
||||
order = int(band_distance / 2) - 1
|
||||
if order < 0:
|
||||
return 0
|
||||
if distance < order + 1:
|
||||
return 0
|
||||
if distance > N - order - 1:
|
||||
return 0
|
||||
pyramid = pyramids[order]
|
||||
offset = (N - 1 - order) - distance
|
||||
length = N + 1 - 2 * (order + 1)
|
||||
a = pyramid[2 * order + 2 + offset][2 * order + 1 + offset]
|
||||
b = pyramid[2 * order + 2 + (length - offset - 1)][2 * order + 1 + (length - offset - 1)]
|
||||
return a * b
|
||||
|
||||
# def compute_band_distances(pyramids, N):
|
||||
# num_orders = max(int(N / 2), 1)
|
||||
# incoherent_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32)
|
||||
# for order in range(0, num_orders):
|
||||
# band_distance = (order + 1) * 2
|
||||
# for k in range(1, N):
|
||||
|
||||
|
||||
# for k in range(0, N + 1):
|
||||
# for distance in range()
|
||||
|
||||
def main():
|
||||
# N = 8
|
||||
# print(compute_pyramids_full(N))
|
||||
# total_distances = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
# for i in range(0, N + 1):
|
||||
# for j in range(0, N + 1):
|
||||
# total_distances[i][j] = get_total_band_count_2(i, j, N)
|
||||
# print(total_distances)
|
||||
# return
|
||||
|
||||
max_N = 8
|
||||
orders = [np.zeros((max_N + 1, max_N + 1)).astype(np.int32) for _ in range(0, max_N)]
|
||||
|
||||
print('Attempting discrete solution...')
|
||||
pyramids = compute_pyramids_full(max_N + 1)
|
||||
|
||||
for N in range(max_N, max_N + 1):
|
||||
# for N in range(2, max_N + 1):
|
||||
print('=============================')
|
||||
print('N@', N)
|
||||
print('Generating points...')
|
||||
points = []
|
||||
for i in range(0, 2 ** N):
|
||||
points.append(decode(i, N))
|
||||
|
||||
print('Computing bands...')
|
||||
bands = [[] for _ in range(0, N + 1)]
|
||||
for i in range(1, len(points)):
|
||||
distance = hamming_distance(points[0], points[i])
|
||||
bands[distance].append(points[i])
|
||||
|
||||
print('Computing band distances...')
|
||||
incoherent_distances = np.zeros((N + 1, N + 1))
|
||||
for k in range(0, N + 1):
|
||||
print('k@', k)
|
||||
# print(k, '================================')
|
||||
x_a = points[0]
|
||||
y_a = xor(x_a, k)
|
||||
incoherent_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
precomputed_incoherent_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
total_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
precomputed_total_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
for distance in range(0, N + 1):
|
||||
band = bands[distance]
|
||||
for x_b in band:
|
||||
y_b = xor(x_b, k)
|
||||
if y_a != y_b:
|
||||
incoherent_distances[k][distance] += 1
|
||||
|
||||
if len(band) < 2:
|
||||
continue
|
||||
for band_origin in range(0, len(band)):
|
||||
x_p = band[band_origin]
|
||||
# print(x_p)
|
||||
y_p = xor(x_p, k)
|
||||
for i in range(0, len(band)):
|
||||
if i == band_origin:
|
||||
continue
|
||||
x_q = band[i]
|
||||
y_q = xor(x_q, k)
|
||||
band_distance = hamming_distance(x_p, x_q)
|
||||
total_bands[distance][band_distance] += 1
|
||||
if y_p != y_q:
|
||||
incoherent_bands[distance][band_distance] += 1
|
||||
for band_distance in range(0, N + 1):
|
||||
precomputed_incoherent_bands[distance][band_distance] = get_incoherent_band_count_2(pyramids, distance, band_distance, k, N)
|
||||
precomputed_total_bands[distance][band_distance] = get_total_band_count_2(distance, band_distance, N)
|
||||
# print(incoherent_bands)
|
||||
for order in range(0, int(N / 2)):
|
||||
root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2
|
||||
index = order * 2 + 2
|
||||
orders[order][N][k] = incoherent_bands[-2 - order][index] / root
|
||||
|
||||
print(incoherent_bands)
|
||||
print(precomputed_incoherent_bands)
|
||||
print(total_bands)
|
||||
print(precomputed_total_bands)
|
||||
# print(total_bands)
|
||||
# print(distance, hamming_distance(x_p, x_q), y_p, y_q)
|
||||
# for i in range(0, len(orders)):
|
||||
# print(orders[i])
|
||||
# # print(pyramids[i])
|
||||
# print('========================================')
|
||||
# print(incoherent_distances)
|
||||
# print(bands)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
385
space_analysis3.py
Normal file
@ -0,0 +1,385 @@
|
||||
import math
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
np.set_printoptions(threshold=sys.maxsize)
|
||||
|
||||
cache = {}
|
||||
def p_bernoulli(n, k, m, j):
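# Probability of drawing exactly k marked items when sampling n items without
# replacement from a population of m items containing j marked items (a
# hypergeometric probability), computed by dynamic programming over the states
# (a, b) = (marked drawn so far, unmarked drawn so far).
# Sanity check (an assumption, not a project dependency): this should agree with
# scipy.stats.hypergeom.pmf(k, m, j, n).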
|
||||
key = (n, k, m, j)
|
||||
if key in cache:
|
||||
return cache[key]
|
||||
probabilities = np.zeros((n + 1, n + 1))
|
||||
probabilities.fill(-1)
|
||||
stack = [(0,0)]
|
||||
while len(stack) > 0:
|
||||
(a, b) = stack.pop()
|
||||
if a + b == n:
|
||||
probabilities[a][b] = 1 if a == k else 0
|
||||
elif a > j:
|
||||
probabilities[a][b] = 0
|
||||
elif b > (m - j):
|
||||
probabilities[a][b] = 0
|
||||
else:
|
||||
p_left = probabilities[a + 1][b]
|
||||
p_right = probabilities[a][b + 1]
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
p = (j - a) / (m - a - b)
|
||||
probabilities[a][b] = p_left * p + p_right * (1 - p)
|
||||
else:
|
||||
stack.append((a, b))
|
||||
if p_left < 0:
|
||||
stack.append((a + 1, b))
|
||||
if p_right < 0:
|
||||
stack.append((a, b + 1))
|
||||
# if len(cache) % 100 == 0:
|
||||
# print('Cache size: ', len(cache), math.floor(10000 * hits / (hits + misses)) / 100, '%')
|
||||
cache[key] = probabilities[0][0]
|
||||
return probabilities[0][0]
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a, b))
|
||||
|
||||
def xor(x, bits):
|
||||
return np.sum(x[:bits]) % 2
|
||||
|
||||
def compute_pseudopascal(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
return dist
|
||||
|
||||
def compute_pyramids(N):
|
||||
num_orders = max(int(N / 2), 1)
|
||||
pyramids = np.zeros((num_orders, N, N)).astype(np.int32)
|
||||
# 1st order can be filled in as multiplication and forms the base case
|
||||
for i in range(0, N):
|
||||
for j in range(0, i + 1):
|
||||
pyramids[0][i][j] = (i - j + 1) * (j + 1)
|
||||
for order in range(1, num_orders):
|
||||
offset = order * 2
|
||||
|
||||
# fill in the LHS and diagonal
|
||||
for i in range(0, N - offset):
|
||||
value = math.comb(2 * (order + 1) + i - 1, i)
|
||||
pyramids[order][i + offset][0] = value
|
||||
# mirror
|
||||
pyramids[order][i + offset][i + offset] = value
|
||||
|
||||
# accumulate along the diagonals
|
||||
for i in range(1, N):
|
||||
value = pyramids[order][i][0]
|
||||
acc = value
|
||||
for j in range(1, N - i):
|
||||
value += acc
|
||||
pyramids[order][i + j][j] = value
|
||||
acc += pyramids[order - 1][i + j - 1][j - 1]
|
||||
|
||||
return pyramids
|
||||
|
||||
def compute_string_key(key):
|
||||
return ','.join([str(x) for x in key])
|
||||
|
||||
def generate_bands(points):
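# For each origin point, recursively partitions the remaining points by Hamming
# distance from the current origin: points at equal distance form a band, keyed by
# the comma-joined sequence of distances taken so far (e.g. '2,4'), and every band
# member then serves as a new origin for its own sub-bands. Returns one dict per
# origin mapping each distance-sequence key to the number of points it covered.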
|
||||
all_bands = [{} for _ in range(0, len(points))]
|
||||
for origin_index in range(0, len(points)):
|
||||
bands = all_bands[origin_index]
|
||||
key = []
|
||||
group = [index for index in range(0, len(points)) if index != origin_index]
|
||||
stack = [(origin_index, key, group)]
|
||||
while len(stack) > 0:
|
||||
(origin_index, key, group) = stack.pop()
|
||||
distance = hamming_distance(points[origin_index], points[group[0]])
|
||||
in_band = []
|
||||
out_of_band = []
|
||||
for index in group:
|
||||
if distance == hamming_distance(points[origin_index], points[index]):
|
||||
in_band.append(index)
|
||||
else:
|
||||
out_of_band.append(index)
|
||||
if len(out_of_band) > 0:
|
||||
stack.append((origin_index, key, out_of_band))
|
||||
key = key[:]
|
||||
key.append(distance)
|
||||
string_key = compute_string_key(key)
|
||||
if string_key not in bands:
|
||||
bands[string_key] = 0
|
||||
bands[string_key] += len(in_band)
|
||||
if len(in_band) < 2:
|
||||
continue
|
||||
for origin_index in in_band:
|
||||
group = [index for index in in_band if index != origin_index]
|
||||
stack.append((origin_index, key, group))
|
||||
return all_bands
|
||||
|
||||
def test():
|
||||
N = 8
|
||||
points = [decode(x, N) for x in range(0, 2 ** N)]
|
||||
print(generate_bands(points)[0])
|
||||
|
||||
|
||||
# 2
|
||||
# 4, 4,
|
||||
# 6, 8, 6
|
||||
# 8, 12, 12, 8
|
||||
# 10, 16, 18, 16, 10
|
||||
# 12, 20, 24, 24, 20, 12
|
||||
# 14, 24, 30, 32, 30, 24, 14
|
||||
# 16, 28, 36, 40, 40, 36, 28, 16
|
||||
|
||||
# 1
|
||||
# 2, 2
|
||||
# 3, 4, 3
|
||||
# 4, 6, 6, 4
|
||||
# 5, 8, 9, 8, 5
|
||||
# 6, 10, 12, 12, 10, 6
|
||||
# 7, 12, 15, 16, 15, 12, 7
|
||||
|
||||
# 6, 0, 6
|
||||
# 24, 12, 12, 24
|
||||
# 60, 48, 36, 48, 60
|
||||
# 120, 120, 96, 96, 120, 120
|
||||
# 210, 240, 210, 192, 210, 240, 210
|
||||
# 336, 420, 396, 360, 360, 396, 420, 336
|
||||
# 504, 672, 672, 624, 600, 624, 672, 672, 504
|
||||
|
||||
|
||||
# 1, 0, 1
|
||||
# 4, 2, 2, 4
|
||||
# 10, 8, 6, 8, 10
|
||||
# 20, 20, 16, 16, 20, 20
|
||||
# 35, 40, 35, 32, 35, 40, 35
|
||||
# 56, 70, 66, 60, 60, 66, 70, 56
|
||||
# 84, 112, 112, 104, 100, 104, 112, 112, 84
|
||||
|
||||
#
|
||||
# 20, 0, 20, 0, 20,
|
||||
# 120, 40, 80, 80, 40, 120
|
||||
# 420, 240, 260, 320, 260, 240, 420
|
||||
# 1120, 840, 760, 880, 880, 760, 840, 1120
|
||||
|
||||
# 1, 0, 1, 0, 1
|
||||
# 6, 2, 4, 4, 2, 6
|
||||
# 21, 12, 13, 16, 13, 12, 21
|
||||
# 56, 42, 38, 44, 44, 38, 42, 56
|
||||
|
||||
# 70, 0, 70, 0, 70, 0, 70
|
||||
# 560, 140, 420, 280, 280, 420, 140, 560
|
||||
|
||||
# 252, 0, 252, 0, 252, 0, 252, 0, 252
|
||||
# 2520, 504, 2016, 1008, 1512, 1512, 1008, 2016, 504, 2520
|
||||
|
||||
# 1, 2, 3, 4,
|
||||
# 1, 3, 6, 10
|
||||
# 1, 4, 10, 20
|
||||
# 1, 5, 15, 35
|
||||
# 1, 6,
|
||||
|
||||
# 1, 2, 1
|
||||
# 1, 3, 3, 1
|
||||
# 1, 4, 6, 4, 1
|
||||
# 1, 5, 10, 10, 5, 1
|
||||
# 1, 6, 15, 20, 15, 6, 1
|
||||
|
||||
# 2, 6, 12, 20, 30, 42, 56
|
||||
# 6, 30, 90, 210, 420
|
||||
# 20, 140, 560,
|
||||
# 70
|
||||
|
||||
# 1, 3, 6, 10, 15, 21, 28
|
||||
# 1, 5, 15, 35
|
||||
|
||||
def main():
|
||||
test()
|
||||
return
|
||||
|
||||
N = 5
|
||||
|
||||
# print(compute_pseudopascal(10))
|
||||
# print(compute_pyramids(10))
|
||||
|
||||
points = []
|
||||
for i in range(0, 2 ** N):
|
||||
points.append(decode(i, N))
|
||||
|
||||
bands = [[[] for _ in range(0, N + 1)] for _ in range(0, len(points))]
|
||||
for i in range(0, len(points)):
|
||||
a = points[i]
|
||||
for j in range(0, len(points)):
|
||||
if i == j:
|
||||
continue
|
||||
b = points[j]
|
||||
distance = hamming_distance(a, b)
|
||||
bands[i][distance].append(b)
|
||||
|
||||
golden_incoherent_distances = None
|
||||
golden_total_distances = None
|
||||
golden_incoherent_bands = None
|
||||
golden_total_bands = None
|
||||
golden_incoherent_sub_bands = None
|
||||
golden_total_sub_bands = None
|
||||
# for t in range(0, len(points)):
|
||||
for t in range(0, 1):
|
||||
incoherent_distances = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
total_distances = np.zeros((N + 1)).astype(np.int32)
|
||||
if t == 0:
|
||||
golden_incoherent_distances = incoherent_distances
|
||||
golden_total_distances = total_distances
|
||||
incoherent_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32)
|
||||
total_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
if t == 0:
|
||||
golden_incoherent_bands = incoherent_bands
|
||||
golden_total_bands = total_bands
|
||||
incoherent_sub_bands = np.zeros((N + 1, N + 1, N + 1, N + 1)).astype(np.int32)
|
||||
total_sub_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32)
|
||||
if t == 0:
|
||||
golden_incoherent_sub_bands = incoherent_sub_bands
|
||||
golden_total_sub_bands = total_sub_bands
|
||||
# print(t)
|
||||
for k in range(1, N + 1):
|
||||
# print(k, '================================')
|
||||
x_a = points[t]
|
||||
y_a = xor(x_a, k)
|
||||
for distance in range(0, N + 1):
|
||||
# print('distance', distance)
|
||||
band = bands[t][distance]
|
||||
for x_b in band:
|
||||
y_b = xor(x_b, k)
|
||||
if k == 1:
|
||||
total_distances[distance] += 1
|
||||
if y_a != y_b:
|
||||
incoherent_distances[k][distance] += 1
|
||||
|
||||
if len(band) < 2:
|
||||
continue
|
||||
for band_origin in range(0, len(band)):
|
||||
x_p = band[band_origin]
|
||||
y_p = xor(x_p, k)
|
||||
sub_bands = [[] for _ in range(0, N + 1)]
|
||||
for i in range(0, len(band)):
|
||||
if i == band_origin:
|
||||
continue
|
||||
x_q = band[i]
|
||||
y_q = xor(x_q, k)
|
||||
band_distance = hamming_distance(x_p, x_q)
|
||||
if k == 1:
|
||||
total_bands[distance][band_distance] += 1
|
||||
if y_p != y_q:
|
||||
incoherent_bands[k][distance][band_distance] += 1
|
||||
sub_bands[band_distance].append(x_q)
|
||||
|
||||
# incoherent_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
# total_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
for band_distance in range(0, N + 1):
|
||||
sub_band = sub_bands[band_distance]
|
||||
if len(sub_band) < 2:
|
||||
continue
|
||||
for sub_band_origin in range(0, len(sub_band)):
|
||||
x_u = sub_band[sub_band_origin]
|
||||
y_u = xor(x_u, k)
|
||||
for i in range(0, len(sub_band)):
|
||||
if i == sub_band_origin:
|
||||
continue
|
||||
x_v = sub_band[i]
|
||||
y_v = xor(x_v, k)
|
||||
sub_band_distance = hamming_distance(x_v, x_u)
|
||||
if k == 1:
|
||||
total_sub_bands[band_distance][sub_band_distance] += 1
|
||||
if y_u != y_v:
|
||||
incoherent_sub_bands[k][distance][band_distance][sub_band_distance] += 1
|
||||
# print(incoherent_sub_bands)
|
||||
# print(total_sub_bands)
|
||||
# print('==========================')
|
||||
if t != 0:
|
||||
if not np.array_equal(golden_incoherent_sub_bands, incoherent_sub_bands):
|
||||
print(golden_incoherent_sub_bands)
|
||||
print(incoherent_sub_bands)
|
||||
raise Exception('Not symmetric')
|
||||
|
||||
if not np.array_equal(golden_incoherent_bands, incoherent_bands):
|
||||
print(golden_incoherent_bands)
|
||||
print(incoherent_bands)
|
||||
raise Exception('Not symmetric')
|
||||
# print(incoherent_bands)
|
||||
# print(total_bands)
|
||||
# print(distance, hamming_distance(x_p, x_q), y_p, y_q)
|
||||
if not np.array_equal(golden_incoherent_distances, incoherent_distances):
|
||||
print(golden_incoherent_distances)
|
||||
print(incoherent_distances)
|
||||
raise Exception('Not symmetric')
|
||||
|
||||
# print(golden_total_distances)
|
||||
# print(golden_incoherent_distances)
|
||||
|
||||
# print(golden_total_bands)
|
||||
# print(golden_incoherent_bands)
|
||||
# print(golden_total_bands)
|
||||
|
||||
p = np.ones((2 ** N, N + 1))
|
||||
for sample_size in range(0, 2 ** N):
|
||||
for k in range(0, N + 1):
|
||||
for d1 in range(0, N + 1):
|
||||
if golden_total_distances[d1] == 0:
|
||||
continue
|
||||
m = golden_total_distances[d1]
|
||||
j = golden_incoherent_distances[k][d1]
|
||||
n = min(sample_size, m)
|
||||
l = int(n * j / m)
|
||||
p[sample_size][k] *= p_bernoulli(n, l, m, j)
|
||||
print(np.around(p, 2))
|
||||
|
||||
p = np.ones((4 ** N, N + 1))
|
||||
for sample_size in range(0, 4 ** N):
|
||||
for k in range(0, N + 1):
|
||||
for d1 in range(0, N + 1):
|
||||
for d2 in range(0, N + 1):
|
||||
if golden_total_bands[d1][d2] == 0:
|
||||
continue
|
||||
m = golden_total_bands[d1][d2]
|
||||
j = golden_incoherent_bands[k][d1][d2]
|
||||
n = min(sample_size, m)
|
||||
l = int(n * j / m)
|
||||
p[sample_size][k] *= p_bernoulli(n, l, m, j)
|
||||
print(np.around(p, 3))
|
||||
|
||||
# p = np.ones((N + 1))
|
||||
# for k in range(0, N + 1):
|
||||
# for d1 in range(0, N + 1):
|
||||
# for d2 in range(0, N + 1):
|
||||
# if golden_total_bands[d1][d2] == 0:
|
||||
# continue
|
||||
# partial = golden_incoherent_bands[k][d1][d2] / golden_total_bands[d1][d2]
|
||||
# p[k] *= max(partial, 1 - partial)
|
||||
# print(p)
|
||||
|
||||
# p = np.ones((N + 1))
|
||||
# for k in range(0, N + 1):
|
||||
# for d1 in range(0, N + 1):
|
||||
# for d2 in range(0, N + 1):
|
||||
# for d3 in range(0, N + 1):
|
||||
# if golden_total_sub_bands[d1][d2][d3] == 0:
|
||||
# continue
|
||||
# partial = golden_incoherent_sub_bands[k][d1][d2][d3] / golden_total_sub_bands[d1][d2][d3]
|
||||
# p[k] *= max(partial, 1 - partial)
|
||||
# print(p)
|
||||
|
||||
# print(bands)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
229
space_analysis4.py
Normal file
@ -0,0 +1,229 @@
|
||||
import math
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
np.set_printoptions(threshold=sys.maxsize)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a, b))
|
||||
|
||||
def xor(x, bits):
|
||||
return np.sum(x[:bits]) % 2
|
||||
|
||||
def compute_pseudopascal(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
return dist
|
||||
|
||||
def compute_pyramids(N):
|
||||
num_orders = max(int(N / 2), 1)
|
||||
pyramids = np.zeros((num_orders, N, N)).astype(np.int32)
|
||||
# 1st order can be filled in as multiplication and forms the base case
|
||||
for i in range(0, N):
|
||||
for j in range(0, i + 1):
|
||||
pyramids[0][i][j] = (i - j + 1) * (j + 1)
|
||||
for order in range(1, num_orders):
|
||||
offset = order * 2
|
||||
|
||||
# fill in the LHS and diagonal
|
||||
for i in range(0, N - offset):
|
||||
value = math.comb(2 * (order + 1) + i - 1, i)
|
||||
pyramids[order][i + offset][0] = value
|
||||
# mirror
|
||||
pyramids[order][i + offset][i + offset] = value
|
||||
|
||||
# accumulate along the diagonals
|
||||
for i in range(1, N):
|
||||
value = pyramids[order][i][0]
|
||||
acc = value
|
||||
for j in range(1, N - i):
|
||||
value += acc
|
||||
pyramids[order][i + j][j] = value
|
||||
acc += pyramids[order - 1][i + j - 1][j - 1]
|
||||
|
||||
return pyramids
|
||||
|
||||
# 2
|
||||
# 4, 4,
|
||||
# 6, 8, 6
|
||||
# 8, 12, 12, 8
|
||||
# 10, 16, 18, 16, 10
|
||||
# 12, 20, 24, 24, 20, 12
|
||||
# 14, 24, 30, 32, 30, 24, 14
|
||||
# 16, 28, 36, 40, 40, 36, 28, 16
|
||||
|
||||
# 1
|
||||
# 2, 2
|
||||
# 3, 4, 3
|
||||
# 4, 6, 6, 4
|
||||
# 5, 8, 9, 8, 5
|
||||
# 6, 10, 12, 12, 10, 6
|
||||
# 7, 12, 15, 16, 15, 12, 7
|
||||
|
||||
# 6, 0, 6
|
||||
# 24, 12, 12, 24
|
||||
# 60, 48, 36, 48, 60
|
||||
# 120, 120, 96, 96, 120, 120
|
||||
# 210, 240, 210, 192, 210, 240, 210
|
||||
# 336, 420, 396, 360, 360, 396, 420, 336
|
||||
# 504, 672, 672, 624, 600, 624, 672, 672, 504
|
||||
|
||||
|
||||
# 1, 0, 1
|
||||
# 4, 2, 2, 4
|
||||
# 10, 8, 6, 8, 10
|
||||
# 20, 20, 16, 16, 20, 20
|
||||
# 35, 40, 35, 32, 35, 40, 35
|
||||
# 56, 70, 66, 60, 60, 66, 70, 56
|
||||
# 84, 112, 112, 104, 100, 104, 112, 112, 84
|
||||
|
||||
#
|
||||
# 20, 0, 20, 0, 20,
|
||||
# 120, 40, 80, 80, 40, 120
|
||||
# 420, 240, 260, 320, 260, 240, 420
|
||||
# 1120, 840, 760, 880, 880, 760, 840, 1120
|
||||
|
||||
# 1, 0, 1, 0, 1
|
||||
# 6, 2, 4, 4, 2, 6
|
||||
# 21, 12, 13, 16, 13, 12, 21
|
||||
# 56, 42, 38, 44, 44, 38, 42, 56
|
||||
|
||||
# 70, 0, 70, 0, 70, 0, 70
|
||||
# 560, 140, 420, 280, 280, 420, 140, 560
|
||||
|
||||
# 252, 0, 252, 0, 252, 0, 252, 0, 252
|
||||
# 2520, 504, 2016, 1008, 1512, 1512, 1008, 2016, 504, 2520
|
||||
|
||||
# 1, 2, 3, 4,
|
||||
# 1, 3, 6, 10
|
||||
# 1, 4, 10, 20
|
||||
# 1, 5, 15, 35
|
||||
# 1, 6,
|
||||
|
||||
# 1, 2, 1
|
||||
# 1, 3, 3, 1
|
||||
# 1, 4, 6, 4, 1
|
||||
# 1, 5, 10, 10, 5, 1
|
||||
# 1, 6, 15, 20, 15, 6, 1
|
||||
|
||||
# 2, 6, 12, 20, 30, 42, 56
|
||||
# 6, 30, 90, 210, 420
|
||||
# 20, 140, 560,
|
||||
# 70
|
||||
|
||||
# 1, 3, 6, 10, 15, 21, 28
|
||||
# 1, 5, 15, 35
|
||||
|
||||
def main():
|
||||
last_incoherent_distances = None
|
||||
last_incoherent_bands = None
|
||||
last_incoherent_sub_bands = None
|
||||
for N in range(4, 5):
|
||||
# print(compute_pseudopascal(10))
|
||||
# print(compute_pyramids(10))
|
||||
|
||||
points = []
|
||||
for i in range(0, 2 ** N):
|
||||
points.append(decode(i, N))
|
||||
|
||||
bands = [[[] for _ in range(0, N + 1)] for _ in range(0, len(points))]
|
||||
for i in range(0, len(points)):
|
||||
a = points[i]
|
||||
for j in range(0, len(points)):
|
||||
if i == j:
|
||||
continue
|
||||
b = points[j]
|
||||
distance = hamming_distance(a, b)
|
||||
bands[i][distance].append(b)
|
||||
|
||||
# for t in range(0, len(points)):
|
||||
for t in range(0, 1):
|
||||
incoherent_distances = np.zeros((N + 1, N + 1))
|
||||
incoherent_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32)
|
||||
incoherent_sub_bands = np.zeros((N + 1, N + 1, N + 1, N + 1)).astype(np.int32)
|
||||
for k in range(1, N + 1):
|
||||
# print(k, '================================')
|
||||
x_a = points[t]
|
||||
y_a = xor(x_a, k)
|
||||
total_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
for distance in range(0, N + 1):
|
||||
# print('distance', distance)
|
||||
band = bands[t][distance]
|
||||
for x_b in band:
|
||||
y_b = xor(x_b, k)
|
||||
if y_a != y_b:
|
||||
incoherent_distances[k][distance] += 1
|
||||
|
||||
if len(band) < 2:
|
||||
continue
|
||||
for band_origin in range(0, len(band)):
|
||||
x_p = band[band_origin]
|
||||
y_p = xor(x_p, k)
|
||||
sub_bands = [[] for _ in range(0, N + 1)]
|
||||
for i in range(0, len(band)):
|
||||
if i == band_origin:
|
||||
continue
|
||||
x_q = band[i]
|
||||
y_q = xor(x_q, k)
|
||||
band_distance = hamming_distance(x_p, x_q)
|
||||
total_bands[distance][band_distance] += 1
|
||||
if y_p != y_q:
|
||||
incoherent_bands[k][distance][band_distance] += 1
|
||||
sub_bands[band_distance].append(x_q)
|
||||
|
||||
# incoherent_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
# total_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
for band_distance in range(0, N + 1):
|
||||
sub_band = sub_bands[band_distance]
|
||||
if len(sub_band) < 2:
|
||||
continue
|
||||
for sub_band_origin in range(0, len(sub_band)):
|
||||
x_u = sub_band[sub_band_origin]
|
||||
y_u = xor(x_u, k)
|
||||
for i in range(0, len(sub_band)):
|
||||
if i == sub_band_origin:
|
||||
continue
|
||||
x_v = sub_band[i]
|
||||
y_v = xor(x_v, k)
|
||||
sub_band_distance = hamming_distance(x_v, x_u)
|
||||
# total_sub_bands[band_distance][sub_band_distance] += 1
|
||||
if y_u != y_v:
|
||||
incoherent_sub_bands[k][distance][band_distance][sub_band_distance] += 1
|
||||
# print(incoherent_sub_bands)
|
||||
# print(total_sub_bands)
|
||||
# print('==========================')
|
||||
|
||||
if last_incoherent_sub_bands is not None:
|
||||
for distance in range(1, int(N / 2) + 1):
|
||||
for band_distance in range(0, N + 1):
|
||||
for sub_band_distance in range (0, N + 1):
|
||||
if band_distance >= N or sub_band_distance >= N or last_incoherent_sub_bands[1][distance][band_distance][sub_band_distance] == 0:
|
||||
value = incoherent_sub_bands[1][distance][band_distance][sub_band_distance]
|
||||
if value > 0:
|
||||
print(N, value, (distance, band_distance, sub_band_distance))
|
||||
|
||||
last_incoherent_distances = incoherent_distances
|
||||
last_incoherent_bands = incoherent_bands
|
||||
last_incoherent_sub_bands = incoherent_sub_bands
|
||||
|
||||
# print(bands)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
164
train_generator.py
Normal file
@ -0,0 +1,164 @@
|
||||
import hashlib
|
||||
import secrets
|
||||
from struct import pack, pack_into, unpack_from
|
||||
|
||||
def sha(x):
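# Label function for the experiment: the lowest bit of SHA-256(x).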
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def bit_at_index(buffer, index):
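# Returns the bit of `buffer` at bit position `index`; the byte offset wraps
# around modulo len(buffer).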
|
||||
offset = (index >> 3) % len(buffer)
|
||||
return buffer[offset] & (1 << (index & 0b111)) != 0
|
||||
|
||||
def evaluate(f, x):
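# Interprets f as a small byte-coded program over the input bits of x, roughly:
# opcodes 0/1 push the current accumulator together with a negation flag, opcode 2
# folds the top of the stack back in as (left & right) ^ flag, and any other opcode
# is followed by a 4-byte index whose addressed bit of x is XORed into the
# accumulator. Returns (output bit, bytes of f consumed).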
|
||||
stack = []
|
||||
offset = 0
|
||||
value = 0
|
||||
while offset < len(f):
|
||||
opcode = f[offset]
|
||||
offset += 1
|
||||
if opcode == 0 or opcode == 1:
|
||||
stack.append((opcode, value))
|
||||
value = 0
|
||||
elif opcode == 2:
|
||||
if len(stack) == 0:
|
||||
return (value, offset)
|
||||
(last_opcode, _) = stack[-1]
|
||||
if last_opcode > 0:
|
||||
stack.append((0, value))
|
||||
value = 0
|
||||
continue
|
||||
right = value
|
||||
(_, left) = stack.pop()
|
||||
(opcode, value) = stack.pop()
|
||||
value ^= ((left & right) ^ (opcode & 0b1))
|
||||
else:
|
||||
try:
|
||||
index = unpack_from('I', f, offset)[0]
|
||||
offset += 4
|
||||
if bit_at_index(x, index):
|
||||
value ^= 1
|
||||
except:
|
||||
break
|
||||
|
||||
while len(stack) > 0:
|
||||
(opcode, other_value) = stack.pop()
|
||||
if opcode == 0:
|
||||
right = other_value
|
||||
(opcode, left) = stack.pop()
|
||||
value ^= ((left & right) ^ (opcode & 0b1))
|
||||
value ^= other_value ^ (opcode & 0b1)
|
||||
return (value, offset)
|
||||
|
||||
def random_generator():
|
||||
return secrets.token_bytes(256)
|
||||
|
||||
def random_input():
|
||||
return secrets.token_bytes(4)
|
||||
|
||||
def generate(generator, sample):
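# Builds a 1024-byte candidate program f one output bit at a time: for each bit,
# the generator program is evaluated on an input made of the step counter (packed
# into the leading bytes) followed by the packed training sample.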
|
||||
f_size = 1024
|
||||
f = bytearray(f_size)
|
||||
x = bytearray(4) + sample
|
||||
for i in range(0, f_size):
|
||||
build_value = 0
|
||||
for j in range(0, 8):
|
||||
step = i * 8 + j
|
||||
pack_into('H', x, 0, step)
|
||||
(value, _) = evaluate(generator, x)
|
||||
build_value <<= 1
|
||||
build_value |= value
|
||||
f[i] = build_value
|
||||
return f
|
||||
|
||||
def sample(N):
|
||||
inputs = [random_input() for i in range(0, N)]
|
||||
outputs = [sha(x) for x in inputs]
|
||||
return (inputs, outputs)
|
||||
|
||||
def augment_inputs(inputs, layers):
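# Prepends a working byte to each input and fills it with the output bits of the
# candidates in each accepted layer (the byte is rewritten layer by layer), so
# later generations can condition on the previous layer's predictions.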
|
||||
augmented_inputs = []
|
||||
for x in inputs:
|
||||
x_n = bytearray(1) + x
|
||||
for layer in layers:
|
||||
build_value = 0
|
||||
for candidate in layer:
|
||||
(value, _) = evaluate(candidate, x_n)
|
||||
build_value <<= 1
|
||||
build_value |= value
|
||||
x_n[0] = build_value
|
||||
augmented_inputs.append(x_n)
|
||||
return augmented_inputs
|
||||
|
||||
def pack_sample(inputs, outputs):
|
||||
sample = bytearray()
|
||||
for i in range(0, len(inputs)):
|
||||
sample += inputs[i]
|
||||
sample += bytearray([outputs[i]])
|
||||
return sample
|
||||
|
||||
def compute_score(f, inputs, outputs):
|
||||
correct = 0.0
|
||||
for i in range(0, len(inputs)):
|
||||
(value, _) = evaluate(f, inputs[i])
|
||||
if value == outputs[i]:
|
||||
correct += 1
|
||||
return correct / len(outputs)
|
||||
|
||||
def evaluate_generator(g):
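# Scores a generator by running the cascade: each epoch draws a fresh training
# sample, asks the generator for candidate programs, measures each candidate on a
# separate test sample, rejects candidates below the (slowly relaxed) threshold,
# keeps the survivors as a new layer, and raises the threshold to the survivors'
# mean test score. The final threshold is returned as the generator's score.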
|
||||
num_candidates = 8
|
||||
num_train_samples = 64
|
||||
num_test_samples = 1000
|
||||
num_epochs = 10
|
||||
threshold = 0
|
||||
|
||||
layers = []
|
||||
for epoch in range(0, num_epochs):
|
||||
difficulty = 0
|
||||
layer = []
|
||||
candidate = 0
|
||||
scores = []
|
||||
while candidate < num_candidates:
|
||||
(x, y) = sample(num_train_samples)
|
||||
x_n = augment_inputs(x, layers)
|
||||
f = generate(g, pack_sample(x_n, y))
|
||||
print(f)
|
||||
|
||||
(x, y) = sample(num_test_samples)
|
||||
x_n = augment_inputs(x, layers)
|
||||
score = compute_score(f, x_n, y)
|
||||
|
||||
if score < threshold - difficulty * 0.0001:
|
||||
difficulty += 1
|
||||
continue
|
||||
|
||||
print(epoch, score, difficulty)
|
||||
|
||||
layer.append(f)
|
||||
scores.append(score)
|
||||
difficulty = 0
|
||||
candidate += 1
|
||||
threshold = sum(scores) / len(scores)
|
||||
layers.append(layer)
|
||||
return threshold
|
||||
|
||||
def main():
|
||||
num_random_candidates = 1000
|
||||
|
||||
g = None
|
||||
score = 0
|
||||
|
||||
for i in range(0, num_random_candidates):
|
||||
g_n = random_generator()
|
||||
print(g_n)
|
||||
score_n = evaluate_generator(g_n)
|
||||
print(i, score_n)
|
||||
if score_n > score:  # keep the best-scoring generator (the comparison was inverted, so g was never set)
|
||||
score = score_n
|
||||
g = g_n
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|