Add probabilities work to git
Commit fd2045dfca

2_point_plot.py (executable file, 77 lines added)
@@ -0,0 +1,77 @@
import numpy as np
import matplotlib.pyplot as plt


def flip(n, index):
    return n ^ (1 << index)


def distance(i, j):
    # Hamming distance between vertex indices i and j
    return bin(i ^ j).count('1')


def matrix_system_with_two_knowns(p, q, N):
    # Linear system over the 2^N hypercube vertices: vertex p is pinned to 1,
    # vertex q to 0, and every other vertex must equal the mean of its N neighbours.
    S = 2 ** N
    mat = np.zeros((S, S))
    val = np.zeros(S)
    for i in range(0, S):
        if i == p:
            mat[i][i] = 1.0
            val[i] = 1.0
        elif i == q:
            mat[i][i] = 1.0
        else:
            mat[i][i] = -1.0
            for j in range(0, N):
                mat[i][flip(i, j)] = 1.0 / N
    return (mat, val)


def main():
    final_values = []
    final_x = []
    final_y = []

    for N in range(11, 12):
        print(N)
        S = 2 ** N
        distances = np.zeros((S, S))
        for i in range(0, S):
            for j in range(0, S):
                distances[i][j] = distance(i, j)

        # final_values = []
        # final_basis = []
        visited_distances = set()
        for p in range(0, S):
            for q in range(p + 1, S):
                pq_distance = distances[p, q]
                if pq_distance in visited_distances:
                    continue
                visited_distances.add(pq_distance)
                (mat, val) = matrix_system_with_two_knowns(p, q, N)
                solution = np.linalg.inv(mat).dot(val)
                for i in range(0, len(solution)):
                    final_x.append(distances[i, p] / N)
                    final_y.append(distances[i, q] / N)
                    final_values.append(solution[i])

                # values = list(set(solution))
                # values.sort()
                # if len(values) <= 1:
                #     continue
                # basis = [1.0 * i / (len(values) - 1) for i in range(len(values))]

                # final_values.extend(values)
                # final_basis.extend(basis)

    # fig, ax = plt.subplots()
    # ax.scatter(final_values, final_basis)

    # print(np.linalg.lstsq((final_x, final_y), final_values))

    # Plot each vertex's solved value against its normalized distances to p and q.
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.scatter(final_x, final_y, final_values)

    ax.grid(True)
    plt.show()


if __name__ == "__main__":
    main()
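For reference, here is a tiny worked instance of the system that matrix_system_with_two_knowns builds: every unpinned vertex of the N-cube is constrained to equal the mean of its N neighbours, while p is pinned to 1 and q to 0. The sketch below writes out the N=2, p=0, q=3 case explicitly and solves it with np.linalg.solve (equivalent to the inv(...).dot(...) used above, and a little more numerically stable); it is an illustration, not part of the committed script.

import numpy as np

# N = 2, p = 0 (pinned to 1), q = 3 (pinned to 0); the rows for vertices 1 and 2
# say "my value minus the mean of my two neighbours is zero".
mat = np.array([
    [ 1.0,  0.0,  0.0,  0.0],   # p0 = 1
    [ 0.5, -1.0,  0.0,  0.5],   # p1 = (p0 + p3) / 2
    [ 0.5,  0.0, -1.0,  0.5],   # p2 = (p0 + p3) / 2
    [ 0.0,  0.0,  0.0,  1.0],   # p3 = 0
])
val = np.array([1.0, 0.0, 0.0, 0.0])
print(np.linalg.solve(mat, val))  # [1.  0.5 0.5 0. ] -- vertices 1 and 2 sit midway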

Dockerfile (new file, 6 lines added)
@@ -0,0 +1,6 @@
FROM nvidia/cuda:11.6.0-devel-ubuntu20.04
RUN apt-get update && apt-get install -y python3 python3-pip
RUN pip install numpy pycuda
WORKDIR /app
COPY mutations_cuda.py /app/mutations_cuda.py
CMD ["python3", "-u", "mutations_cuda.py"]

README.md (new file, 43 lines added)
@@ -0,0 +1,43 @@
Terminology:

The sample space 'S' has 'n' bits, and there is a function 'f' that maps 'x' (an n-bit vector) in 'S' to 'y':

f(x) = y

We can use PCA to generate candidates for some sub-sample 'P' of 'S'. Candidates that exhibit generalization
properties (they score higher than the previous generation on a sub-sample 'Q' they have not seen before) can be
cascaded into the input for training the next generation of candidates.

This candidate generation process is 'G'. 'G' is considered to perform well if the candidates it
generates exhibit generalization properties.

To bootstrap, we can use PCA for 'G' and store the state machine instructions 'S_G' that create the highest-performing
candidates on a particular problem 'f'; 'S_G' then serves as a sample space for training a new generator 'G_n'.

Use 'G' to generate candidates for 'G_n'. Training samples come from 'S_G', but candidates should be evaluated
by how well the candidates they generate perform on 'f'.

So we need to be able to score a particular g ∈ G_n. We can evaluate for a fixed number of epochs and use some combination
of the average difficulty and the evaluation score.

A generator G is a state machine with input

G(|j-bit step|m * n-bit inputs|) = y

where y is a bit in an instruction.

'a' is an address in 'A', |log2(n)| bits wide.

|opcode 2-bit|
|00 - xor|
|01 - end|
|10 - and|
|11 - nand|

xor is followed by an address 'a' for an input bit.

This process can be repeated indefinitely, replacing 'G' with 'G_n', to create new generators that outperform the previous
generation at solving 'f'.

A candidate is a state machine with input

f(|n-bit input|) = y
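A minimal sketch of how a candidate's instruction stream could be decoded under the opcode table above. The operand layout for 'and'/'nand' is not spelled out in this README, so the sketch only implements the 'xor'-followed-by-address and 'end' cases; the bit-packing, the function name, and the example program are assumptions for illustration, not part of the repository.

# Hypothetical decoder for the 2-bit-opcode format described above.
# Assumes instructions are a flat list of bits, each address is |log2(n)| bits,
# and the program is "xor a" repeated until "end" -- i.e. the candidate
# computes the XOR of a chosen subset of its input bits.
def evaluate_candidate(bits, x, n):
    addr_width = max(1, (n - 1).bit_length())   # |log2(n)| bits per address
    value = 0
    pc = 0
    while pc + 2 <= len(bits):
        opcode = (bits[pc] << 1) | bits[pc + 1]
        pc += 2
        if opcode == 0b01:            # end
            break
        if opcode == 0b00:            # xor, followed by an address 'a'
            a = 0
            for b in bits[pc:pc + addr_width]:
                a = (a << 1) | b
            pc += addr_width
            value ^= (x >> a) & 1     # xor in the addressed input bit
        else:
            break                     # and/nand operand layout left unspecified
    return value

# Example: program "xor 0; xor 2; end" on a 4-bit input.
program = [0, 0, 0, 0,   0, 0, 1, 0,   0, 1]
print(evaluate_candidate(program, 0b0101, 4))   # -> 0 (bit0 ^ bit2 = 1 ^ 1)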

model_probabilities.py (executable file, 171 lines added)
@@ -0,0 +1,171 @@
import math
from statistics import median


def count_one_bits(n):
    return bin(n).count("1")


def compute_distance(a, b):
    # Inverse-exponential proximity weight: closer points (smaller Hamming
    # distance) contribute more.
    distance = count_one_bits(a ^ b)
    # return 1 / (8 ** distance)
    return 1 / (2 ** distance)


def xor_n(n):
    return count_one_bits(n) % 2


def compute_distances(N):
    return [[compute_distance(i, j) for j in range(N)] for i in range(N)]


def compute_nn_probabilities(i, knowns, distances):
    # Distance-weighted vote of the known labels for vertex i:
    # returns (P(value == 0), P(value == 1)).
    total = 0.0
    total_zero = 0.0
    total_one = 0.0
    for known in knowns:
        j = known[0]
        distance = distances[i][j]
        total += distance
        if known[1] == 0:
            total_zero += distance
        else:
            total_one += distance
    p_zero = total_zero / total
    p_one = total_one / total
    return (p_zero, p_one)


def compute_est_coherence(i, knowns, coherences, distances):
    total = 0.0
    coherence = 0.0
    for known in knowns:
        j = known[0]
        distance = distances[i][j]
        total += distance
        coherence += distance * coherences[j]
    return coherence / total


def compute_est_coherences(N, knowns, distances):
    nn_probabilities = [None for i in range(N)]
    est_coherences = [None for i in range(N)]

    # for known in knowns:
    #     i = known[0]
    #     nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances)
    for known in knowns:
        i = known[0]
        nn_probabilities[i] = (1.0 - known[1], 1.0 * known[1])

    for i in range(len(nn_probabilities)):
        if not nn_probabilities[i] is None:
            continue
        nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances)

    print(nn_probabilities)

    # Coherence of vertex i: distance-weighted probability that i agrees with
    # the other vertices' estimated values.
    for i in range(len(nn_probabilities)):
        total = 0.0
        coherence = 0.0
        p_i = nn_probabilities[i]
        for j in range(len(nn_probabilities)):
            if i == j:
                continue
            p_j = nn_probabilities[j]
            distance = distances[i][j]
            total += distance
            coherence += (p_i[0] * p_j[0] + p_i[1] * p_j[1]) * distance
        # print(coherence, total)
        est_coherences[i] = coherence / total

    # for known in knowns:
    #     i = known[0]
    #     est_coherences[i] = nn_probabilities[i][known[1]]

    # for i in range(len(est_coherences)):
    #     if not est_coherences[i] is None:
    #         continue
    #     est_coherences[i] = compute_est_coherence(i, knowns, est_coherences, distances)

    # print(est_coherences)

    return est_coherences


def score(coherences):
    # while len(coherences) > 1:
    #     coherences = [(coherences[i] + coherences[i + 1]) / 2 for i in range(0, len(coherences), 2)]
    # return coherences[0]

    # return median(coherences)
    return sum(coherences) / len(coherences)


def xor_by_index(knowns, index):
    # Absorb input bit `index` into the labels: flip the label of every known
    # whose key has that bit set.
    mask = 1 << index
    knowns = knowns[:]
    for i in range(len(knowns)):
        known = knowns[i]
        if known[0] & mask:
            knowns[i] = (known[0], known[1] ^ 1)
    return knowns


def main():
    n = 3
    N = 2 ** n
    distances = compute_distances(N)

    knowns = [(i, xor_n(i)) for i in [
        0, 3, 4, 5, 7
        # 3, 5, 6, 10, 12, 14
        # 1, 3, 7, 10, 14, 15
        # 0, 3, 5, 6, 10, 11, 12
        # 0, 3, 5, 6, 10, 11, 12, 24, 30
    ]]
    print(knowns)
    print()

    # knowns = [
    #     (1, 1),
    #     (3, 0),
    #     (7, 1),
    #     (10, 0),
    #     (14, 1),
    #     (15, 0)
    # ]

    # knowns = [
    #     (0, 0),
    #     (3, 0),
    #     (4, 1),
    #     (5, 0),
    #     (7, 1)
    # ]

    # knowns = [
    #     (0, 0),
    #     (1, 1),
    #     (2, 1),
    #     (3, 0),
    #     (4, 1),
    #     (5, 0),
    #     (6, 0),
    #     (7, 1)
    # ]

    coherences = compute_est_coherences(N, knowns, distances)
    best_coherence = score(coherences)
    print(best_coherence)

    # Greedy hill-climb: keep absorbing the bit whose flip most improves the
    # average coherence until no flip helps.
    while best_coherence < 1.0:
        print()
        # print(knowns)
        # print()
        best_index = -1
        for i in range(0, n):
            coherences = compute_est_coherences(N, xor_by_index(knowns, i), distances)
            coherence = score(coherences)
            print(coherence)
            if coherence > best_coherence:
                best_coherence = coherence
                best_index = i
        if best_index < 0:
            break
        knowns = xor_by_index(knowns, best_index)


if __name__ == "__main__":
    main()
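The greedy loop in main() relies on xor_by_index absorbing one input bit into the labels: if the bit at `index` is set in a known's key, that known's label is flipped, which for the parity target amounts to dividing out x_index. A standalone illustration (the helper is re-stated compactly here so the snippet runs on its own; the keys are an arbitrary toy choice):

# Absorbing input bits into the labels of a parity problem.
def xor_by_index(knowns, index):
    mask = 1 << index
    return [(k, v ^ 1) if k & mask else (k, v) for (k, v) in knowns]

knowns = [(0b011, 0), (0b100, 1), (0b111, 1)]   # (key, parity(key))
for bit in (2, 1, 0):
    knowns = xor_by_index(knowns, bit)
    print(bit, knowns)
# After absorbing bits 2, 1 and 0 every label is 0: the residual function is
# constant, which is the "coherence == 1.0" stopping condition of the loop above.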

model_probabilities2.py (executable file, 260 lines added)
@@ -0,0 +1,260 @@
import math
|
||||
from statistics import median, stdev
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def compute_distance(a, b):
|
||||
distance = count_one_bits(a ^ b)
|
||||
# return 1 / (8 ** distance)
|
||||
if distance == 0:
|
||||
return 0
|
||||
# return 1 / (64 ** (distance - 1))
|
||||
return distance
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def compute_distances(N):
|
||||
return [[compute_distance(i, j) for j in range(N)] for i in range(N)]
|
||||
|
||||
def compute_nn_probabilities(i, knowns, distances):
|
||||
total = 0.0
|
||||
total_zero = 0.0
|
||||
total_one = 0.0
|
||||
for known in knowns:
|
||||
j = known[0]
|
||||
if i == j:
|
||||
continue
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
if known[1] == 0:
|
||||
total_zero += distance
|
||||
else:
|
||||
total_one += distance
|
||||
p_zero = total_zero / total
|
||||
p_one = total_one / total
|
||||
return (p_zero, p_one)
|
||||
|
||||
def interpolate_probabilities(i, knowns, distances, probabilities, dim):
|
||||
total = 0.0
|
||||
total_dim = [0.0] * dim
|
||||
for known in knowns:
|
||||
j = known[0]
|
||||
if i == j:
|
||||
continue
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
probability = probabilities[j]
|
||||
for index in range(dim):
|
||||
total_dim[index] += distance * probability[index]
|
||||
for index in range(dim):
|
||||
total_dim[index] /= total
|
||||
return total_dim
|
||||
|
||||
def compute_est_coherence(i, knowns, coherences, distances):
|
||||
total = 0.0
|
||||
coherence = 0.0
|
||||
for known in knowns:
|
||||
j = known[0]
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
coherence += distance * coherences[j]
|
||||
return coherence / total
|
||||
|
||||
def compute_est_coherences(N, knowns, distances):
|
||||
nn_probabilities = [None for i in range(N)]
|
||||
nn_correct_probabilities = [None for i in range(N)]
|
||||
coherences = []
|
||||
|
||||
for known in knowns:
|
||||
i = known[0]
|
||||
nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances)
|
||||
|
||||
# for i in range(len(nn_probabilities)):
|
||||
# if not nn_probabilities[i] is None:
|
||||
# continue
|
||||
# nn_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_probabilities, 2)
|
||||
|
||||
for known in knowns:
|
||||
i = known[0]
|
||||
nn_correct_probabilities[i] = [nn_probabilities[i][known[1]]]
|
||||
|
||||
# for i in range(len(nn_correct_probabilities)):
|
||||
# if not nn_correct_probabilities[i] is None:
|
||||
# continue
|
||||
# nn_correct_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_correct_probabilities, 1)
|
||||
|
||||
coherences_0 = []
|
||||
coherences_1 = []
|
||||
for known_i in knowns:
|
||||
i = known_i[0]
|
||||
coherence = 0.0
|
||||
total = 0.0
|
||||
for known_j in knowns:
|
||||
j = known_j[0]
|
||||
if i == j:
|
||||
continue
|
||||
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
|
||||
nn_p_i_0 = nn_probabilities[i][0]
|
||||
nn_p_i_1 = nn_probabilities[i][1]
|
||||
nn_c_p_i = nn_correct_probabilities[i][0]
|
||||
|
||||
nn_p_j_0 = nn_probabilities[j][0]
|
||||
nn_p_j_1 = nn_probabilities[j][1]
|
||||
nn_c_p_j = nn_correct_probabilities[j][0]
|
||||
|
||||
p_i_0 = nn_p_i_0 * nn_c_p_i + nn_p_i_1 * (1 - nn_c_p_i)
|
||||
p_i_1 = nn_p_i_1 * nn_c_p_i + nn_p_i_0 * (1 - nn_c_p_i)
|
||||
|
||||
p_j_0 = nn_p_j_0 * nn_c_p_j + nn_p_j_1 * (1 - nn_c_p_j)
|
||||
p_j_1 = nn_p_j_1 * nn_c_p_j + nn_p_j_0 * (1 - nn_c_p_j)
|
||||
|
||||
coherence += distance * (p_i_0 * p_j_0 + p_i_1 * p_j_1)
|
||||
coherences.append(coherence / total)
|
||||
if known_i[1] == 0:
|
||||
coherences_0.append(coherence / total)
|
||||
else:
|
||||
coherences_1.append(coherence / total)
|
||||
|
||||
return coherences
|
||||
|
||||
def score(coherences, knowns, distances):
|
||||
# while len(coherences) > 1:
|
||||
# coherences = [(coherences[i] + coherences[i + 1]) / 2 for i in range(0, len(coherences), 2)]
|
||||
# return coherences[0]
|
||||
|
||||
# return median(coherences)
|
||||
# return sum(coherences) / len(coherences)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
numerator_0 = 0.0
|
||||
denominator_0 = 0.0
|
||||
numerator_1 = 0.0
|
||||
denominator_1 = 0.0
|
||||
count_0 = 0.0
|
||||
count_1 = 0.0
|
||||
for i in range(len(knowns)):
|
||||
weight = 0
|
||||
for j in range(len(knowns)):
|
||||
weight += distances[knowns[i][0]][knowns[j][0]]
|
||||
print(weight, end=' ')
|
||||
if knowns[i][1] == 0:
|
||||
denominator_0 += weight
|
||||
numerator_0 += weight * coherences[i]
|
||||
count_0 += 1
|
||||
else:
|
||||
denominator_1 += weight
|
||||
numerator_1 += weight * coherences[i]
|
||||
count_1 += 1
|
||||
# print()
|
||||
if count_0 == 0 or count_1 == 0:
|
||||
return 1.0
|
||||
|
||||
# return ((sum(coherences[0]) / len(coherences[0])) + (sum(coherences[1]) / len(coherences[1]))) / 2.0
|
||||
# return (sum(coherences[0]) + sum(coherences[1])) / (len(coherences[0]) + len(coherences[1]))
|
||||
# div_0 = (numerator_0 / denominator_0 if denominator_0 > 0 else 1.0) * 0.5
|
||||
# div_1 = (numerator_1 / denominator_1 if denominator_1 > 0 else 1.0) * 0.5
|
||||
# return div_0 + div_1
|
||||
# aligned = 1.0 - abs(0.5 - max(count_0 / (count_0 + count_1), count_1 / (count_0 + count_1)))
|
||||
# return ((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) * (aligned ** 0.1)
|
||||
# return (((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) + 0.12 * aligned) * (1.0 / 1.12)
|
||||
return (numerator_0 + numerator_1) / (denominator_0 + denominator_1)
|
||||
|
||||
def xor_by_index(knowns, index):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
known = knowns[i]
|
||||
if known[0] & mask:
|
||||
knowns[i] = (known[0], known[1] ^ 1)
|
||||
return knowns
|
||||
|
||||
def main():
|
||||
n = 8
|
||||
N = 2 ** n
|
||||
distances = compute_distances(N)
|
||||
|
||||
knowns = [(i, xor_n(i)) for i in [
|
||||
# 0, 3, 4, 5, 7
|
||||
# 3, 5, 6, 10, 12, 14
|
||||
# 1, 3, 7, 10, 14, 15
|
||||
# 0, 3, 5, 6, 10, 11, 12
|
||||
0, 3, 5, 6, 10, 11, 12, 24, 30
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
|
||||
# 128, 131, 248, 0, 7, 13, 17, 19
|
||||
]]
|
||||
|
||||
for known_i in knowns:
|
||||
i = known_i[0]
|
||||
for known_j in knowns:
|
||||
j = known_j[0]
|
||||
print(distances[i][j], end=' ')
|
||||
print()
|
||||
|
||||
print(knowns)
|
||||
print()
|
||||
|
||||
# knowns = [
|
||||
# (1, 1),
|
||||
# (3, 0),
|
||||
# (7, 1),
|
||||
# (10, 0),
|
||||
# (14, 1),
|
||||
# (15, 0)
|
||||
# ]
|
||||
|
||||
# knowns = [
|
||||
# (0, 0),
|
||||
# (3, 0),
|
||||
# (4, 1),
|
||||
# (5, 0),
|
||||
# (7, 1)
|
||||
# ]
|
||||
|
||||
# knowns = [
|
||||
# (0, 0),
|
||||
# (1, 1),
|
||||
# (2, 1),
|
||||
# (3, 0),
|
||||
# (4, 1),
|
||||
# (5, 0),
|
||||
# (6, 0),
|
||||
# (7, 1)
|
||||
# ]
|
||||
|
||||
coherences = compute_est_coherences(N, knowns, distances)
|
||||
best_coherence = score(coherences, knowns, distances)
|
||||
print(best_coherence)
|
||||
|
||||
flipped = []
|
||||
while best_coherence < 1.0:
|
||||
print()
|
||||
# print(knowns)
|
||||
# print()
|
||||
best_index = -1
|
||||
# best_coherence = 0
|
||||
for i in range(0, n):
|
||||
if i in flipped:
|
||||
continue
|
||||
mutated_knowns = xor_by_index(knowns, i)
|
||||
coherences = compute_est_coherences(N, mutated_knowns, distances)
|
||||
coherence = score(coherences, mutated_knowns, distances)
|
||||
# print(coherence)
|
||||
print(coherence, end=' ')
|
||||
print(mutated_knowns)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index)
|
||||
# flipped.append(best_index)
|
||||
print(knowns)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
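The score used in this version is a distance-weighted mean of the per-known coherences: each known's weight is the sum of its Hamming distances to the other knowns, and the weighted sums for the 0-labelled and 1-labelled groups are combined into one ratio (returning 1.0 when either class is empty, since a single-class sample is already "solved"). A minimal standalone sketch of that combination, with invented weights and coherence values purely for illustration:

# Hypothetical numbers: three knowns, their distance weights, and per-known
# coherences (in the script these come from compute_est_coherences).
weights    = [3.0, 2.0, 5.0]      # sum of distances to the other knowns
coherences = [0.9, 0.6, 0.8]

num = sum(w * c for w, c in zip(weights, coherences))
den = sum(weights)
print(num / den)   # 0.79 -- the value the greedy loop tries to push towards 1.0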

model_probabilities3.py (executable file, 463 lines added)
@@ -0,0 +1,463 @@
import hashlib
|
||||
import math
|
||||
from statistics import median, stdev
|
||||
import numpy as np
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def compute_distance(a, b):
|
||||
distance = count_one_bits(a ^ b)
|
||||
# return 1 / (8 ** distance)
|
||||
if distance == 0:
|
||||
return 0
|
||||
# return 1 / (64 ** (distance - 1))
|
||||
return distance
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha_n(n):
|
||||
m = hashlib.sha256()
|
||||
m.update(str(n).encode("utf-8"))
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def compute_distances(N):
|
||||
return [[compute_distance(i, j) for j in range(N)] for i in range(N)]
|
||||
|
||||
def compute_nn_probabilities(i, knowns, distances):
|
||||
total = 0.0
|
||||
total_zero = 0.0
|
||||
total_one = 0.0
|
||||
for known in knowns:
|
||||
j = known[0]
|
||||
if i == j:
|
||||
continue
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
if known[1] == 0:
|
||||
total_zero += distance
|
||||
else:
|
||||
total_one += distance
|
||||
p_zero = total_zero / total
|
||||
p_one = total_one / total
|
||||
return (p_zero, p_one)
|
||||
|
||||
def interpolate_probabilities(i, knowns, distances, probabilities, dim):
|
||||
total = 0.0
|
||||
total_dim = [0.0] * dim
|
||||
for known in knowns:
|
||||
j = known[0]
|
||||
if i == j:
|
||||
continue
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
probability = probabilities[j]
|
||||
for index in range(dim):
|
||||
total_dim[index] += distance * probability[index]
|
||||
for index in range(dim):
|
||||
total_dim[index] /= total
|
||||
return total_dim
|
||||
|
||||
def compute_est_coherence(i, knowns, coherences, distances):
|
||||
total = 0.0
|
||||
coherence = 0.0
|
||||
for known in knowns:
|
||||
j = known[0]
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
coherence += distance * coherences[j]
|
||||
return coherence / total
|
||||
|
||||
def compute_est_coherences(N, knowns, distances):
|
||||
nn_probabilities = [None for i in range(N)]
|
||||
nn_correct_probabilities = [None for i in range(N)]
|
||||
coherences = []
|
||||
|
||||
for known in knowns:
|
||||
i = known[0]
|
||||
nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances)
|
||||
|
||||
# for i in range(len(nn_probabilities)):
|
||||
# if not nn_probabilities[i] is None:
|
||||
# continue
|
||||
# nn_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_probabilities, 2)
|
||||
|
||||
for known in knowns:
|
||||
i = known[0]
|
||||
nn_correct_probabilities[i] = [nn_probabilities[i][known[1]]]
|
||||
|
||||
# for i in range(len(nn_correct_probabilities)):
|
||||
# if not nn_correct_probabilities[i] is None:
|
||||
# continue
|
||||
# nn_correct_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_correct_probabilities, 1)
|
||||
|
||||
coherences_0 = []
|
||||
coherences_1 = []
|
||||
for known_i in knowns:
|
||||
i = known_i[0]
|
||||
coherence = 0.0
|
||||
total = 0.0
|
||||
for known_j in knowns:
|
||||
j = known_j[0]
|
||||
if i == j:
|
||||
continue
|
||||
|
||||
distance = distances[i][j]
|
||||
total += distance
|
||||
|
||||
nn_p_i_0 = nn_probabilities[i][0]
|
||||
nn_p_i_1 = nn_probabilities[i][1]
|
||||
nn_c_p_i = nn_correct_probabilities[i][0]
|
||||
|
||||
nn_p_j_0 = nn_probabilities[j][0]
|
||||
nn_p_j_1 = nn_probabilities[j][1]
|
||||
nn_c_p_j = nn_correct_probabilities[j][0]
|
||||
|
||||
p_i_0 = nn_p_i_0 * nn_c_p_i + nn_p_i_1 * (1 - nn_c_p_i)
|
||||
p_i_1 = nn_p_i_1 * nn_c_p_i + nn_p_i_0 * (1 - nn_c_p_i)
|
||||
|
||||
p_j_0 = nn_p_j_0 * nn_c_p_j + nn_p_j_1 * (1 - nn_c_p_j)
|
||||
p_j_1 = nn_p_j_1 * nn_c_p_j + nn_p_j_0 * (1 - nn_c_p_j)
|
||||
|
||||
coherence += distance * (p_i_0 * p_j_0 + p_i_1 * p_j_1)
|
||||
coherences.append(coherence / total)
|
||||
if known_i[1] == 0:
|
||||
coherences_0.append(coherence / total)
|
||||
else:
|
||||
coherences_1.append(coherence / total)
|
||||
|
||||
return coherences
|
||||
|
||||
def score(coherences, knowns, distances):
|
||||
# while len(coherences) > 1:
|
||||
# coherences = [(coherences[i] + coherences[i + 1]) / 2 for i in range(0, len(coherences), 2)]
|
||||
# return coherences[0]
|
||||
|
||||
# return median(coherences)
|
||||
# return sum(coherences) / len(coherences)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
numerator_0 = 0.0
|
||||
denominator_0 = 0.0
|
||||
numerator_1 = 0.0
|
||||
denominator_1 = 0.0
|
||||
count_0 = 0.0
|
||||
count_1 = 0.0
|
||||
for i in range(len(knowns)):
|
||||
weight = 0
|
||||
for j in range(len(knowns)):
|
||||
weight += distances[knowns[i][0]][knowns[j][0]]
|
||||
print(weight, end=' ')
|
||||
if knowns[i][1] == 0:
|
||||
denominator_0 += weight
|
||||
numerator_0 += weight * coherences[i]
|
||||
count_0 += 1
|
||||
else:
|
||||
denominator_1 += weight
|
||||
numerator_1 += weight * coherences[i]
|
||||
count_1 += 1
|
||||
# print()
|
||||
if count_0 == 0 or count_1 == 0:
|
||||
return 1.0
|
||||
|
||||
# return ((sum(coherences[0]) / len(coherences[0])) + (sum(coherences[1]) / len(coherences[1]))) / 2.0
|
||||
# return (sum(coherences[0]) + sum(coherences[1])) / (len(coherences[0]) + len(coherences[1]))
|
||||
# div_0 = (numerator_0 / denominator_0 if denominator_0 > 0 else 1.0) * 0.5
|
||||
# div_1 = (numerator_1 / denominator_1 if denominator_1 > 0 else 1.0) * 0.5
|
||||
# return div_0 + div_1
|
||||
# aligned = 1.0 - abs(0.5 - max(count_0 / (count_0 + count_1), count_1 / (count_0 + count_1)))
|
||||
# return ((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) * (aligned ** 0.1)
|
||||
# return (((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) + 0.12 * aligned) * (1.0 / 1.12)
|
||||
return (numerator_0 + numerator_1) / (denominator_0 + denominator_1)
|
||||
|
||||
def xor_by_index(knowns, index, reverse=False):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
known = knowns[i]
|
||||
if known[0] & mask or (not (known[0] & mask) and reverse):
|
||||
knowns[i] = (known[0], known[1] ^ 1)
|
||||
return knowns
|
||||
|
||||
def flip(n, index):
|
||||
return n ^ (1 << index)
|
||||
|
||||
def matrix_from_knowns(knowns, N):
|
||||
S = 2 ** N
|
||||
mat = np.zeros((S, S))
|
||||
val = np.zeros(S)
|
||||
unknowns = set([i for i in range(0, S)])
|
||||
for (i, value) in knowns:
|
||||
mat[i][i] = 1.0
|
||||
val[i] = value
|
||||
unknowns.remove(i)
|
||||
for i in unknowns:
|
||||
mat[i][i] = -1.0
|
||||
for j in range(0, N):
|
||||
mat[i][flip(i,j)] = 1.0 / N
|
||||
return (mat, val)
|
||||
|
||||
def compute_splits(knowns, N):
|
||||
splits = []
|
||||
for i in range(0, N):
|
||||
mask = 1 << i
|
||||
left_0 = 0
|
||||
left_1 = 0
|
||||
right_0 = 0
|
||||
right_1 = 0
|
||||
for (j, value) in knowns:
|
||||
if j & mask == 0:
|
||||
if value == 0:
|
||||
left_0 += 1
|
||||
else:
|
||||
left_1 += 1
|
||||
else:
|
||||
if value == 0:
|
||||
right_0 += 1
|
||||
else:
|
||||
right_1 += 1
|
||||
print((left_0, left_1), (right_0, right_1))
|
||||
left_ratio = min(left_0, left_1) / (left_0 + left_1)
|
||||
right_ratio = min(right_0, right_1) / (right_0 + right_1)
|
||||
# print(left_ratio, right_ratio)
|
||||
splits.append((left_ratio + right_ratio) / 2)
|
||||
return splits
|
||||
|
||||
def compute_coherence(knowns, N):
|
||||
S = 2 ** N
|
||||
# (mat, val) = matrix_from_knowns(knowns, N)
|
||||
# solution = np.linalg.inv(mat).dot(val)
|
||||
# for it in range(0, 1000):
|
||||
# next = np.zeros(len(solution))
|
||||
# for i in range(0, len(solution)):
|
||||
# sum = 0.0
|
||||
# for j in range(0, N):
|
||||
# sum += solution[flip(i,j)]
|
||||
# next[i] = sum / N
|
||||
# solution = next
|
||||
# return 0.0
|
||||
|
||||
# coherence_0 = 0.0
|
||||
# coherence_1 = 0.0
|
||||
# zeros = 0.0
|
||||
# ones = 0.0
|
||||
# lowest = 1.0
|
||||
# print()
|
||||
(mat, val) = matrix_from_knowns(knowns, N)
|
||||
A = np.linalg.inv(mat).dot(val)
|
||||
knowns_nn = []
|
||||
for known_index in range(0, len(knowns)):
|
||||
(mat, val) = matrix_from_knowns(knowns[:known_index] + knowns[known_index + 1:], N)
|
||||
solution = np.linalg.inv(mat).dot(val)
|
||||
(i, value) = knowns[known_index]
|
||||
value_nn = solution[i]
|
||||
knowns_nn.append((i, value_nn))
|
||||
(mat, val) = matrix_from_knowns(knowns_nn, N)
|
||||
B = np.linalg.inv(mat).dot(val)
|
||||
return 1.0 - (sum(abs(A - B)) / len(A))
|
||||
# # print(A)
|
||||
# # print(B)
|
||||
# A_sub_B = A - B
|
||||
# print(A)
|
||||
# print(B)
|
||||
# print(A)
|
||||
# print(B)
|
||||
# print(np.dot(A, B) / len(A))
|
||||
# return 1.0 - (np.dot(A_sub_B, A_sub_B) / len(A))
|
||||
# print(i, value, value_nn, partial)
|
||||
# coherence += ((value * value_nn) + ((1 - value) * (1 - value_nn))) / len(knowns)
|
||||
# if value == 0:
|
||||
# coherence_0 += partial
|
||||
# zeros += 1
|
||||
# else:
|
||||
# coherence_1 += partial
|
||||
# ones += 1
|
||||
# if zeros == 0 or ones == 0:
|
||||
# return 1.0
|
||||
# return 0.5 * coherence_0 / zeros + 0.5 * coherence_1 / ones
|
||||
|
||||
# coherences = np.zeros(S)
|
||||
# (mat, val) = matrix_from_knowns(knowns, N)
|
||||
# solution = np.linalg.inv(mat).dot(val)
|
||||
# print(solution)
|
||||
# for i in range(0, S):
|
||||
# p = solution[i]
|
||||
# coherence = 0.0
|
||||
# for j in range(0, N):
|
||||
# q = solution[flip(i,j)]
|
||||
# coherence += ((p * q) + ((1 - p) * (1 - q))) / N
|
||||
# coherences[i] = coherence
|
||||
# print(coherences)
|
||||
# return sum(coherences) / len(coherences)
|
||||
|
||||
def compute_split_knowns(knowns, N):
|
||||
sum = 0
|
||||
splits = []
|
||||
for i in range(0, N):
|
||||
mask = 1 << i
|
||||
left = []
|
||||
right = []
|
||||
for (j, value) in knowns:
|
||||
k = (j & ((1 << i) - 1)) | ((j & ~((1 << (i + 1)) - 1)) >> 1)
|
||||
masked_known = (k, value)
|
||||
if j & mask == 0:
|
||||
left.append(masked_known)
|
||||
else:
|
||||
right.append(masked_known)
|
||||
left_coherence = compute_coherence(left, N - 1)
|
||||
right_coherence = compute_coherence(right, N - 1)
|
||||
splits.append((left_coherence, right_coherence))
|
||||
sum += min(left_coherence, right_coherence) * (1.0 - abs(left_coherence - right_coherence))
|
||||
# print()
|
||||
# print(splits)
|
||||
# print()
|
||||
return sum / N
|
||||
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
|
||||
|
||||
def compute_split_knowns_r(knowns, N):
|
||||
if len(knowns) == 0:
|
||||
raise ValueError('This should never happen')
|
||||
|
||||
hist = np.zeros(N)
|
||||
for i in range(0, N):
|
||||
mask = 1 << i
|
||||
for (j, value) in knowns:
|
||||
if j & mask == 0:
|
||||
hist[i] += 1
|
||||
|
||||
constant_bits = []
|
||||
for i in range(0, N):
|
||||
if hist[i] == 0 or hist[i] == len(knowns):
|
||||
constant_bits.append(i)
|
||||
|
||||
if len(constant_bits) > 0:
|
||||
constant_bits.reverse()
|
||||
for n in constant_bits:
|
||||
reduced_knowns = []
|
||||
for (j, value) in knowns:
|
||||
reduced_knowns.append((remove_bit(j, n), value))
|
||||
knowns = reduced_knowns
|
||||
return compute_split_knowns_r(knowns, N - len(constant_bits))
|
||||
|
||||
if len(knowns) == 1:
|
||||
return 1.0
|
||||
if len(knowns) == 2:
|
||||
if knowns[0][1] == knowns[1][1]:
|
||||
return 1.0
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
sum = 0
|
||||
for i in range(0, N):
|
||||
mask = 1 << i
|
||||
left = []
|
||||
right = []
|
||||
for (j, value) in knowns:
|
||||
k = remove_bit(j, i)
|
||||
masked_known = (k, value)
|
||||
if j & mask == 0:
|
||||
left.append(masked_known)
|
||||
else:
|
||||
right.append(masked_known)
|
||||
|
||||
# left_correctness = max(left_0_count, left_1_count) / (left_0_count + left_1_count) if left_0_count > 0 and left_1_count > 0 else 1.0
|
||||
# right_correctness = max(right_0_count, right_1_count) / (right_0_count + right_1_count) if right_0_count > 0 and right_1_count > 0 else 1.0
|
||||
left_coherence = compute_split_knowns_r(left, N - 1)
|
||||
right_coherence = compute_split_knowns_r(right, N - 1)
|
||||
evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0
|
||||
# sum += min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
# delta = 1.0 - ((left_coherence - right_coherence) * (left_coherence - right_coherence))
|
||||
sum += 0.7 * min(left_coherence, right_coherence) + 0.3 * evenness ** 2
|
||||
# sum += min(left_coherence, right_coherence) * (1.0 - abs(left_coherence - right_coherence))
|
||||
return sum / N
|
||||
|
||||
def main():
|
||||
N = 8
|
||||
S = 2 ** N
|
||||
distances = compute_distances(S)
|
||||
|
||||
knowns = [(i, sha_n(i)) for i in [
|
||||
0, 1, 2, 3, 4, 5, 6, 7
|
||||
# 0, 3, 4, 5, 7
|
||||
# 3, 5, 6, 10, 12, 14
|
||||
# 1, 3, 7, 10, 14, 15
|
||||
# 0, 3, 5, 6, 10, 11, 12
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
|
||||
# 128, 131, 248, 0, 7, 13, 17, 19
|
||||
]]
|
||||
|
||||
# best_coherence = compute_coherence(knowns, N)
|
||||
best_coherence = compute_split_knowns_r(knowns, N)
|
||||
print(best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
while best_coherence < 1.0:
|
||||
best_index = -1
|
||||
best_reverse = False
|
||||
# best_coherence = 0
|
||||
for i in range(0, N):
|
||||
for reverse in [False, True]:
|
||||
mutated_knowns = xor_by_index(knowns, i, reverse)
|
||||
# coherence = compute_coherence(mutated_knowns, N)
|
||||
coherence = compute_split_knowns_r(mutated_knowns, N)
|
||||
print(i, reverse, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_reverse = reverse
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index, best_reverse)
|
||||
print()
|
||||
print(best_index, best_reverse, best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
print(knowns)
|
||||
|
||||
# for known_i in knowns:
|
||||
# i = known_i[0]
|
||||
# for known_j in knowns:
|
||||
# j = known_j[0]
|
||||
# print(distances[i][j], end=' ')
|
||||
# print()
|
||||
|
||||
# print(knowns)
|
||||
# print()
|
||||
|
||||
# coherences = compute_est_coherences(N, knowns, distances)
|
||||
# best_coherence = score(coherences, knowns, distances)
|
||||
# print(best_coherence)
|
||||
|
||||
# flipped = []
|
||||
# while best_coherence < 1.0:
|
||||
# print()
|
||||
# # print(knowns)
|
||||
# # print()
|
||||
# best_index = -1
|
||||
# # best_coherence = 0
|
||||
# for i in range(0, n):
|
||||
# if i in flipped:
|
||||
# continue
|
||||
# mutated_knowns = xor_by_index(knowns, i)
|
||||
# coherences = compute_est_coherences(N, mutated_knowns, distances)
|
||||
# coherence = score(coherences, mutated_knowns, distances)
|
||||
# # print(coherence)
|
||||
# print(coherence, end=' ')
|
||||
# print(mutated_knowns)
|
||||
# if coherence > best_coherence:
|
||||
# best_coherence = coherence
|
||||
# best_index = i
|
||||
# if best_index < 0:
|
||||
# break
|
||||
# knowns = xor_by_index(knowns, best_index)
|
||||
# # flipped.append(best_index)
|
||||
# print(knowns)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
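remove_bit is the workhorse of the recursive splitting in this file: it deletes bit n from an index by keeping the bits below n and shifting the bits above n down by one position. A quick standalone check of the bit arithmetic (the helper is copied here so the snippet runs on its own):

def remove_bit(i, n):
    # keep bits below position n, shift bits above position n down by one
    return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)

print(bin(remove_bit(0b1101, 1)))  # 0b111  (the 0 at position 1 is dropped)
print(bin(remove_bit(0b1101, 3)))  # 0b101  (the leading 1 is dropped)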

model_probabilities4.py (executable file, 208 lines added)
@@ -0,0 +1,208 @@
import hashlib
|
||||
import math
|
||||
from statistics import median, stdev
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha_n(n):
|
||||
m = hashlib.sha256()
|
||||
m.update(str(n).encode("utf-8"))
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor_by_index(knowns, index, reverse=False):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
known = knowns[i]
|
||||
if known[0] & mask or (not (known[0] & mask) and reverse):
|
||||
knowns[i] = (known[0], known[1] ^ 1)
|
||||
return knowns
|
||||
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
|
||||
|
||||
def split_at(knowns, N, i):
|
||||
mask = 1 << i
|
||||
left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0]
|
||||
right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def factor_at(knowns, N, i, identity_value=1):
|
||||
mask = 1 << i
|
||||
left = [(j, value) for (j, value) in knowns if value == identity_value or (j & mask) == 0]
|
||||
right = [(j, value) for (j, value) in knowns if value == identity_value or not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def compute_coherence(pair, N, depth = 0):
|
||||
(left, right) = pair
|
||||
(left_depth, left_coherence) = compute_split_knowns_r(left, N, depth)
|
||||
(right_depth, right_coherence) = compute_split_knowns_r(right, N, depth)
|
||||
ratio = min(len(left), len(right)) / max(len(left), len(right))
|
||||
# evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0
|
||||
evenness = left_coherence - right_coherence
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * evenness ** 2
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * evenness ** 2
|
||||
coherence = left_coherence if left_depth > right_depth else right_coherence if right_depth > left_depth else (left_coherence + right_coherence) / 2.0
|
||||
depth = max(left_depth, right_depth)
|
||||
return (depth, 0.9 * coherence + 0.1 * (1.0 - (evenness ** 2)))
|
||||
|
||||
def compute_split_knowns_r(knowns, N, depth = 0):
|
||||
if len(knowns) == 0:
|
||||
return (depth, 1.0)
|
||||
|
||||
hist = np.zeros(N)
|
||||
for i in range(0, N):
|
||||
mask = 1 << i
|
||||
for (j, value) in knowns:
|
||||
if j & mask == 0:
|
||||
hist[i] += 1
|
||||
|
||||
constant_bits = [i for i in range(0, N) if hist[i] == 0 or hist[i] == len(knowns)]
|
||||
if len(constant_bits) > 0:
|
||||
constant_bits.reverse()
|
||||
for n in constant_bits:
|
||||
knowns = [(remove_bit(j, n), value) for (j, value) in knowns]
|
||||
return compute_split_knowns_r(knowns, N - len(constant_bits), depth)
|
||||
|
||||
if len(knowns) == 1:
|
||||
return (depth, 1.0)
|
||||
if len(knowns) == 2:
|
||||
if knowns[0][1] == knowns[1][1]:
|
||||
return (depth, 1.0)
|
||||
else:
|
||||
return (depth, 0.0)
|
||||
|
||||
sum = 0
|
||||
denominator = 0
|
||||
for i in range(0, N):
|
||||
(left, right) = split_at(knowns, N, i)
|
||||
(depth, partial) = compute_coherence((left, right), N, depth + 1)
|
||||
sum += depth * partial
|
||||
denominator += depth
|
||||
return (depth, sum / denominator)
|
||||
|
||||
def invert(knowns):
|
||||
inverted_knowns = []
|
||||
for (i, value) in knowns:
|
||||
inverted_knowns.append((i, 1 - value))
|
||||
return inverted_knowns
|
||||
|
||||
def reduce(knowns, N):
|
||||
flips = []
|
||||
(depth, best_coherence) = compute_split_knowns_r(knowns, N)
|
||||
print(best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
while best_coherence < 1.0:
|
||||
best_index = -1
|
||||
best_reverse = False
|
||||
# best_coherence = 0
|
||||
for i in range(0, N):
|
||||
for reverse in [False, True]:
|
||||
mutated_knowns = xor_by_index(knowns, i, reverse)
|
||||
# coherence = compute_coherence(mutated_knowns, N)
|
||||
(depth, coherence) = compute_split_knowns_r(mutated_knowns, N)
|
||||
print(i, reverse, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_reverse = reverse
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index, best_reverse)
|
||||
flips.append((best_index, best_reverse))
|
||||
print()
|
||||
print(best_index, best_reverse, best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
return (knowns, best_coherence, flips)
|
||||
|
||||
def solve(knowns, N):
|
||||
(knowns, coherence, flips) = reduce(knowns, N)
|
||||
if coherence == 1.0:
|
||||
inverted = knowns[0][1]
|
||||
return (inverted, flips, None)
|
||||
|
||||
raise Exception('Stop')
|
||||
|
||||
best_coherence = 0
|
||||
best_index = -1
|
||||
best_identity_value = False
|
||||
print()
|
||||
for i in range(0, N):
|
||||
for identity_value in [0, 1]:
|
||||
coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N)
|
||||
print(i, identity_value, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_identity_value = identity_value
|
||||
print()
|
||||
(left, right) = factor_at(knowns, N, best_index, best_identity_value)
|
||||
return (0, flips, (best_identity_value, solve(left, N), solve(right, N)))
|
||||
|
||||
def evaluate(model, n, value = 0):
|
||||
(inverted, flips, child) = model
|
||||
for (i, invert) in flips:
|
||||
mask = (1 << i)
|
||||
masked_n = n & mask
|
||||
if (masked_n > 0 and not invert) or (masked_n == 0 and invert):
|
||||
value = 1 - value
|
||||
if not child is None:
|
||||
(identity, left_child, right_child) = child
|
||||
left = evaluate(left_child, n, 1 - identity)
|
||||
right = evaluate(right_child, n, 1 - identity)
|
||||
if left and right:
|
||||
value = 1 - value
|
||||
if identity == 0:
|
||||
value = 1 - value
|
||||
if inverted:
|
||||
value = 1 - value
|
||||
return value
|
||||
|
||||
def main():
|
||||
N = 8
|
||||
S = 2 ** N
|
||||
train_size = 16
|
||||
test_size = 100
|
||||
f = xor_n
|
||||
|
||||
knowns = [(i, f(i)) for i in [
|
||||
# 0, 1, 2, 3, 4, 5, 6, 7
|
||||
# 0, 3, 4, 5, 7
|
||||
# 3, 5, 6, 10, 12, 14
|
||||
# 1, 3, 7, 10, 14, 15
|
||||
# 0, 3, 5, 6, 10, 11, 12
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
|
||||
# 128, 131, 248, 0, 7, 13, 17, 19
|
||||
23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255
|
||||
]]
|
||||
|
||||
# f = xor_n
|
||||
# knowns = []
|
||||
# train_samples = set()
|
||||
# for i in range(0, train_size):
|
||||
# k = random.randint(0, S)
|
||||
# while k in train_samples:
|
||||
# k = random.randint(0, S)
|
||||
# knowns.append((k, f(i)))
|
||||
# train_samples.add(k)
|
||||
|
||||
model = solve(knowns, N)
|
||||
# print(model)
|
||||
correct = 0
|
||||
for i in range(0, test_size):
|
||||
if f(i) == evaluate(model, i):
|
||||
correct += 1
|
||||
print(str(correct) + "/" + str(test_size))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
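The model that solve() returns is a tuple (inverted, flips, child): flips is the list of (bit, reverse) absorptions found by reduce(), child is an optional factored sub-model, and inverted records the residual constant. For a pure parity target the child is None and evaluation reduces to toggling the output once per set bit named in flips. A hand-built example of that case (the flip list is made up to match 3-bit parity, not taken from an actual run):

# Hand-built model equivalent to xor of bits 0..2: no child, residual constant 0.
model = (0, [(0, False), (1, False), (2, False)], None)

def evaluate_flips_only(model, n):
    inverted, flips, child = model          # child assumed None here
    value = 0
    for (i, invert) in flips:
        bit_set = (n >> i) & 1
        if (bit_set and not invert) or (not bit_set and invert):
            value = 1 - value
    return 1 - value if inverted else value

print([evaluate_flips_only(model, n) for n in range(8)])
# [0, 1, 1, 0, 1, 0, 0, 1]  == parity of the 3-bit input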

model_probabilities5.py (executable file, 219 lines added)
@@ -0,0 +1,219 @@
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha_n(n):
|
||||
m = hashlib.sha256()
|
||||
m.update(str(n).encode("utf-8"))
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor_by_index(knowns, index, reverse=False):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
known = knowns[i]
|
||||
if known[0] & mask or (not (known[0] & mask) and reverse):
|
||||
knowns[i] = (known[0], known[1] ^ 1)
|
||||
return knowns
|
||||
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
|
||||
|
||||
def split_at(knowns, N, i):
|
||||
mask = 1 << i
|
||||
left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0]
|
||||
right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def factor_at(knowns, N, i, identity_value=1):
|
||||
mask = 1 << i
|
||||
left = [(j, value) for (j, value) in knowns if value == identity_value or (j & mask) == 0]
|
||||
right = [(j, value) for (j, value) in knowns if value == identity_value or not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def compute_coherence(pair, N):
|
||||
(left, right) = pair
|
||||
left_coherence = compute_split_knowns_r(left, N)
|
||||
right_coherence = compute_split_knowns_r(right, N)
|
||||
ratio = min(len(left), len(right)) / max(len(left), len(right))
|
||||
# evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0
|
||||
# evenness = left_coherence - right_coherence
|
||||
evenness = (1.0 - ((1.0 - left_coherence) - (1.0 - right_coherence)) ** 2)
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * evenness ** 2
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * evenness ** 2
|
||||
# coherence = left_coherence if left_depth > right_depth else right_coherence if right_depth > left_depth else (left_coherence + right_coherence) / 2.0
|
||||
# depth = max(left_depth, right_depth)
|
||||
# return (depth, 0.9 * coherence + 0.1 * (1.0 - (evenness ** 2)))
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * (1.0 - (evenness ** 2))
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * (evenness ** 2)
|
||||
# return ((left_coherence * len(left) + right_coherence * len(right)) / (len(left) +len(right))) * min(left_coherence, right_coherence) * evenness
|
||||
# return min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
coherence = ((len(left) / (len(left) + len(right))) * left_coherence + (len(right) / (len(left) + len(right))) * right_coherence)
|
||||
return min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
|
||||
def compute_split_knowns_r(knowns, N):
|
||||
# if len(knowns) == 0:
|
||||
# return 1.0
|
||||
|
||||
# hist = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# mask = 1 << i
|
||||
# for (j, value) in knowns:
|
||||
# if j & mask == 0:
|
||||
# hist[i] += 1
|
||||
|
||||
# constant_bits = [i for i in range(0, N) if hist[i] == 0 or hist[i] == len(knowns)]
|
||||
# if len(constant_bits) > 0:
|
||||
# constant_bits.reverse()
|
||||
# for n in constant_bits:
|
||||
# knowns = [(remove_bit(j, n), value) for (j, value) in knowns]
|
||||
# return compute_split_knowns_r(knowns, N - len(constant_bits), depth)
|
||||
|
||||
if len(knowns) == 1:
|
||||
return 1.0
|
||||
if len(knowns) == 2:
|
||||
if knowns[0][1] == knowns[1][1]:
|
||||
return 1.0
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
sum = 0
|
||||
denominator = 0
|
||||
for i in range(0, N):
|
||||
(left, right) = split_at(knowns, N, i)
|
||||
weight = min(len(left), len(right)) / max(len(left), len(right))
|
||||
# weight = 1.0 - (abs(len(left) - len(right)) / (len(left) + len(right)))
|
||||
if weight == 0:
|
||||
continue
|
||||
partial = compute_coherence((left, right), N - 1)
|
||||
sum += weight * partial
|
||||
denominator += weight
|
||||
return sum / denominator
|
||||
|
||||
def invert(knowns):
|
||||
inverted_knowns = []
|
||||
for (i, value) in knowns:
|
||||
inverted_knowns.append((i, 1 - value))
|
||||
return inverted_knowns
|
||||
|
||||
def reduce(knowns, N):
|
||||
flips = []
|
||||
best_coherence = compute_split_knowns_r(knowns, N)
|
||||
print(best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
while best_coherence < 1.0:
|
||||
best_index = -1
|
||||
best_reverse = False
|
||||
# best_coherence = 0
|
||||
for i in range(0, N):
|
||||
for reverse in [False, True]:
|
||||
mutated_knowns = xor_by_index(knowns, i, reverse)
|
||||
# coherence = compute_coherence(mutated_knowns, N)
|
||||
coherence = compute_split_knowns_r(mutated_knowns, N)
|
||||
print(i, reverse, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_reverse = reverse
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index, best_reverse)
|
||||
flips.append((best_index, best_reverse))
|
||||
print()
|
||||
print(best_index, best_reverse, best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
return (knowns, best_coherence, flips)
|
||||
|
||||
def solve(knowns, N):
|
||||
(knowns, coherence, flips) = reduce(knowns, N)
|
||||
if coherence == 1.0:
|
||||
inverted = knowns[0][1]
|
||||
return (inverted, flips, None)
|
||||
|
||||
raise Exception('Stop')
|
||||
|
||||
best_coherence = 0
|
||||
best_index = -1
|
||||
best_identity_value = False
|
||||
print()
|
||||
for i in range(0, N):
|
||||
for identity_value in [0, 1]:
|
||||
coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N)
|
||||
print(i, identity_value, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_identity_value = identity_value
|
||||
print()
|
||||
(left, right) = factor_at(knowns, N, best_index, best_identity_value)
|
||||
return (0, flips, (best_identity_value, solve(left, N), solve(right, N)))
|
||||
|
||||
def evaluate(model, n, value = 0):
|
||||
(inverted, flips, child) = model
|
||||
for (i, invert) in flips:
|
||||
mask = (1 << i)
|
||||
masked_n = n & mask
|
||||
if (masked_n > 0 and not invert) or (masked_n == 0 and invert):
|
||||
value = 1 - value
|
||||
if not child is None:
|
||||
(identity, left_child, right_child) = child
|
||||
left = evaluate(left_child, n, 1 - identity)
|
||||
right = evaluate(right_child, n, 1 - identity)
|
||||
if left and right:
|
||||
value = 1 - value
|
||||
if identity == 0:
|
||||
value = 1 - value
|
||||
if inverted:
|
||||
value = 1 - value
|
||||
return value
|
||||
|
||||
def main():
|
||||
N = 8
|
||||
S = 2 ** N
|
||||
train_size = 128
|
||||
test_size = 100
|
||||
f = xor_n
|
||||
|
||||
knowns = [(i, f(i)) for i in [
|
||||
# 0, 1, 2, 3, 4, 5, 6, 7
|
||||
# 0, 3, 4, 5, 7
|
||||
# 3, 5, 6, 10, 12, 14
|
||||
# 1, 3, 7, 10, 14, 15
|
||||
# 0, 3, 5, 6, 10, 11, 12
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
|
||||
128, 131, 248, 0, 7, 13, 17, 19
|
||||
# 23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255
|
||||
]]
|
||||
|
||||
# knowns = []
|
||||
# train_samples = set()
|
||||
# for i in range(0, train_size):
|
||||
# k = random.randint(0, S)
|
||||
# while k in train_samples:
|
||||
# k = random.randint(0, S)
|
||||
# knowns.append((k, f(i)))
|
||||
# train_samples.add(k)
|
||||
|
||||
model = solve(knowns, N)
|
||||
print(model)
|
||||
# print(model)
|
||||
correct = 0
|
||||
for i in range(0, test_size):
|
||||
k = random.randint(0, S - 1)
|
||||
if f(k) == evaluate(model, k):
|
||||
correct += 1
|
||||
print(str(correct) + "/" + str(test_size))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
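The per-split score in this version penalizes disagreement between the two halves twice: it takes the worse of the two sub-coherences and additionally multiplies by an evenness term 1 - (left - right)^2, squared. A sketch of just that combination step, mirroring the final line of compute_coherence above (the sub-coherence numbers are invented for illustration):

def combine(left_coherence, right_coherence):
    # mirrors the final line of compute_coherence above
    evenness = 1.0 - ((1.0 - left_coherence) - (1.0 - right_coherence)) ** 2
    return min(left_coherence, right_coherence) * (evenness ** 2)

print(combine(1.0, 1.0))   # 1.0     -- both halves fully coherent
print(combine(1.0, 0.5))   # 0.28125 -- uneven halves are penalized hard
print(combine(0.5, 0.5))   # 0.5     -- even but mediocre halves keep their score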

model_probabilities6.py (executable file, 201 lines added)
@@ -0,0 +1,201 @@
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha_n(n):
|
||||
m = hashlib.sha256()
|
||||
m.update(str(n).encode("utf-8"))
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor_by_index(knowns, index, reverse=False):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
(g, j, value) = knowns[i]
|
||||
if j & mask or (not (j & mask) and reverse):
|
||||
knowns[i] = (g, j, value ^ 1)
|
||||
return knowns
|
||||
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
|
||||
|
||||
def split_at(knowns, N, i):
|
||||
mask = 1 << i
|
||||
left = [(g, remove_bit(j, i), value) for (g, j, value) in knowns if (j & mask) == 0]
|
||||
right = [(g, remove_bit(j, i), value) for (g, j, value) in knowns if not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def factor_at(knowns, N, i, identity_value=1):
|
||||
mask = 1 << i
|
||||
left = [(g, j, value) for (g, j, value) in knowns if value == identity_value or (j & mask) == 0]
|
||||
right = [(g, j, value) for (g, j, value) in knowns if value == identity_value or not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def key_for_knowns(knowns):
|
||||
return tuple([g for (g, _, _) in knowns])
|
||||
|
||||
primes = [1, 2, 3, 5, 7, 11, 13, 17, 19, 23]
|
||||
|
||||
def compute_split_knowns_r(knowns, N):
|
||||
stack = [(knowns, N)]
|
||||
numerator = 0.0
|
||||
denominator = 0.0
|
||||
|
||||
while len(stack) > 0:
|
||||
(s, n) = stack.pop()
|
||||
depth = (N - n)
|
||||
weight = depth ** 64
|
||||
|
||||
if len(s) == 1:
|
||||
# numerator += weight
|
||||
# denominator += weight
|
||||
numerator += weight
|
||||
denominator += weight
|
||||
continue
|
||||
if len(s) == 2:
|
||||
(_, a, left_value) = s[0]
|
||||
(_, b, right_value) = s[1]
|
||||
distance = count_one_bits(a ^ b)
|
||||
weight /= (2 ** distance)
|
||||
if left_value == right_value:
|
||||
numerator += weight
|
||||
denominator += weight
|
||||
else:
|
||||
denominator += weight
|
||||
continue
|
||||
|
||||
for i in range(0, n):
|
||||
(left, right) = split_at(s, n, i)
|
||||
if len(left) == 0 or len(right) == 0:
|
||||
continue
|
||||
stack.append((left, n - 1))
|
||||
stack.append((right, n - 1))
|
||||
|
||||
return numerator / denominator
|
||||
|
||||
def invert(knowns):
|
||||
inverted_knowns = []
|
||||
for (i, value) in knowns:
|
||||
inverted_knowns.append((i, 1 - value))
|
||||
return inverted_knowns
|
||||
|
||||
def reduce(knowns, N):
|
||||
flips = []
|
||||
best_coherence = compute_split_knowns_r(knowns, N)
|
||||
print(best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
while best_coherence < 1.0:
|
||||
best_index = -1
|
||||
best_reverse = False
|
||||
# best_coherence = 0
|
||||
for i in range(0, N):
|
||||
for reverse in [False, True]:
|
||||
mutated_knowns = xor_by_index(knowns, i, reverse)
|
||||
# coherence = compute_coherence(mutated_knowns, N)
|
||||
coherence = compute_split_knowns_r(mutated_knowns, N)
|
||||
print(i, reverse, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_reverse = reverse
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index, best_reverse)
|
||||
flips.append((best_index, best_reverse))
|
||||
print()
|
||||
print(best_index, best_reverse, best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
return (knowns, best_coherence, flips)
|
||||
|
||||
def solve(knowns, N):
|
||||
(knowns, coherence, flips) = reduce(knowns, N)
|
||||
if coherence == 1.0:
|
||||
(_, _, inverted) = knowns[0]
|
||||
return (inverted, flips, None)
|
||||
|
||||
raise Exception('Stop')
|
||||
|
||||
best_coherence = 0
|
||||
best_index = -1
|
||||
best_identity_value = False
|
||||
print()
|
||||
for i in range(0, N):
|
||||
for identity_value in [0, 1]:
|
||||
coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N)
|
||||
print(i, identity_value, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_identity_value = identity_value
|
||||
print()
|
||||
(left, right) = factor_at(knowns, N, best_index, best_identity_value)
|
||||
return (0, flips, (best_identity_value, solve(left, N), solve(right, N)))
|
||||
|
||||
def evaluate(model, n, value = 0):
|
||||
(inverted, flips, child) = model
|
||||
for (i, invert) in flips:
|
||||
mask = (1 << i)
|
||||
masked_n = n & mask
|
||||
if (masked_n > 0 and not invert) or (masked_n == 0 and invert):
|
||||
value = 1 - value
|
||||
if not child is None:
|
||||
(identity, left_child, right_child) = child
|
||||
left = evaluate(left_child, n, 1 - identity)
|
||||
right = evaluate(right_child, n, 1 - identity)
|
||||
if left and right:
|
||||
value = 1 - value
|
||||
if identity == 0:
|
||||
value = 1 - value
|
||||
if inverted:
|
||||
value = 1 - value
|
||||
return value
|
||||
|
||||
def main():
|
||||
N = 8
|
||||
S = 2 ** N
|
||||
train_size = 128
|
||||
test_size = 100
|
||||
f = xor_n
|
||||
|
||||
knowns = [(i, i, f(i)) for i in [
|
||||
# 0, 1, 2, 3, 4, 5, 6, 7
|
||||
# 0, 3, 4, 5, 7
|
||||
# 3, 5, 6, 10, 12, 14
|
||||
# 1, 3, 7, 10, 14, 15
|
||||
# 0, 3, 5, 6, 10, 11, 12
|
||||
0, 3, 5, 6, 10, 11, 12, 24, 30
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
|
||||
# 128, 131, 248, 0, 7, 13, 17, 19
|
||||
# 23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255
|
||||
]]
|
||||
|
||||
# knowns = []
|
||||
# train_samples = set()
|
||||
# for i in range(0, train_size):
|
||||
# k = random.randint(0, S)
|
||||
# while k in train_samples:
|
||||
# k = random.randint(0, S)
|
||||
# knowns.append((k, f(i)))
|
||||
# train_samples.add(k)
|
||||
|
||||
model = solve(knowns, N)
|
||||
# print(model)
|
||||
correct = 0
|
||||
for i in range(0, test_size):
|
||||
k = random.randint(0, S - 1)
|
||||
if f(k) == evaluate(model, k):
|
||||
correct += 1
|
||||
print(str(correct) + "/" + str(test_size))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
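In this iterative, stack-based variant every leaf of the split tree casts a weighted vote: a two-element leaf counts as "agree" or "disagree" with weight proportional to depth**64 / 2**hamming(a, b), so disagreements between nearby keys are far more damaging than disagreements between distant ones. A toy illustration of that leaf rule only (depth fixed at 1; keys and labels invented):

def leaf_vote(a, value_a, b, value_b, depth=1):
    weight = (depth ** 64) / (2 ** bin(a ^ b).count("1"))
    return (weight if value_a == value_b else 0.0), weight  # (numerator, denominator)

print(leaf_vote(0b0001, 0, 0b0011, 1))  # (0.0, 0.5)    close disagreement, heavy penalty
print(leaf_vote(0b0001, 0, 0b1110, 1))  # (0.0, 0.0625) distant disagreement, mild penalty
print(leaf_vote(0b0001, 0, 0b0011, 0))  # (0.5, 0.5)    close agreement, strong support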

model_probabilities7.py (executable file, 249 lines added)
@@ -0,0 +1,249 @@
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha_n(n):
|
||||
m = hashlib.sha256()
|
||||
m.update(str(n).encode("utf-8"))
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor_by_index(knowns, index, reverse=False):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
known = knowns[i]
|
||||
if known[0] & mask or (not (known[0] & mask) and reverse):
|
||||
knowns[i] = (known[0], known[1] ^ 1)
|
||||
return knowns
|
||||
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
|
||||
|
||||
def split_at(knowns, N, i):
|
||||
mask = 1 << i
|
||||
left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0]
|
||||
right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def factor_at(knowns, N, i, identity_value=1):
|
||||
mask = 1 << i
|
||||
left = [(j, value) for (j, value) in knowns if value == identity_value or (j & mask) == 0]
|
||||
right = [(j, value) for (j, value) in knowns if value == identity_value or not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def span(s, N):
|
||||
lower_bound = (1 << N) - 1
|
||||
upper_bound = 0
|
||||
for (x, _) in s:
|
||||
upper_bound |= x
|
||||
lower_bound &= x
|
||||
return 2 ** count_one_bits(lower_bound ^ upper_bound)
|
||||
|
||||
def compute_coherence(pair, N):
|
||||
(left, right) = pair
|
||||
left_coherence = compute_split_knowns_r(left, N)
|
||||
right_coherence = compute_split_knowns_r(right, N)
|
||||
|
||||
ratio = min(len(left), len(right)) / max(len(left), len(right))
|
||||
# evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0
|
||||
# evenness = left_coherence - right_coherence
|
||||
evenness = (1.0 - ((1.0 - left_coherence) - (1.0 - right_coherence)) ** 2)
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * evenness ** 2
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * evenness ** 2
|
||||
# coherence = left_coherence if left_depth > right_depth else right_coherence if right_depth > left_depth else (left_coherence + right_coherence) / 2.0
|
||||
# depth = max(left_depth, right_depth)
|
||||
# return (depth, 0.9 * coherence + 0.1 * (1.0 - (evenness ** 2)))
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * (1.0 - (evenness ** 2))
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * (evenness ** 2)
|
||||
# return ((left_coherence * len(left) + right_coherence * len(right)) / (len(left) +len(right))) * min(left_coherence, right_coherence) * evenness
|
||||
# return min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
# coherence = ((len(left) / (len(left) + len(right))) * left_coherence + (len(right) / (len(left) + len(right))) * right_coherence)
|
||||
# return min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
span_left = span(left, N)
|
||||
span_right = span(right, N)
|
||||
weighted_left_coherence = span_left * left_coherence / (span_left + span_right)
|
||||
weighted_right_coherence = span_right * right_coherence / (span_left + span_right)
|
||||
return (weighted_left_coherence + weighted_right_coherence) * (evenness ** 2)
|
||||
|
||||
def compute_split_knowns_r(knowns, N):
|
||||
# if len(knowns) == 0:
|
||||
# return 1.0
|
||||
|
||||
# hist = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# mask = 1 << i
|
||||
# for (j, value) in knowns:
|
||||
# if j & mask == 0:
|
||||
# hist[i] += 1
|
||||
|
||||
# constant_bits = [i for i in range(0, N) if hist[i] == 0 or hist[i] == len(knowns)]
|
||||
# if len(constant_bits) > 0:
|
||||
# constant_bits.reverse()
|
||||
# for n in constant_bits:
|
||||
# knowns = [(remove_bit(j, n), value) for (j, value) in knowns]
|
||||
# return compute_split_knowns_r(knowns, N - len(constant_bits), depth)
|
||||
|
||||
if len(knowns) == 1:
|
||||
return 1.0
|
||||
if len(knowns) == 2:
|
||||
if knowns[0][1] == knowns[1][1]:
|
||||
return 1.0
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
sum = 0
|
||||
denominator = 0
|
||||
for i in range(0, N):
|
||||
(left, right) = split_at(knowns, N, i)
|
||||
if len(left) == 0 or len(right) == 0:
|
||||
continue
|
||||
weight = min(span(left, N), span(right, N))
|
||||
# weight = max(span(left, N), span(right, N)) / min(span(left, N), span(right, N))
|
||||
# weight = 1.0 - (abs(len(left) - len(right)) / (len(left) + len(right)))
|
||||
if weight == 0:
|
||||
continue
|
||||
partial = compute_coherence((left, right), N - 1)
|
||||
sum += weight * partial
|
||||
denominator += weight
|
||||
return sum / denominator
|
||||
|
||||
def invert(knowns):
|
||||
inverted_knowns = []
|
||||
for (i, value) in knowns:
|
||||
inverted_knowns.append((i, 1 - value))
|
||||
return inverted_knowns
|
||||
|
||||
def reduce(knowns, N):
|
||||
flips = []
|
||||
best_coherence = compute_split_knowns_r(knowns, N)
|
||||
print(best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
while best_coherence < 1.0:
|
||||
best_index = -1
|
||||
best_reverse = False
|
||||
# best_coherence = 0
|
||||
for i in range(0, N):
|
||||
for reverse in [False, True]:
|
||||
mutated_knowns = xor_by_index(knowns, i, reverse)
|
||||
# coherence = compute_coherence(mutated_knowns, N)
|
||||
coherence = compute_split_knowns_r(mutated_knowns, N)
|
||||
print(i, reverse, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_reverse = reverse
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index, best_reverse)
|
||||
flips.append((best_index, best_reverse))
|
||||
print()
|
||||
print(best_index, best_reverse, best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
return (knowns, best_coherence, flips)
|
||||
|
||||
def solve(knowns, N):
|
||||
(knowns, coherence, flips) = reduce(knowns, N)
|
||||
if coherence == 1.0:
|
||||
inverted = knowns[0][1]
|
||||
return (inverted, flips, None)
|
||||
|
||||
raise Exception('Stop')
|
||||
|
||||
best_coherence = 0
|
||||
best_index = -1
|
||||
best_identity_value = False
|
||||
print()
|
||||
for i in range(0, N):
|
||||
for identity_value in [0, 1]:
|
||||
coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N)
|
||||
print(i, identity_value, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_identity_value = identity_value
|
||||
print()
|
||||
(left, right) = factor_at(knowns, N, best_index, best_identity_value)
|
||||
return (0, flips, (best_identity_value, solve(left, N), solve(right, N)))
|
||||
|
||||
def evaluate(model, n, value = 0):
|
||||
(inverted, flips, child) = model
|
||||
for (i, invert) in flips:
|
||||
mask = (1 << i)
|
||||
masked_n = n & mask
|
||||
if (masked_n > 0 and not invert) or (masked_n == 0 and invert):
|
||||
value = 1 - value
|
||||
if not child is None:
|
||||
(identity, left_child, right_child) = child
|
||||
left = evaluate(left_child, n, 1 - identity)
|
||||
right = evaluate(right_child, n, 1 - identity)
|
||||
if left and right:
|
||||
value = 1 - value
|
||||
if identity == 0:
|
||||
value = 1 - value
|
||||
if inverted:
|
||||
value = 1 - value
|
||||
return value
|
||||
|
||||
def run_for_input(input):
|
||||
N = 8
|
||||
S = 2 ** N
|
||||
train_size = 128
|
||||
test_size = 100
|
||||
f = xor_n
|
||||
|
||||
knowns = [(i, f(i)) for i in input]
|
||||
|
||||
# knowns = []
|
||||
# train_samples = set()
|
||||
# for i in range(0, train_size):
|
||||
# k = random.randint(0, S)
|
||||
# while k in train_samples:
|
||||
# k = random.randint(0, S)
|
||||
# knowns.append((k, f(i)))
|
||||
# train_samples.add(k)
|
||||
|
||||
model = solve(knowns, N)
|
||||
print(model)
|
||||
# print(model)
|
||||
correct = 0
|
||||
for i in range(0, test_size):
|
||||
k = random.randint(0, S - 1)
|
||||
if f(k) == evaluate(model, k):
|
||||
correct += 1
|
||||
print(str(correct) + "/" + str(test_size))
|
||||
|
||||
def run():
|
||||
inputs = [
|
||||
# [0, 1, 2, 3, 4, 5, 6, 7],
|
||||
# [0, 3, 4, 5, 7],
|
||||
# [3, 5, 6, 10, 12, 14],
|
||||
# [1, 3, 7, 10, 14, 15],
|
||||
# [0, 3, 5, 6, 10, 11, 12],
|
||||
# [0, 3, 5, 6, 10, 11, 12, 24, 30],
|
||||
[0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127],
|
||||
# [128, 131, 248, 0, 7, 13, 17, 19],
|
||||
# [23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255]
|
||||
]
|
||||
results = []
|
||||
for i, input in enumerate(inputs):
|
||||
success = False
|
||||
try:
|
||||
run_for_input(input)
|
||||
success = True
|
||||
except:
|
||||
pass
|
||||
results.append(success)
|
||||
print(results)
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
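A note on the splitting step used throughout model_probabilities7.py: split_at partitions the knowns on one input bit and remove_bit drops that bit from the surviving index, so the recursion in compute_split_knowns_r always works over an (N-1)-bit space. A small sketch that duplicates those two helpers and shows the partition of 3-bit parity data on bit 0:

def remove_bit(i, n):
    # Drop bit n from i, shifting the higher bits down by one position.
    return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)

def split_at(knowns, N, i):
    mask = 1 << i
    left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0]
    right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0]
    return (left, right)

# 3-bit parity samples split on bit 0: even indices go left, odd indices go
# right, and the two remaining bits are re-packed into the range 0..3.
knowns = [(i, bin(i).count("1") % 2) for i in range(8)]
print(split_at(knowns, 3, 0))
# ([(0, 0), (1, 1), (2, 1), (3, 0)], [(0, 1), (1, 0), (2, 0), (3, 1)])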
219
model_probabilities8.py
Executable file
@ -0,0 +1,219 @@
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha_n(n):
|
||||
m = hashlib.sha256()
|
||||
m.update(str(n).encode("utf-8"))
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor_by_index(knowns, index, reverse=False):
|
||||
mask = 1 << index
|
||||
knowns = knowns[:]
|
||||
for i in range(len(knowns)):
|
||||
known = knowns[i]
|
||||
if known[0] & mask or (not (known[0] & mask) and reverse):
|
||||
knowns[i] = (known[0], known[1] ^ 1)
|
||||
return knowns
|
||||
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
|
||||
|
||||
def split_at(knowns, N, i):
|
||||
mask = 1 << i
|
||||
left = [(remove_bit(j, i), value) for (j, value) in knowns if (j & mask) == 0]
|
||||
right = [(remove_bit(j, i), value) for (j, value) in knowns if not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def factor_at(knowns, N, i, identity_value=1):
|
||||
mask = 1 << i
|
||||
left = [(j, value) for (j, value) in knowns if value == identity_value or (j & mask) == 0]
|
||||
right = [(j, value) for (j, value) in knowns if value == identity_value or not (j & mask) == 0]
|
||||
return (left, right)
|
||||
|
||||
def compute_coherence(pair, N):
|
||||
(left, right) = pair
|
||||
left_coherence = compute_split_knowns_r(left, N)
|
||||
right_coherence = compute_split_knowns_r(right, N)
|
||||
ratio = min(len(left), len(right)) / max(len(left), len(right))
|
||||
# evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0
|
||||
# evenness = left_coherence - right_coherence
|
||||
evenness = (1.0 - ((1.0 - left_coherence) - (1.0 - right_coherence)) ** 2)
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * evenness ** 2
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * evenness ** 2
|
||||
# coherence = left_coherence if left_depth > right_depth else right_coherence if right_depth > left_depth else (left_coherence + right_coherence) / 2.0
|
||||
# depth = max(left_depth, right_depth)
|
||||
# return (depth, 0.9 * coherence + 0.1 * (1.0 - (evenness ** 2)))
|
||||
# return 0.8 * min(left_coherence, right_coherence) + 0.2 * (1.0 - (evenness ** 2))
|
||||
# return 0.75 * min(left_coherence, right_coherence) + 0.25 * (evenness ** 2)
|
||||
# return ((left_coherence * len(left) + right_coherence * len(right)) / (len(left) +len(right))) * min(left_coherence, right_coherence) * evenness
|
||||
# return min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
coherence = ((len(left) / (len(left) + len(right))) * left_coherence + (len(right) / (len(left) + len(right))) * right_coherence)
|
||||
return min(left_coherence, right_coherence) * (evenness ** 2)
|
||||
|
||||
def compute_split_knowns_r(knowns, N):
|
||||
# if len(knowns) == 0:
|
||||
# return 1.0
|
||||
|
||||
# hist = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# mask = 1 << i
|
||||
# for (j, value) in knowns:
|
||||
# if j & mask == 0:
|
||||
# hist[i] += 1
|
||||
|
||||
# constant_bits = [i for i in range(0, N) if hist[i] == 0 or hist[i] == len(knowns)]
|
||||
# if len(constant_bits) > 0:
|
||||
# constant_bits.reverse()
|
||||
# for n in constant_bits:
|
||||
# knowns = [(remove_bit(j, n), value) for (j, value) in knowns]
|
||||
# return compute_split_knowns_r(knowns, N - len(constant_bits), depth)
|
||||
|
||||
if len(knowns) == 1:
|
||||
return 1.0
|
||||
if len(knowns) == 2:
|
||||
if knowns[0][1] == knowns[1][1]:
|
||||
return 1.0
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
sum = 0
|
||||
denominator = 0
|
||||
for i in range(0, N):
|
||||
(left, right) = split_at(knowns, N, i)
|
||||
weight = min(len(left), len(right)) / max(len(left), len(right))
|
||||
# weight = 1.0 - (abs(len(left) - len(right)) / (len(left) + len(right)))
|
||||
if weight == 0:
|
||||
continue
|
||||
partial = compute_coherence((left, right), N - 1)
|
||||
sum += weight * partial
|
||||
denominator += weight
|
||||
return sum / denominator
|
||||
|
||||
def invert(knowns):
|
||||
inverted_knowns = []
|
||||
for (i, value) in knowns:
|
||||
inverted_knowns.append((i, 1 - value))
|
||||
return inverted_knowns
|
||||
|
||||
def reduce(knowns, N):
|
||||
flips = []
|
||||
best_coherence = compute_split_knowns_r(knowns, N)
|
||||
print(best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
while best_coherence < 1.0:
|
||||
best_index = -1
|
||||
best_reverse = False
|
||||
# best_coherence = 0
|
||||
for i in range(0, N):
|
||||
for reverse in [False, True]:
|
||||
mutated_knowns = xor_by_index(knowns, i, reverse)
|
||||
# coherence = compute_coherence(mutated_knowns, N)
|
||||
coherence = compute_split_knowns_r(mutated_knowns, N)
|
||||
print(i, reverse, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_reverse = reverse
|
||||
if best_index < 0:
|
||||
break
|
||||
knowns = xor_by_index(knowns, best_index, best_reverse)
|
||||
flips.append((best_index, best_reverse))
|
||||
print()
|
||||
print(best_index, best_reverse, best_coherence)
|
||||
print(knowns)
|
||||
print()
|
||||
return (knowns, best_coherence, flips)
|
||||
|
||||
def solve(knowns, N):
|
||||
(knowns, coherence, flips) = reduce(knowns, N)
|
||||
if coherence == 1.0:
|
||||
inverted = knowns[0][1]
|
||||
return (inverted, flips, None)
|
||||
|
||||
raise Exception('Stop')
|
||||
|
||||
best_coherence = 0
|
||||
best_index = -1
|
||||
best_identity_value = False
|
||||
print()
|
||||
for i in range(0, N):
|
||||
for identity_value in [0, 1]:
|
||||
coherence = compute_coherence(factor_at(knowns, N, i, identity_value), N)
|
||||
print(i, identity_value, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_index = i
|
||||
best_identity_value = identity_value
|
||||
print()
|
||||
(left, right) = factor_at(knowns, N, best_index, best_identity_value)
|
||||
return (0, flips, (best_identity_value, solve(left, N), solve(right, N)))
|
||||
|
||||
def evaluate(model, n, value = 0):
|
||||
(inverted, flips, child) = model
|
||||
for (i, invert) in flips:
|
||||
mask = (1 << i)
|
||||
masked_n = n & mask
|
||||
if (masked_n > 0 and not invert) or (masked_n == 0 and invert):
|
||||
value = 1 - value
|
||||
if not child is None:
|
||||
(identity, left_child, right_child) = child
|
||||
left = evaluate(left_child, n, 1 - identity)
|
||||
right = evaluate(right_child, n, 1 - identity)
|
||||
if left and right:
|
||||
value = 1 - value
|
||||
if identity == 0:
|
||||
value = 1 - value
|
||||
if inverted:
|
||||
value = 1 - value
|
||||
return value
|
||||
|
||||
def main():
|
||||
N = 8
|
||||
S = 2 ** N
|
||||
train_size = 128
|
||||
test_size = 100
|
||||
f = xor_n
|
||||
|
||||
knowns = [(i, f(i)) for i in [
|
||||
# 0, 1, 2, 3, 4, 5, 6, 7
|
||||
# 0, 3, 4, 5, 7
|
||||
# 3, 5, 6, 10, 12, 14
|
||||
# 1, 3, 7, 10, 14, 15
|
||||
# 0, 3, 5, 6, 10, 11, 12
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30
|
||||
# 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
|
||||
# 128, 131, 248, 0, 7, 13, 17, 19
|
||||
23, 38, 46, 89, 108, 110, 114, 119, 137, 168, 177, 201, 206, 232, 247, 255
|
||||
]]
|
||||
|
||||
# knowns = []
|
||||
# train_samples = set()
|
||||
# for i in range(0, train_size):
|
||||
# k = random.randint(0, S)
|
||||
# while k in train_samples:
|
||||
# k = random.randint(0, S)
|
||||
# knowns.append((k, f(i)))
|
||||
# train_samples.add(k)
|
||||
|
||||
model = solve(knowns, N)
|
||||
print(model)
|
||||
# print(model)
|
||||
correct = 0
|
||||
for i in range(0, test_size):
|
||||
k = random.randint(0, S - 1)
|
||||
if f(k) == evaluate(model, k):
|
||||
correct += 1
|
||||
print(str(correct) + "/" + str(test_size))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
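The reduce() loop in model_probabilities8.py works because parity-like targets collapse under label flips: xor_by_index flips the label of every sample whose chosen bit is set, which cancels that bit's contribution, and once every contributing bit has been flipped away the labels are constant and coherence reaches 1.0. A short sketch of that effect on 2-bit parity, duplicating xor_by_index as defined above:

def xor_by_index(knowns, index, reverse=False):
    mask = 1 << index
    knowns = knowns[:]
    for i in range(len(knowns)):
        known = knowns[i]
        if known[0] & mask or (not (known[0] & mask) and reverse):
            knowns[i] = (known[0], known[1] ^ 1)
    return knowns

knowns = [(i, bin(i).count("1") % 2) for i in range(4)]  # 2-bit parity
print(knowns)                   # [(0, 0), (1, 1), (2, 1), (3, 0)]
step1 = xor_by_index(knowns, 0)
print(step1)                    # [(0, 0), (1, 0), (2, 1), (3, 1)]
print(xor_by_index(step1, 1))   # [(0, 0), (1, 0), (2, 0), (3, 0)] -- constant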
310
model_probabilities9.py
Executable file
@ -0,0 +1,310 @@
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import secrets
|
||||
from struct import pack, pack_into, unpack_from
|
||||
|
||||
def bit_at_index(buffer, index):
|
||||
offset = (index >> 3) % len(buffer)
|
||||
return buffer[offset] & (1 << (index & 0b111)) != 0
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def hamming_distance(a, b):
|
||||
distance = 0
|
||||
for i in range(0, len(a)):
|
||||
distance += count_one_bits(a[i] ^ b[i])
|
||||
return distance
|
||||
|
||||
def xor_n(n):
|
||||
return count_one_bits(n) % 2
|
||||
|
||||
def sha(x):
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def apply_flips(samples, inputs, flips):
|
||||
samples = samples[:]
|
||||
for i in range(len(samples)):
|
||||
(key, old_value) = samples[i]
|
||||
new_value = old_value
|
||||
for index in flips:
|
||||
if bit_at_index(inputs[key], index):
|
||||
new_value = new_value ^ 1
|
||||
if not new_value == old_value:
|
||||
samples[i] = (key, new_value)
|
||||
return samples
|
||||
|
||||
def coherence_for_knowns(knowns, distances, N):
|
||||
if len(knowns) == 1:
|
||||
return 1.0
|
||||
coherences = []
|
||||
for i in range(0, len(knowns)):
|
||||
(a_key, a_value) = knowns[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(knowns)):
|
||||
if i == j:
|
||||
continue
|
||||
(b_key, b_value) = knowns[j]
|
||||
distance = distances[a_key][b_key]
|
||||
weight = 1.0 / (2 ** distance)
|
||||
denominator += weight
|
||||
if a_value == b_value:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def iterate_indices(indices, N):
|
||||
carry_index = -1
|
||||
for i in range(0, len(indices)):
|
||||
j = len(indices) - i - 1
|
||||
if indices[j] + 1 + i < N:
|
||||
carry_index = j
|
||||
break
|
||||
if carry_index < 0:
|
||||
return None
|
||||
base_value = indices[carry_index]
|
||||
for i in range(0, len(indices) - carry_index):
|
||||
new_value = base_value + i + 1
|
||||
if new_value >= N:
|
||||
return None
|
||||
indices[carry_index + i] = new_value
|
||||
return indices
|
||||
|
||||
def compute_indices(samples, inputs, N):
|
||||
zero_buckets = [False for i in range(0, N)]
|
||||
one_buckets = [False for i in range(0, N)]
|
||||
for (key, _) in samples:
|
||||
for index in range(0, N):
|
||||
if bit_at_index(inputs[key], index):
|
||||
one_buckets[index] = True
|
||||
else:
|
||||
zero_buckets[index] = True
|
||||
return [index for index in range(0, N) if zero_buckets[index] and one_buckets[index]]
|
||||
|
||||
def compute_distances(inputs, distances):
|
||||
for i in range(0, len(inputs)):
|
||||
a = inputs[i]
|
||||
for j in range(i, len(inputs)):
|
||||
b = inputs[j]
|
||||
distance = hamming_distance(a, b) if j != i else 0
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def reduce(samples, inputs, distances, N):
|
||||
available_indices = compute_indices(samples, inputs, N)
|
||||
flips = []
|
||||
best_coherence = coherence_for_knowns(samples, distances, N)
|
||||
# print(best_coherence)
|
||||
# print(knowns)
|
||||
# print()
|
||||
depth = 1
|
||||
while depth <= len(available_indices) and depth < 2:
|
||||
while best_coherence < 1.0:
|
||||
best_flip = None
|
||||
try_indices = [i for i in range(0, depth)]
|
||||
while not try_indices is None:
|
||||
try_flip = [available_indices[i] for i in try_indices]
|
||||
mutated_samples = apply_flips(samples, inputs, try_flip)
|
||||
coherence = coherence_for_knowns(mutated_samples, distances, N)
|
||||
# print(try_flip, coherence)
|
||||
if coherence > best_coherence:
|
||||
best_coherence = coherence
|
||||
best_flip = try_flip
|
||||
try_indices = iterate_indices(try_indices, len(available_indices))
|
||||
|
||||
if best_flip is None:
|
||||
depth += 1
|
||||
# print(depth)
|
||||
break
|
||||
samples = apply_flips(samples, inputs, best_flip)
|
||||
flips += best_flip
|
||||
available_indices = [index for index in available_indices if index not in best_flip]
|
||||
depth = 1
|
||||
# print()
|
||||
# print(best_flip, best_coherence)
|
||||
# print(knowns)
|
||||
# print()
|
||||
# print(depth)
|
||||
if len(available_indices) == 0:
|
||||
break
|
||||
if best_coherence == 1.0:
|
||||
break
|
||||
return (samples, best_coherence, flips)
|
||||
|
||||
def dominant_value(knowns, M=2):
|
||||
buckets = [0 for i in range(0, M)]
|
||||
for (_, value) in knowns:
|
||||
buckets[value] += 1
|
||||
return buckets.index(max(buckets))
|
||||
|
||||
def solve(samples, inputs, distances, N):
|
||||
(samples, coherence, flips) = reduce(samples, inputs, distances, N)
|
||||
if coherence == 1.0:
|
||||
inverted = samples[0][1]
|
||||
return (inverted, flips, None)
|
||||
|
||||
identity = dominant_value(samples)
|
||||
left = [(key, 1) for (key, value) in samples if value != identity]
|
||||
right = [(key, 1) for (key, value) in samples if value != identity]
|
||||
for (key, value) in samples:
|
||||
if value == identity:
|
||||
if random.random() > 0.5:
|
||||
left.append((key, 0))
|
||||
else:
|
||||
right.append((key, 0))
|
||||
|
||||
return (0, flips, (identity, solve(left, inputs, distances, N), solve(right, inputs, distances, N)))
|
||||
|
||||
def evaluate(model, x, value = 0):
|
||||
(inverted, flips, child) = model
|
||||
for i in flips:
|
||||
if bit_at_index(x, i) != 0:
|
||||
value ^= 1
|
||||
if not child is None:
|
||||
(identity, left_child, right_child) = child
|
||||
left = evaluate(left_child, x)
|
||||
right = evaluate(right_child, x)
|
||||
if left & right != identity:
|
||||
value ^= 1
|
||||
if inverted:
|
||||
value ^= 1
|
||||
return value
|
||||
|
||||
def transform(x, layers):
|
||||
x[0] = 0
|
||||
for layer in layers:
|
||||
prefix = 0
|
||||
for i in range(0, len(layer)):
|
||||
model = layer[i]
|
||||
value = evaluate(model, x)
|
||||
prefix <<= 1
|
||||
prefix |= value
|
||||
x[0] = prefix
|
||||
|
||||
def encode_f(f, buffer, offset=0):
|
||||
(inverted, flips, residual) = f
|
||||
pack_into('B', buffer, offset, inverted)
|
||||
offset += 1
|
||||
for index in flips:
|
||||
pack_into('B', buffer, offset, 0)
|
||||
offset += 1
|
||||
pack_into('I', buffer, offset, index)
|
||||
offset += 4
|
||||
if residual is None:
|
||||
pack_into('B', buffer, offset, 1)
|
||||
offset += 1
|
||||
return offset
|
||||
(inverted, left, right) = residual
|
||||
pack_into('B', buffer, offset, 2 if not inverted else 3)
|
||||
offset += 1
|
||||
offset = encode_f(left, buffer, offset)
|
||||
offset = encode_f(right, buffer, offset)
|
||||
return offset
|
||||
|
||||
def decode_f(buffer, offset = 0):
|
||||
[inverted] = unpack_from('B', buffer, offset)
|
||||
offset += 1
|
||||
inverted &= 0b1
|
||||
flips = []
|
||||
while offset < len(buffer):
|
||||
[opcode] = unpack_from('B', buffer, offset)
|
||||
offset += 1
|
||||
opcode &= 0b11
|
||||
if opcode == 0:
|
||||
[index] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
flips.append(index)
|
||||
elif opcode == 1:
|
||||
return (offset, (inverted, flips, None))
|
||||
else:
|
||||
(offset, left) = decode_f(buffer, offset)
|
||||
(offset, right) = decode_f(buffer, offset)
|
||||
gate_inverted = 0 if opcode == 2 else 1
|
||||
return (offset, (gate_inverted, flips, (left, right)))
|
||||
return (offset, (inverted, [], None))
|
||||
|
||||
def random_input():
|
||||
return bytearray(1) + secrets.token_bytes(3)
|
||||
|
||||
def main():
|
||||
N = 32
|
||||
S = 2 ** N
|
||||
train_size = 64
|
||||
test_size = 1000
|
||||
f = sha
|
||||
num_epochs = 4
|
||||
num_layers = 7
|
||||
layers_samples = []
|
||||
layers = []
|
||||
score = 0.5
|
||||
distances = np.zeros((train_size, train_size))
|
||||
|
||||
for epoch in range(0, num_epochs):
|
||||
layer = []
|
||||
layer_samples = []
|
||||
total_correct = 0.0
|
||||
layer_index = 0
|
||||
total_difficulty = 0
|
||||
difficulty = 0
|
||||
while layer_index < num_layers:
|
||||
inputs = []
|
||||
samples = []
|
||||
raw_samples = []
|
||||
for i in range(0, train_size):
|
||||
x = random_input()
|
||||
y = f(x)
|
||||
transform(x, layers)
|
||||
inputs.append(x)
|
||||
samples.append((i, y))
|
||||
raw_samples.append((x, y))
|
||||
|
||||
compute_distances(inputs, distances)
|
||||
model = solve(samples, inputs, distances, N)
|
||||
# print(model)
|
||||
# encoded = bytearray(1024)
|
||||
# offset = encode_f(model, encoded)
|
||||
# decoded_model = decode_f(encoded)
|
||||
# print()
|
||||
# print(decoded_model)
|
||||
|
||||
# correct = 0
|
||||
# for (x, y) in samples:
|
||||
# if evaluate(model, inputs[x]) == y:
|
||||
# correct += 1
|
||||
# print(str(correct) + "/" + str(train_size))
|
||||
|
||||
correct = 0
|
||||
for _ in range(0, test_size):
|
||||
x = random_input()
|
||||
y = f(x)
|
||||
transform(x, layers)
|
||||
if evaluate(model, x) == y:
|
||||
correct += 1
|
||||
difficulty += 1
|
||||
local_score = correct / test_size
|
||||
if local_score < score - 0.0001 * difficulty:
|
||||
continue
|
||||
# print_score = round(local_score * 10000.0) / 100.0
|
||||
# print('Layer ' + str(layer_index) + ': ' + str(candidates) + ' ' + str(print_score) + '%')
|
||||
layer_index += 1
|
||||
total_correct += correct
|
||||
total_difficulty += difficulty
|
||||
difficulty = 0
|
||||
layer.append(model)
|
||||
layer_samples.append(raw_samples)
|
||||
score = total_correct / (test_size * num_layers)
|
||||
average_difficulty = round(total_difficulty * 100.0 / num_layers) / 100.0
|
||||
print_score = round(score * 10000.0) / 100.0
|
||||
print('Epoch ' + str(epoch) + ': ' + str(average_difficulty) + ' ' + str(print_score) + '%')
|
||||
layers.append(layer)
|
||||
layers_samples.append(layer_samples)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
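coherence_for_knowns in model_probabilities9.py weights every pair of samples by 1 / 2^d, where d is the Hamming distance between their inputs, so nearby inputs that disagree on their label hurt the score far more than distant ones. A minimal sketch of the same weighting on plain integer keys (the file itself works over byte buffers and a precomputed distance matrix; weighted_coherence is an illustrative name, not part of the commit):

def weighted_coherence(knowns):
    # knowns: list of (x, label) with integer x; pairs weighted by 2**-hamming.
    coherences = []
    for i, (a_key, a_value) in enumerate(knowns):
        numerator = denominator = 0.0
        for j, (b_key, b_value) in enumerate(knowns):
            if i == j:
                continue
            weight = 2.0 ** -bin(a_key ^ b_key).count("1")
            denominator += weight
            if a_value == b_value:
                numerator += weight
        coherences.append(numerator / denominator if denominator > 0 else 0.0)
    return sum(coherences) / len(coherences)

# Parity labels look maximally incoherent (adjacent points always disagree),
# while constant labels score 1.0.
parity = [(i, bin(i).count("1") % 2) for i in range(8)]
constant = [(i, 0) for i in range(8)]
print(weighted_coherence(parity), weighted_coherence(constant))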
96
mutations.cl
Normal file
@ -0,0 +1,96 @@
|
||||
__kernel void compute_distances(__global const uchar* x, __global float* distances) {
|
||||
int i = get_global_id(0);
|
||||
int j = get_global_id(1);
|
||||
int index = i * get_global_size(1) + j;
|
||||
if (i == j) {
|
||||
distances[index] = 0;
|
||||
return;
|
||||
}
|
||||
float distance = 0;
|
||||
for (int k = 0; k < {N}; k++) {
|
||||
distance += x[i * {N} + k] ^ x[j * {N} + k];
|
||||
}
|
||||
distances[index] = pow(2, -distance);
|
||||
}
|
||||
|
||||
__kernel void evaluate(__global const uchar* program, __global const uchar* x, __global uchar* scratch, __global uchar* y) {
|
||||
int program_index = get_global_id(0) * {MAX_PROGRAM_SIZE} * (1 + {N} + 2);
|
||||
int scratch_index = get_global_id(0) * {MAX_PROGRAM_SIZE};
|
||||
int input_index = get_global_id(1) * {N};
|
||||
int output_index = get_global_id(1);
|
||||
|
||||
scratch[scratch_index] = 0;
|
||||
|
||||
for (int i = 0; i < {MAX_PROGRAM_SIZE}; i++) {
|
||||
uchar output = program[program_index++];
|
||||
|
||||
for (int j = 0; j < {N}; j++) {
|
||||
output += program[program_index++] * x[input_index + j];
|
||||
}
|
||||
int left_index = program[program_index++];
|
||||
int right_index = program[program_index++];
|
||||
|
||||
output += scratch[scratch_index + left_index] * scratch[scratch_index + right_index];
|
||||
output %= {M};
|
||||
|
||||
if (program[program_index] == 255) {
|
||||
y[output_index] = output;
|
||||
return;
|
||||
} else {
|
||||
scratch[scratch_index + i + 1] = output;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void compute_coherences(__global const uchar* y, __global const uchar* z, __global const float* distances, __global float* coherences) {
|
||||
int index = get_global_id(0);
|
||||
int sample_size = get_global_size(0);
|
||||
|
||||
float numerator = 0;
|
||||
float denominator = 0;
|
||||
for (int i = 0; i < sample_size; i++) {
|
||||
int p = z[i] ^ y[index * sample_size + i];
|
||||
for (int j = 0; j < sample_size; j++) {
|
||||
int q = z[j] ^ y[index * sample_size + j];
|
||||
float distance = distances[i * sample_size + j];
|
||||
denominator += distance;
|
||||
if (p == q) {
|
||||
numerator += distance;
|
||||
}
|
||||
}
|
||||
}
|
||||
coherences[index] = numerator / denominator;
|
||||
}
|
||||
|
||||
__kernel void initialize_sort(__global uint* indices, __global uint* offset) {
|
||||
uint index = get_global_id(0);
|
||||
indices[index] = index;
|
||||
if (index == 0) {
|
||||
*offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void increment_offset(__global uint* offset) {
|
||||
uint x = *offset;
|
||||
if (x == 0) {
|
||||
*offset = 1;
|
||||
} else {
|
||||
*offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void sort(__global const float* coherences, __global uint* indices, __global uint* offset) {
|
||||
uint index = get_global_id(0) * 2 + *offset;
|
||||
uint a = indices[index];
|
||||
uint b = indices[index + 1];
|
||||
float coherence_a = coherences[a];
|
||||
float coherence_b = coherences[b];
|
||||
if (coherence_a < coherence_b) {
|
||||
indices[index] = b;
|
||||
indices[index + 1] = a;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void evolve(__global const uchar* program, __global float* coherences) {
|
||||
int index_a = get_global_id(0);
|
||||
}
|
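The kernels above use {N}, {M} and {MAX_PROGRAM_SIZE} as textual placeholders rather than OpenCL preprocessor defines, so a host program has to substitute them before compiling. No host code for mutations.cl is part of this commit; the following is a hypothetical pyopencl sketch of how compute_distances could be specialized and launched (pyopencl, the placeholder values, and the buffer sizes are all assumptions):

import numpy as np
import pyopencl as cl

N, sample_size = 8, 32
with open('mutations.cl') as f:
    source = f.read()
# Plain string replacement avoids fighting str.format over the C braces.
source = (source.replace('{N}', str(N))
                .replace('{M}', '2')
                .replace('{MAX_PROGRAM_SIZE}', '64'))

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
program = cl.Program(ctx, source).build()

x = np.random.randint(0, 2, size=(sample_size, N)).astype(np.uint8)
distances = np.zeros((sample_size, sample_size), dtype=np.float32)

mf = cl.mem_flags
x_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=x)
d_buf = cl.Buffer(ctx, mf.WRITE_ONLY, distances.nbytes)

# One work item per (i, j) pair, matching the two-dimensional global id
# used inside compute_distances.
program.compute_distances(queue, (sample_size, sample_size), None, x_buf, d_buf)
cl.enqueue_copy(queue, distances, d_buf)
print(distances[0][:4])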
511
mutations.py
Normal file
@ -0,0 +1,511 @@
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
from struct import pack, pack_into, unpack_from
|
||||
import secrets
|
||||
|
||||
|
||||
|
||||
N = 8
|
||||
|
||||
def bit_at_index(buffer, index):
|
||||
offset = (index >> 3) % len(buffer)
|
||||
return buffer[offset] & (1 << (index & 0b111)) != 0
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def encode_f(f, buffer, offset=0):
|
||||
(inverted, flips, child) = f
|
||||
pack_into('I', buffer, offset, inverted)
|
||||
offset += 4
|
||||
for index in flips:
|
||||
pack_into('I', buffer, offset, 0)
|
||||
offset += 4
|
||||
pack_into('I', buffer, offset, index)
|
||||
offset += 4
|
||||
if child is None:
|
||||
pack_into('I', buffer, offset, 1)
|
||||
offset += 4
|
||||
return offset
|
||||
(inverted, left, right) = child
|
||||
pack_into('I', buffer, offset, 2 if not inverted else 3)
|
||||
offset += 4
|
||||
offset = encode_f(left, buffer, offset)
|
||||
offset = encode_f(right, buffer, offset)
|
||||
return offset
|
||||
|
||||
def generate_random_branch(p_mutation):
|
||||
global N
|
||||
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
inverted = random.randint(0, 1)
|
||||
indices = set()
|
||||
children = []
|
||||
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in indices]
|
||||
if len(available_indices) == 1:
|
||||
indices.add(available_indices[0])
|
||||
continue
|
||||
indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_add_children)
|
||||
right = generate_random_branch(p_add_children)
|
||||
children.append((child_inverted, left, right))
|
||||
return (inverted, indices, children)
|
||||
|
||||
def mutate_f(f, p_mutation):
|
||||
global N
|
||||
(inverted, indices, children) = f
|
||||
mutated_indices = set(indices)
|
||||
mutated_children = children[:]
|
||||
|
||||
p_invert = p_mutation * random.random()
|
||||
p_drop_indices = p_mutation * random.random()
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_drop_children = p_mutation * random.random()
|
||||
p_mutate_child = p_mutation * random.random()
|
||||
p_clone_child = p_mutation * random.random()
|
||||
p_invert_child = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
# randomly invert
|
||||
if random.random() < p_invert:
|
||||
inverted ^= 1
|
||||
# randomly drop indices
|
||||
while random.random() < p_drop_indices and len(mutated_indices) > 0:
|
||||
mutated_indices.pop()
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(mutated_indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in mutated_indices]
|
||||
if len(available_indices) == 1:
|
||||
mutated_indices.add(available_indices[0])
|
||||
continue
|
||||
mutated_indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly drop children
|
||||
while random.random() < p_drop_children and len(mutated_children) > 0:
|
||||
if len(mutated_children) == 1:
|
||||
del mutated_children[0]
|
||||
break
|
||||
del mutated_children[random.randint(0, len(mutated_children) - 1)]
|
||||
# randomly clone children
|
||||
while random.random() < p_clone_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
clone = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
mutated_children.append(clone)
|
||||
# randomly mutate children
|
||||
while random.random() < p_mutate_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
mutated_children[index] = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_mutation)
|
||||
right = generate_random_branch(p_mutation)
|
||||
mutated_children.append((child_inverted, left, right))
|
||||
return (inverted, mutated_indices, mutated_children)
|
||||
|
||||
def decode_f(buffer, mutate = False, offset = 0, skip_invert = False):
|
||||
global N
|
||||
inverted = 0
|
||||
if not skip_invert:
|
||||
[inverted] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
# random invert
|
||||
if mutate and random.random() < 0.01:
|
||||
inverted ^= 1
|
||||
inverted &= 0b1
|
||||
flips = set()
|
||||
# random add flip
|
||||
while mutate and random.random() < 0.5 and len(flips) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in flips]
|
||||
if len(available_indices) == 1:
|
||||
flips.add(available_indices[0])
|
||||
continue
|
||||
flips.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
while offset < len(buffer):
|
||||
# random create branch
|
||||
if mutate and random.random() < 0.01:
|
||||
gate_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(0.5)  # generate_random_branch requires a mutation rate; 0.5 is an arbitrary placeholder
|
||||
(offset, right) = decode_f(buffer, mutate, offset, True)
|
||||
return (offset, (inverted, flips, (gate_inverted, left, right)))
|
||||
[opcode] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
opcode &= 0b11
|
||||
if opcode == 0:
|
||||
[index] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
# random skip flip
|
||||
if mutate and random.random() < 0.01:
|
||||
continue
|
||||
if index in flips:
|
||||
flips.remove(index)
|
||||
else:
|
||||
flips.add(index)
|
||||
elif opcode == 1:
|
||||
return (offset, (inverted, flips, None))
|
||||
else:
|
||||
(offset, left) = decode_f(buffer, mutate, offset)
|
||||
(offset, right) = decode_f(buffer, mutate, offset)
|
||||
gate_inverted = 0 if opcode == 2 else 1
|
||||
# random invert
|
||||
if mutate and random.random() < 0.01:
|
||||
gate_inverted ^= 1
|
||||
# random skip branch
|
||||
if mutate and random.random() < 0.01:
|
||||
return (offset, (inverted, flips, None))
|
||||
return (offset, (inverted, flips, (gate_inverted, left, right)))
|
||||
return (offset, (inverted, [], None))
|
||||
|
||||
def generate_program(f):
|
||||
statement = ""
|
||||
(inverted, indices, children) = f
|
||||
if inverted:
|
||||
statement += "1^"
|
||||
statement += "("
|
||||
for i in indices:
|
||||
statement += "(x[" + str(i) + ">>3]&(1<<(" + str(i) + "&0b111))!=0)^"
|
||||
for child in children:
|
||||
(gate_inverted, left, right) = child
|
||||
if gate_inverted:
|
||||
statement += "1^"
|
||||
statement += "((" + generate_program(left) + ")&(" + generate_program(right) + "))^"
|
||||
statement += "0)"
|
||||
return statement
|
||||
|
||||
def compile_f(f):
|
||||
program = 'def f(x):\n\treturn ' + generate_program(f)
|
||||
scope = {}
|
||||
exec(program, scope)
|
||||
return scope['f']
|
||||
|
||||
def evaluate(model, x, value = 0):
|
||||
(inverted, indices, children) = model
|
||||
for i in indices:
|
||||
if bit_at_index(x, i) != 0:
|
||||
value ^= 1
|
||||
for child in children:
|
||||
(child_inverted, left, right) = child
|
||||
left = evaluate(left, x)
|
||||
right = evaluate(right, x)
|
||||
if left & right != child_inverted:
|
||||
value ^= 1
|
||||
if inverted:
|
||||
value ^= 1
|
||||
return value
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for n in x:
|
||||
num_one_bits += count_one_bits(n)
|
||||
return num_one_bits % 2
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n))
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
return inputs
|
||||
|
||||
def update_sample(sample, index):
|
||||
global N
|
||||
for j in range(0, N):
|
||||
sample[index][j] = random.randint(0, 1)
|
||||
|
||||
def coherence(inputs, outputs):
|
||||
coherences = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
y_b = outputs[j]
|
||||
distance = np.sum(np.logical_xor(x_a, x_b))  # direct Hamming distance; hamming_distance() would need a scratch buffer here
|
||||
weight = 1.0 / (2 ** distance)
|
||||
denominator += weight
|
||||
if y_a == y_b:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def score(f, sample, distances):
|
||||
return coherence([(x, f(x) ^ y) for (x, y) in sample], distances)
|
||||
|
||||
def compute_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
a = inputs[i]
|
||||
for j in range(i, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def update_distances(inputs, distances, i, scratch):
|
||||
a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def evaluate_sample(model, sample, output):
|
||||
stack = [model]
|
||||
(_, _, _, root_scratch, _) = model
|
||||
while len(stack) > 0:
|
||||
layer = stack.pop()
|
||||
(inverted, xors, child, scratch, touched) = layer
|
||||
if child is None:
|
||||
np.matmul(sample, xors, scratch)
|
||||
np.mod(scratch, 2, scratch)
|
||||
if inverted == 1:
|
||||
np.logical_xor(1, scratch, scratch)
|
||||
touched[0] = 1
|
||||
else:
|
||||
(child_inverted, left, right) = child
|
||||
(_, _, _, left_scratch, left_touched) = left
|
||||
(_, _, _, right_scratch, right_touched) = right
|
||||
if left_touched[0] and right_touched[0]:
|
||||
np.multiply(left_scratch, right_scratch, output)
|
||||
np.matmul(sample, xors, scratch)
|
||||
np.mod(scratch, 2, scratch)
|
||||
if inverted:
|
||||
np.logical_xor(scratch, 1, scratch)
|
||||
if child_inverted:
|
||||
np.logical_xor(output, 1, output)
|
||||
np.logical_xor(scratch, output, scratch)
|
||||
touched[0] = 1
|
||||
else:
|
||||
stack.insert(0, layer)
|
||||
stack.insert(0, left)
|
||||
stack.insert(0, right)
|
||||
np.copyto(output, root_scratch)
|
||||
reset_model(model)
|
||||
|
||||
def reset_model(model):
|
||||
stack = [model]
|
||||
while len(stack) > 0:
|
||||
layer = stack.pop()
|
||||
(_, _, child, _, touched) = layer
|
||||
touched[0] = 0
|
||||
if not child is None:
|
||||
(_, left, right) = child
|
||||
stack.append(left)
|
||||
stack.append(right)
|
||||
|
||||
def clone_model(model, p_mutation):
|
||||
global N
|
||||
|
||||
p_invert = p_mutation * random.random()
|
||||
p_invert_child = p_mutation * random.random()
|
||||
p_flip = p_mutation * random.random()
|
||||
p_add_child = p_mutation * random.random()
|
||||
# p_drop_child = p_mutation * random.random() * 0.5
|
||||
p_drop_child = 0
|
||||
|
||||
(inverted, xors, child, scratch, touched) = model
|
||||
if random.random() < p_invert:
|
||||
inverted ^= 1
|
||||
clone_xors = np.zeros((N,))
|
||||
np.copyto(clone_xors, xors)
|
||||
for i in range(0, N):
|
||||
if random.random() < p_flip:
|
||||
clone_xors[i] = int(clone_xors[i]) ^ 1
|
||||
clone_scratch = np.zeros(np.shape(scratch))
|
||||
clone_touched = np.zeros(np.shape(touched))
|
||||
if child is None:
|
||||
if random.random() < p_add_child:
|
||||
sample_size = len(scratch)
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = random_child(sample_size, p_mutation)
|
||||
right = random_child(sample_size, p_mutation)
|
||||
return (inverted, clone_xors, (child_inverted, left, right), clone_scratch, clone_touched)
|
||||
return (inverted, clone_xors, None, clone_scratch, clone_touched)
|
||||
if random.random() < p_drop_child:
|
||||
return (inverted, clone_xors, None, clone_scratch, clone_touched)
|
||||
(child_inverted, left, right) = child
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1  # p_invert_child toggles the child gate, not the parent inversion
|
||||
clone_left = clone_model(left, p_mutation)
|
||||
clone_right = clone_model(right, p_mutation)
|
||||
return (inverted, clone_xors, (child_inverted, clone_left, clone_right), clone_scratch, clone_touched)
|
||||
|
||||
def random_child(sample_size, p_mutation):
|
||||
global N
|
||||
inverted = random.randint(0, 1)
|
||||
xors = np.zeros((N,))
|
||||
scratch = np.zeros((sample_size,))
|
||||
touched = np.zeros((1,))
|
||||
|
||||
p_flip = p_mutation * random.random()
|
||||
p_child = p_mutation * random.random()
|
||||
|
||||
index = random.randint(0, N - 1)
|
||||
xors[index] = 1
|
||||
for i in range(0, N):
|
||||
if random.random() < p_flip:
|
||||
xors[i] = 1
|
||||
# if random.random() < p_child:
|
||||
# child_inverted = random.randint(0, 1)
|
||||
# left = random_child(sample_size, p_mutation * random.random())
|
||||
# right = random_child(sample_size, p_mutation * random.random())
|
||||
# return (inverted, xors, (child_inverted, left, right), scratch, touched)
|
||||
return (inverted, xors, None, scratch, touched)
|
||||
|
||||
def size(model):
|
||||
(_, xors, child, _, _) = model
|
||||
xor_size = np.sum(xors)
|
||||
if not child is None:
|
||||
(_, left, right) = child
|
||||
return xor_size + size(left) * size(right)
|
||||
return xor_size
|
||||
|
||||
def null_candidate(sample_size):
|
||||
global N
|
||||
return (0, np.zeros((N,)), None, np.zeros((sample_size,)), np.zeros((1,)))
|
||||
|
||||
def main():
|
||||
global N
|
||||
epochs = 10000
|
||||
num_survivors = 100
|
||||
num_offspring = 10
|
||||
num_candidates = num_survivors + num_survivors * num_offspring
|
||||
sample_size = 32
|
||||
eval_size = 100
|
||||
p_mutation = 0.5
|
||||
g = sha
|
||||
current_generation = [null_candidate(sample_size) for _ in range(0, num_candidates)]
|
||||
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
output_equality = np.zeros((sample_size, sample_size))
|
||||
inputs = random_sample(sample_size, N)
|
||||
scratch = np.zeros(N,)
|
||||
compute_distances(inputs, distances, scratch)
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((sample_size,))
|
||||
output_xor = np.zeros((sample_size,))
|
||||
ones = np.ones((sample_size,))
|
||||
numerators = np.zeros((sample_size,))
|
||||
denominators = np.zeros((sample_size,))
|
||||
coherences = np.zeros((sample_size,))
|
||||
np.matmul(ones, distances, denominators)
|
||||
scores = np.zeros((num_candidates,))
|
||||
max_score = 0
|
||||
last_score = 0
|
||||
streak = 0
|
||||
|
||||
for epoch in range(0, epochs):
|
||||
for i in range(0, num_candidates):
|
||||
candidate = current_generation[i]
|
||||
evaluate_sample(candidate, inputs, outputs)
|
||||
np.logical_xor(outputs, expected_outputs, output_xor)
|
||||
for p in range(0, sample_size):
|
||||
for q in range(0, sample_size):
|
||||
m = int(output_xor[p])
|
||||
n = int(output_xor[q])
|
||||
output_equality[p][q] = 1 ^ m ^ n
|
||||
np.multiply(output_equality, distances, output_equality)
|
||||
np.matmul(ones, output_equality, numerators)
|
||||
np.divide(numerators, denominators, coherences)
|
||||
score = np.average(coherences)
|
||||
scores[i] = score
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
survivors = [current_generation[index] for index in top_n]
|
||||
|
||||
# f = lambda x: evaluate(current_generation[0], x)
|
||||
# correct = 0
|
||||
# for i in range(0, eval_size):
|
||||
# x = random_input()
|
||||
# if f(x) == g(x):
|
||||
# correct += 1
|
||||
|
||||
top_score = scores[top_n[-1]]
|
||||
print(epoch, top_score, size(survivors[-1]))
|
||||
if top_score <= max_score:
|
||||
p_mutation += 0.01
|
||||
else:
|
||||
p_mutation = 0.5
|
||||
max_score = top_score
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
current_generation[i] = survivors[i]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
candidate = survivors[i]
|
||||
for j in range(0, num_offspring):
|
||||
index = num_survivors + j * num_survivors + i
|
||||
current_generation[index] = clone_model(candidate, random.random())
|
||||
|
||||
# while random.random() < 0.5:
|
||||
if last_score == top_score:
|
||||
# streak += 1
|
||||
# else:
|
||||
# streak = 0
|
||||
# if streak >= 4:
|
||||
# streak = 0
|
||||
inputs = random_sample(sample_size, N)
|
||||
compute_distances(inputs, distances, scratch)
|
||||
np.matmul(ones, distances, denominators)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
# expected_outputs = np.zeros((sample_size,))
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# index = random.randint(0, sample_size - 1)
|
||||
# update_sample(inputs, index)
|
||||
# expected_outputs[index] = g(inputs[index])
|
||||
# update_distances(inputs, distances, index, scratch)
|
||||
# np.matmul(ones, distances, denominators)
|
||||
last_score = top_score
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
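The inner loop of main() in mutations.py scores every candidate with a vectorized form of the coherence metric: the pairwise agreement of (output XOR expected) is weighted by the precomputed 2^-hamming distance matrix, reduced with matrix products, and averaged. A small numpy sketch of just that scoring step (names like equality are illustrative, not from the commit):

import numpy as np

rng = np.random.default_rng(0)
sample_size, N = 6, 8
inputs = rng.integers(0, 2, size=(sample_size, N))
output_xor = rng.integers(0, 2, size=sample_size)  # candidate output ^ expected

# distances[i][j] = 2**-hamming(inputs[i], inputs[j]), zero on the diagonal.
distances = np.zeros((sample_size, sample_size))
for i in range(sample_size):
    for j in range(sample_size):
        if i != j:
            distances[i][j] = 2.0 ** -np.sum(inputs[i] ^ inputs[j])

# equality[p][q] is 1 where the two residuals agree, matching 1 ^ m ^ n above.
equality = 1 - np.abs(output_xor[:, None] - output_xor[None, :])
coherences = (equality * distances).sum(axis=1) / distances.sum(axis=1)
print(np.average(coherences))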
425
mutations10.py
Normal file
@ -0,0 +1,425 @@
|
||||
from enum import unique
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
|
||||
def vec_to_int(x):
|
||||
z = 0
|
||||
for i in range(0, len(x)):
|
||||
z <<= 1
|
||||
z |= x[i]
|
||||
return z
|
||||
|
||||
def timeit(f):
|
||||
def timed(*args, **kw):
|
||||
ts = time.time()
|
||||
result = f(*args, **kw)
|
||||
te = time.time()
|
||||
|
||||
print('func:%r took: %2.4f sec' % (f.__name__, te-ts))
|
||||
return result
|
||||
return timed
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, layer, slots):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = layer
|
||||
self.offsets = np.zeros((self.node_count, N + 1 + slots)).astype(np.int32)
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self, layer, slots):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.slots = slots
|
||||
self.node_count = layer
|
||||
self.p_offsets = np.zeros((self.node_count, N + 1 + slots))
|
||||
self.p_offsets.fill(0.5)
|
||||
self.offset_coherences = np.zeros((2, self.node_count, N + 1 + slots, 2, self.node_count, N + 1 + slots))
|
||||
self.offset_coherences.fill(-1)
|
||||
self.deltas = np.zeros((self.node_count, N + 1 + slots, 2, self.node_count, N + 1 + slots))
|
||||
|
||||
def has_converged(self):
|
||||
for i in range(0,self.node_count):
|
||||
for j in range(0, len(self.p_offsets[i])):
|
||||
if self.p_offsets[i][j] > 0.05 and self.p_offsets[i][j] < 0.95:
|
||||
return False
|
||||
return True
|
||||
|
||||
def flatten(self):
|
||||
candidate = Candidate(self.layer, self.slots)
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, len(self.p_offsets[i])):
|
||||
candidate.offsets[i][j] = 1 if self.p_offsets[i][j] >= 0.5 else 0
|
||||
return candidate
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
# 00100111 x4
|
||||
# 00000110 x1
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
|
||||
# 0 ^ 1 ^ (2 ^ (4 * (5 ^ 0 * 7))) * (3 ^ 6 * 7)
|
||||
# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * (5 ^ 0 * 7) ^ 4 * 6 * 7 * (5 ^ 0 * 7)
|
||||
# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * 5 ^ 0 * 3 * 4 * 7 ^ 4 * 5 * 6 * 7 ^ 0 * 4 * 6 * 7
|
||||
|
||||
# 0 ^ 1 ^ 2*3 ^ 2*6*7 ^ 3*4*5 ^ 0*3*4*7 ^ 4*5*6*7 ^ 0*4*6*7
|
||||
# What about strictly SOP?
|
||||
# That is, 1-Hot of increasing complexity?
|
||||
# How would that work?
|
||||
# Candidate generation could apply some kind of softmax to filter down to one
|
||||
#
|
||||
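# Sanity-check sketch for the expansion worked out in the comments above: over
# 0/1 values AND distributes over XOR and x*x == x, so the nested form below
# (the same expression as test_fn) equals the flat XOR-of-products form on all
# 256 inputs. The helper names here are illustrative only.
def _nested(x):
    return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))

def _flat(x):
    return (x[0] ^ x[1]
            ^ x[2] * x[3]
            ^ x[2] * x[6] * x[7]
            ^ x[3] * x[4] * x[5]
            ^ x[0] * x[3] * x[4] * x[7]
            ^ x[4] * x[5] * x[6] * x[7]
            ^ x[0] * x[4] * x[6] * x[7])

assert all(_nested(x) == _flat(x)
           for x in ([(n >> i) & 1 for i in range(8)] for n in range(256)))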
def test_fn(x):
|
||||
# 0 1
|
||||
# 2 | 3
|
||||
# 4 | 5 | 6 | 7
|
||||
# | | 0 | 7 | | | |
|
||||
return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))
|
||||
|
||||
def candidate_fn(x):
|
||||
return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2])
|
||||
|
||||
def true_fn(x):
|
||||
return x[0] ^ x[1] ^ (x[3] * x[2])
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(outputs, distances):
|
||||
coherences = []
|
||||
for i in range(0, len(outputs)):
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(outputs)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = outputs[j]
|
||||
weight = distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def random_sample(m, n, layers, g, compute_scratch):
|
||||
inputs = np.zeros((m, n)).astype(np.int32)
|
||||
augmented_inputs = np.zeros((m, n + len(layers) + 1)).astype(np.int32)
|
||||
outputs = np.zeros((m,)).astype(np.int32)
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
val = random.randint(0, 1)
|
||||
inputs[i][j] = val
|
||||
augmented_inputs[i][j] = val
|
||||
outputs[i] = g(inputs[i])
|
||||
augmented_inputs[i][n] = 1
|
||||
for j in range(0, len(layers)):
|
||||
augmented_inputs[i][n + j] = evaluate_candidate(layers[j], augmented_inputs[i], compute_scratch)
|
||||
return (inputs, augmented_inputs, outputs)
|
||||
|
||||
def populate_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def evaluate(layers, candidate, x, compute_scratch):
|
||||
z = evaluate_layers(layers, x, compute_scratch)
|
||||
z ^= evaluate_candidate(candidate, x, compute_scratch)
|
||||
return z
|
||||
|
||||
def evaluate_layers(layers, x, compute_scratch):
|
||||
z = 0
|
||||
for layer in layers:
|
||||
z ^= evaluate_candidate(layer, x, compute_scratch)
|
||||
return z
|
||||
|
||||
def evaluate_candidate(candidate, x, compute_scratch):
|
||||
y = 1
|
||||
for j in range(0, candidate.node_count):
|
||||
value = 0
|
||||
compute_scratch.fill(0)
|
||||
compute_scratch[0:len(candidate.offsets[j])] = candidate.offsets[j]
|
||||
np.multiply(compute_scratch, x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
y &= value
|
||||
return y
|
||||
|
||||
@timeit
|
||||
def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch):
|
||||
global M
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch)
|
||||
for j in range(1, num_candidates):
|
||||
np.copyto(outputs[j], outputs[0])
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
base_score = coherence(output_xor, distances)
|
||||
|
||||
scores.fill(0)
|
||||
unique_candidates = {}
|
||||
for j in range(0, num_candidates):
|
||||
create_candidate(probabilities, candidates[j])
|
||||
unique_candidates[candidate_str(candidates[j])] = j
|
||||
|
||||
for i in range(0, sample_size):
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
outputs[j][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
scores[j] = score
|
||||
return base_score
|
||||
|
||||
|
||||
def compute_uplift(candidate, layers, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch):
|
||||
global M
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
base_score = coherence(output_xor, distances)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
return (base_score, score)
|
||||
|
||||
@timeit
|
||||
def update_probabilities(probabilities, candidates, inputs, base_score, scores, scale):
|
||||
num_candidates = len(candidates)
|
||||
|
||||
probabilities.offset_coherences.fill(-1)
|
||||
for p in range(0, num_candidates):
|
||||
candidate = candidates[p]
|
||||
if scores[p] == 0:
|
||||
continue
|
||||
# score = max(scores[p], base_score)
|
||||
score = scores[p]
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, len(candidate.offsets[j])):
|
||||
i = candidate.offsets[j][k]
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, len(candidate.offsets[m])):
|
||||
l = candidate.offsets[m][n]
|
||||
probabilities.offset_coherences[i][j][k][l][m][n] = max(score, probabilities.offset_coherences[i][j][k][l][m][n])
|
||||
|
||||
p_offsets_next = np.empty_like(probabilities.p_offsets)
|
||||
inertia = 0
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, len(p_offsets_next[j])):
|
||||
delta = 0
|
||||
count = 0
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, len(p_offsets_next[m])):
|
||||
# if j == m and k == n:
|
||||
# continue
|
||||
p_j1_if_m0 = probabilities.offset_coherences[1][j][k][0][m][n]
|
||||
p_j0_if_m0 = probabilities.offset_coherences[0][j][k][0][m][n]
|
||||
p_j1_if_m1 = probabilities.offset_coherences[1][j][k][1][m][n]
|
||||
p_j0_if_m1 = probabilities.offset_coherences[0][j][k][1][m][n]
|
||||
if p_j1_if_m0 >= 0 and p_j0_if_m0 >= 0:
|
||||
# delta_if_m0 = (p_j1_if_m0 - base_score) - (p_j0_if_m0 - base_score)
|
||||
delta_if_m0 = p_j1_if_m0 - p_j0_if_m0
|
||||
delta += delta_if_m0 * (1.0 - probabilities.p_offsets[m][n]) * scale
|
||||
count += 1
|
||||
if p_j1_if_m1 >= 0 and p_j0_if_m1 >= 0:
|
||||
# delta_if_m1 = (p_j1_if_m1 - base_score) - (p_j0_if_m1 - base_score)
|
||||
delta_if_m1 = p_j1_if_m1 - p_j0_if_m1
|
||||
delta += delta_if_m1 * probabilities.p_offsets[m][n] * scale
|
||||
count += 1
|
||||
if count > 0:
|
||||
delta /= count
|
||||
p_offsets_next[j][k] = clamp(probabilities.p_offsets[j][k] + delta, 0, 1)
|
||||
inertia += abs(p_offsets_next[j][k] - probabilities.p_offsets[j][k])
|
||||
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, len(probabilities.p_offsets[j])):
|
||||
p_offset_next = 0.9 * probabilities.p_offsets[j][k] + 0.1 * p_offsets_next[j][k]
|
||||
# if p_offset_next <= 0.05:
|
||||
# p_offset_next = 0.0
|
||||
# elif p_offset_next >= 0.95:
|
||||
# p_offset_next = 1.0
|
||||
probabilities.p_offsets[j][k] = p_offset_next
|
||||
|
||||
return inertia
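# Note (added for clarity): offset_coherences[i][j][k][l][m][n] records the best
# score seen with offset (j, k) set to value i while offset (m, n) was set to
# value l. The update nudges p_offsets[j][k] by the score advantage of turning
# that offset on, averaged over the other offsets and weighted by how likely each
# of their states currently is; the total probability movement is returned as
# "inertia" so the caller can detect convergence.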
|
||||
|
||||
def create_candidate(probabilities, candidate):
|
||||
candidate.offsets.fill(0)
|
||||
for i in range(0, probabilities.node_count):
|
||||
max_value = -1
|
||||
max_index = -1
|
||||
for j in range(0, len(probabilities.p_offsets[i])):
|
||||
value = random.random() + probabilities.p_offsets[i][j]
|
||||
if value > max_value:
|
||||
max_value = value
|
||||
max_index = j
|
||||
# candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0
|
||||
candidate.offsets[i][max_index] = 1
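# Note (added for clarity): each node picks exactly one offset via a noisy argmax
# (uniform noise added to p_offsets), i.e. a 1-hot selection per node, instead of
# the independent Bernoulli draw shown in the commented-out line above.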
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
for i in range(0, src.node_count):
|
||||
for j in range(0, len(src.offsets[i])):
|
||||
dest.offsets[i][j] = src.offsets[i][j]
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities):
|
||||
print('=====================')
|
||||
for i in range(0, probabilities.node_count):
|
||||
print(i, p_a(probabilities.p_offsets[i]))
|
||||
print('=====================')
|
||||
|
||||
def candidate_str(candidate):
|
||||
build_str = ''
|
||||
for i in range(0, candidate.node_count):
|
||||
for j in range(0, len(candidate.offsets[i])):
|
||||
build_str += str(candidate.offsets[i][j])
|
||||
return build_str
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 1
|
||||
uplift_sample_size = 100
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros((N,))
|
||||
int_scratch = np.zeros((N,)).astype(np.int32)
|
||||
g = test_fn
|
||||
layers = []
|
||||
augment_layers = []
|
||||
layer = 1
|
||||
(inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, int_scratch)
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
populate_distances(inputs, distances, scratch)
|
||||
outputs = np.zeros((num_candidates + num_survivors, sample_size,)).astype(np.int32)
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
while score < 1:
|
||||
probabilities = Probabilities(layer, len(augment_layers))
|
||||
candidates = [Candidate(layer, len(augment_layers)) for _ in range(0, num_candidates + num_survivors)]
|
||||
augmented_int_scratch = np.zeros((N + 1 + len(augment_layers),)).astype(np.int32)
|
||||
(inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, augmented_int_scratch)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
|
||||
inertia = 1
|
||||
epoch = 1
|
||||
while inertia > 0.001 and epoch < 1000 and not probabilities.has_converged():
|
||||
base_score = compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, augmented_inputs, outputs, output_xor, expected_outputs, sample_size, augmented_int_scratch)
|
||||
round_inertia = update_probabilities(probabilities, candidates, augmented_inputs, base_score, scores, 1 + 0.01 * epoch)
|
||||
inertia = 0.9 * inertia + 0.1 * round_inertia
|
||||
|
||||
print_probabilities(probabilities)
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
max_score = np.max(scores)
|
||||
print(base_score, max_score, round_inertia, inertia)
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
src = candidates[src_index]
|
||||
dest = candidates[dest_index]
|
||||
candidates[dest_index] = src
|
||||
candidates[src_index] = dest
|
||||
|
||||
(inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, augmented_int_scratch)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
epoch += 1
|
||||
|
||||
candidate = probabilities.flatten()
|
||||
print(candidate.offsets)
|
||||
for j in range(0, sample_size):
|
||||
outputs[0][j] = evaluate(layers, candidate, augmented_inputs[j], augmented_int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
average_base_score = 0
|
||||
average_score = 0
|
||||
for i in range(0, uplift_sample_size):
|
||||
(inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, augmented_int_scratch)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
(base_score, score) = compute_uplift(candidate, layers, distances, augmented_inputs, outputs, output_xor, expected_outputs, sample_size, augmented_int_scratch)
|
||||
average_base_score += base_score
|
||||
average_score += score
|
||||
average_base_score /= uplift_sample_size
|
||||
average_score /= uplift_sample_size
|
||||
uplift = average_score - average_base_score
|
||||
print(uplift)
|
||||
|
||||
if uplift <= 0:
|
||||
layer += 1
|
||||
# augment_layers = layers[1:]
|
||||
continue
|
||||
|
||||
layers.append(candidate)
|
||||
# if layer == 1:
|
||||
# layer += 1
|
||||
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
535
mutations11.py
Normal file
@ -0,0 +1,535 @@
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
N = 8
|
||||
N_ACTUAL = 2 * ((N - 1) + 8)
|
||||
M = 2
|
||||
|
||||
def vec_to_int(x):
|
||||
z = 0
|
||||
for i in range(0, len(x)):
|
||||
z <<= 1
|
||||
z |= x[i]
|
||||
return z
|
||||
|
||||
def timeit(f):
|
||||
def timed(*args, **kw):
|
||||
ts = time.time()
|
||||
result = f(*args, **kw)
|
||||
te = time.time()
|
||||
|
||||
print('func:%r took: %2.4f sec' % (f.__name__, te-ts))
|
||||
return result
|
||||
return timed
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, layer):
|
||||
global N_ACTUAL
|
||||
self.layer = layer
|
||||
self.offsets = np.zeros((N_ACTUAL)).astype(np.int32)
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self, layer):
|
||||
global N_ACTUAL
|
||||
self.layer = layer
|
||||
self.p_offsets = np.zeros((N_ACTUAL))
|
||||
self.p_offsets.fill(0.5)
|
||||
self.p_offsets_next = np.zeros((N_ACTUAL))
|
||||
self.offset_coherences = np.zeros((N_ACTUAL))
|
||||
self.offset_coherences.fill(-1)
|
||||
self.knowns = set()
|
||||
|
||||
def snap(self):
|
||||
reset = False
|
||||
for j in range(0, len(self.p_offsets)):
|
||||
if self.p_offsets[j] > 0.6 and self.p_offsets[j] < 0.95:
|
||||
self.p_offsets[j] = 1.0
|
||||
self.knowns.add(j)
|
||||
flip = j ^ 0b1
|
||||
self.p_offsets[flip] = 0.0
|
||||
reset = True
|
||||
break
|
||||
elif self.p_offsets[j] < 0.05:
|
||||
self.p_offsets[j] = 0.0
|
||||
if reset:
|
||||
for j in range(0, len(self.p_offsets)):
|
||||
flip = j ^ 0b1
|
||||
if self.p_offsets[j] < 0.95 and self.p_offsets[flip] < 0.95:
|
||||
self.p_offsets[j] = 0.5
|
||||
|
||||
def eliminate_random_known(self):
|
||||
if len(self.knowns) == 0:
|
||||
return False
|
||||
index = random.choice(list(self.knowns))  # random.sample() no longer accepts a set (Python 3.11+)
|
||||
self.knowns.remove(index)
|
||||
return True
|
||||
|
||||
def reset(self):
|
||||
self.p_offsets.fill(0.5)
|
||||
for index in self.knowns:
|
||||
flip = index ^ 0b1
|
||||
self.p_offsets[index] = 1.0
|
||||
self.p_offsets[flip] = 0.0
|
||||
|
||||
def all_zeros(self):
|
||||
for j in range(0, len(self.p_offsets)):
|
||||
if self.p_offsets[j] > 0.05 and self.p_offsets[j] < 0.95:
|
||||
return False
|
||||
return True
|
||||
|
||||
def has_converged(self):
|
||||
if self.all_zeros():
|
||||
return True
|
||||
|
||||
top_n = sorted(range(len(self.p_offsets)), key=lambda i: self.p_offsets[i])[-self.layer:]
|
||||
for i in top_n:
|
||||
if self.p_offsets[i] < 0.95:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def flatten(self):
|
||||
candidate = Candidate(self.layer)
|
||||
top_n = sorted(range(len(self.p_offsets)), key=lambda i: self.p_offsets[i])[-self.layer:]
|
||||
for i in top_n:
|
||||
if self.p_offsets[i] < 0.95:
|
||||
return None
|
||||
candidate.offsets[i] = 1
|
||||
|
||||
return candidate
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
# 00100111 x4
|
||||
# 00000110 x1
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def sha_byte(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
|
||||
# 0 ^ 1 ^ (2 ^ (4 * (5 ^ 0 * 7))) * (3 ^ 6 * 7)
|
||||
# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * (5 ^ 0 * 7) ^ 4 * 6 * 7 * (5 ^ 0 * 7)
|
||||
# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * 5 ^ 0 * 3 * 4 * 7 ^ 4 * 5 * 6 * 7 ^ 0 * 4 * 6 * 7
|
||||
|
||||
# 0 ^ 1 ^ 2*3 ^ 2*6*7 ^ 3*4*5 ^ 0*3*4*7 ^ 4*5*6*7 ^ 0*4*6*7
|
||||
# What about strictly SOP?
|
||||
# That is, 1-Hot of increasing complexity?
|
||||
# How would that work?
|
||||
# Candidate generation could apply some kind of softmax to filter down to one
|
||||
#
|
||||
def test_fn(x):
|
||||
# 0 1
|
||||
# 2 | 3
|
||||
# 4 | 5 | 6 | 7
|
||||
# | | 0 | 7 | | | |
|
||||
return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))
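# Sketch added for illustration (not part of the original file): verify that the
# nested expression above equals the expanded sum-of-products form from the
# comments. Over 0/1 values, ^ is addition and * is multiplication in GF(2), so
# AND distributes over XOR and the nesting flattens into eight product terms.
def check_sop_expansion():
    import itertools
    for bits in itertools.product([0, 1], repeat=8):
        x = list(bits)
        sop = (x[0] ^ x[1]
               ^ (x[2] * x[3]) ^ (x[2] * x[6] * x[7])
               ^ (x[3] * x[4] * x[5]) ^ (x[0] * x[3] * x[4] * x[7])
               ^ (x[4] * x[5] * x[6] * x[7]) ^ (x[0] * x[4] * x[6] * x[7]))
        assert sop == test_fn(x)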
|
||||
|
||||
def candidate_fn(x):
|
||||
return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2])
|
||||
|
||||
def true_fn(x):
|
||||
return x[0] ^ x[1] ^ (x[3] * x[2])
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(outputs, distances):
|
||||
coherences = []
|
||||
for i in range(0, len(outputs)):
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(outputs)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = outputs[j]
|
||||
weight = distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
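# Sketch added for illustration (not part of the original file): an equivalent
# vectorized form of coherence() above for 0/1 residuals. It computes the same
# distance-weighted agreement average with NumPy broadcasting instead of the
# double loop.
def coherence_vectorized(outputs, distances):
    outputs = np.asarray(outputs)
    weights = np.array(distances, dtype=float)
    np.fill_diagonal(weights, 0.0)  # skip the i == j pairs
    agree = (outputs[:, None] == outputs[None, :]).astype(float)
    denominators = weights.sum(axis=1)
    numerators = (agree * weights).sum(axis=1)
    per_sample = np.divide(numerators, denominators,
                           out=np.zeros_like(numerators), where=denominators > 0)
    return per_sample.mean()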
|
||||
|
||||
def random_sample(m, inputs, augmented_inputs, outputs):
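# Note (added for clarity): this sampler sets up the learning task. It draws a
# random N-bit preimage, hashes it with SHA-256, and asks the model to recover
# the preimage's first bit: outputs[i] = preimage bit 0, while augmented_inputs[i]
# holds preimage bits 1..N-1 plus the 8 bits of the first hash byte, each stored
# with its complement (positions 2k and 2k+1), hence N_ACTUAL = 2 * ((N - 1) + 8).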
|
||||
global N, N_ACTUAL
|
||||
for i in range(0, m):
|
||||
for j in range(0, N):
|
||||
val = random.randint(0, 1)
|
||||
inputs[i][j] = val
|
||||
if j > 0:
|
||||
augmented_inputs[i][(j - 1) * 2] = val
|
||||
augmented_inputs[i][(j - 1) * 2 + 1] = 1 - val
|
||||
# augmented_inputs[i][j * 2] = val
|
||||
# augmented_inputs[i][j * 2 + 1] = 1 - val
|
||||
output = sha_byte(inputs[i])
|
||||
outputs[i] = inputs[i][0]
|
||||
for k in range(0, 1):
|
||||
output_byte = output[k]
|
||||
for j in range(0, 8):
|
||||
val = (output_byte >> j) & 0b1
|
||||
inputs[i][k * 8 + j] = val
|
||||
augmented_inputs[i][(N - 1 + k * 8 + j) * 2] = val
|
||||
augmented_inputs[i][(N - 1 + k * 8 + j) * 2 + 1] = 1 - val
|
||||
# outputs[i] = g(inputs[i])
|
||||
return (inputs, augmented_inputs, outputs)
|
||||
|
||||
def populate_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
distances[i][j] = 1.0 / (2 ** distance)
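# Note (added for clarity): weights decay as 1 / 2**hamming_distance, so the
# coherence metric above is dominated by each sample's nearest neighbours in
# input space.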
|
||||
|
||||
def evaluate(layers, candidate, x, compute_scratch):
|
||||
z = evaluate_layers(layers, x, compute_scratch)
|
||||
z ^= evaluate_candidate(candidate, x, compute_scratch)
|
||||
return z
|
||||
|
||||
def evaluate_layers(layers, x, compute_scratch):
|
||||
z = 0
|
||||
for layer in layers:
|
||||
z ^= evaluate_candidate(layer, x, compute_scratch)
|
||||
return z
|
||||
|
||||
def evaluate_candidate(candidate, x, compute_scratch):
|
||||
compute_scratch.fill(0)
|
||||
compute_scratch[0:len(candidate.offsets)] = candidate.offsets
|
||||
np.multiply(compute_scratch, x, compute_scratch)
|
||||
return 1 if np.sum(compute_scratch) - np.sum(candidate.offsets) == 0 else 0
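# Note (added for clarity): a candidate is an AND term over the augmented inputs;
# the check above returns 1 exactly when every position selected by
# candidate.offsets is 1 in x.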
|
||||
|
||||
def layer_str(layer):
|
||||
parts = []
|
||||
for i in range(0, len(layer.offsets)):
|
||||
if layer.offsets[i] == 1:
|
||||
parts.append('x[' + str(i) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
def cache_layers(layers):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for i in range(0, len(layers)):
|
||||
layer = layers[i]
|
||||
expr += '\tresult^=' + layer_str(layer) + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
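# Note (added for clarity): cache_layers compiles the accumulated layers into a
# single Python function via exec(). For example, layers selecting {0, 3} and {2}
# generate:
#   def f(x):
#       result=0
#       result^=x[0]*x[3]
#       result^=x[2]
#       return result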
|
||||
|
||||
@timeit
|
||||
def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch, cached_f):
|
||||
global M
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] = cached_f(inputs[i])
|
||||
# outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch)
|
||||
# check = cached_f(inputs[i])
|
||||
# if check != outputs[0][i]:
|
||||
# raise Exception('Mistake')
|
||||
for j in range(1, num_candidates):
|
||||
np.copyto(outputs[j], outputs[0])
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
base_score = coherence(output_xor, distances)
|
||||
|
||||
scores.fill(0)
|
||||
unique_candidates = {}
|
||||
for j in range(0, num_candidates):
|
||||
create_candidate(probabilities, candidates[j])
|
||||
unique_candidates[candidate_str(candidates[j])] = j
|
||||
|
||||
for i in range(0, sample_size):
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
outputs[j][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
scores[j] = score
|
||||
# for j in range(0, num_candidates):
|
||||
# candidate = candidates[j]
|
||||
# create_candidate(probabilities, candidate)
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# for j in range(0, num_candidates):
|
||||
# candidate = candidates[j]
|
||||
# outputs[j][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
|
||||
# for j in range(0, num_candidates):
|
||||
# candidate = candidates[j]
|
||||
# np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
# np.mod(output_xor, M, output_xor)
|
||||
# score = coherence(output_xor, distances)
|
||||
# scores[j] = score
|
||||
|
||||
return base_score
|
||||
|
||||
|
||||
def compute_uplift(candidate, layers, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch):
|
||||
global M
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
base_score = coherence(output_xor, distances)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
return (base_score, score)
|
||||
|
||||
@timeit
|
||||
def update_probabilities(probabilities, candidates, inputs, base_score, scores, scale):
|
||||
num_candidates = len(candidates)
|
||||
|
||||
probabilities.offset_coherences.fill(-1)
|
||||
for p in range(0, num_candidates):
|
||||
score = scores[p]
|
||||
if score == 0:
|
||||
continue
|
||||
candidate = candidates[p]
|
||||
|
||||
for j in range(0, len(candidate.offsets)):
|
||||
if candidate.offsets[j] == 0:
|
||||
continue
|
||||
probabilities.offset_coherences[j] = max(score, probabilities.offset_coherences[j])
|
||||
|
||||
inertia = 0
|
||||
for j in range(0, len(probabilities.p_offsets_next)):
|
||||
p = probabilities.offset_coherences[j]
|
||||
delta = p - base_score if p >= 0 else 0
|
||||
probabilities.p_offsets_next[j] = clamp(probabilities.p_offsets[j] + delta, 0, 1)
|
||||
inertia += abs(probabilities.p_offsets_next[j] - probabilities.p_offsets[j])
|
||||
|
||||
for j in range(0, len(probabilities.p_offsets_next)):
|
||||
p_offset_next = 0.9 * probabilities.p_offsets[j] + 0.1 * probabilities.p_offsets_next[j]
|
||||
# if p_offset_next <= 0.05:
|
||||
# p_offset_next = 0.0
|
||||
# elif p_offset_next >= 0.95:
|
||||
# p_offset_next = 1.0
|
||||
probabilities.p_offsets[j] = p_offset_next
|
||||
# total = np.sum(probabilities.p_offsets[j])
|
||||
# probabilities.p_offsets[j] *= 1.0 / total
|
||||
|
||||
probabilities.snap()
|
||||
|
||||
return inertia
|
||||
|
||||
def create_candidate(probabilities, candidate):
|
||||
candidate.offsets.fill(0)
|
||||
scores = np.empty_like(candidate.offsets).astype(np.float32)
|
||||
for j in range(0, len(probabilities.p_offsets)):
|
||||
if probabilities.p_offsets[j] == 1.0:
|
||||
scores[j] = 1000
|
||||
elif probabilities.p_offsets[j] == 0.0:
|
||||
scores[j] = -1000
|
||||
else:
|
||||
scores[j] = random.random() + probabilities.p_offsets[j]
|
||||
top = sorted(range(len(scores)), key=lambda i: scores[i], reverse = True)
|
||||
picked = set()
|
||||
for i in top:
|
||||
flip = i ^ 0b1
|
||||
if flip in picked:
|
||||
continue
|
||||
candidate.offsets[i] = 1
|
||||
picked.add(i)
|
||||
if len(picked) == candidate.layer:
|
||||
return
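# Note (added for clarity): offsets come in complementary pairs (2k is an input
# bit, 2k + 1 its negation, see random_sample), so a candidate never selects both
# halves of a pair; selection stops once `layer` offsets have been picked.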
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
for j in range(0, len(src.offsets)):
|
||||
dest.offsets[j] = src.offsets[j]
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities):
|
||||
print('=====================')
|
||||
print(p_a(probabilities.p_offsets))
|
||||
print('=====================')
|
||||
|
||||
def candidate_str(candidate):
|
||||
build_str = ''
|
||||
for j in range(0, len(candidate.offsets)):
|
||||
build_str += str(candidate.offsets[j])
|
||||
return build_str
|
||||
|
||||
def main():
|
||||
global N, N_ACTUAL, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 1
|
||||
uplift_sample_size = 128
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros((N,))
|
||||
int_scratch = np.zeros((N,)).astype(np.int32)
|
||||
g = sha
|
||||
layers = []
|
||||
unique_layers = set()
|
||||
augment_layers = []
|
||||
layer = 1
|
||||
inputs = np.zeros((sample_size, N)).astype(np.int32)
|
||||
augmented_inputs = np.zeros((sample_size, N_ACTUAL)).astype(np.int32)
|
||||
expected_outputs = np.zeros((sample_size,)).astype(np.int32)
|
||||
random_sample(sample_size, inputs, augmented_inputs, expected_outputs)
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
populate_distances(inputs, distances, scratch)
|
||||
outputs = np.zeros((num_candidates + num_survivors, sample_size,)).astype(np.int32)
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
cached_f = cache_layers(layers)
|
||||
probabilities = Probabilities(1)
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
with open('model.txt', 'w') as f:
|
||||
while score < 1:
|
||||
probabilities.layer = layer
|
||||
candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)]
|
||||
augmented_int_scratch = np.zeros((N_ACTUAL,)).astype(np.int32)
|
||||
random_sample(sample_size, inputs, augmented_inputs, expected_outputs)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
|
||||
inertia = 1
|
||||
epoch = 1
|
||||
while inertia > 0.001 and epoch < 2000 and not probabilities.has_converged():
|
||||
base_score = compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, augmented_inputs, outputs, output_xor, expected_outputs, sample_size, augmented_int_scratch, cached_f)
|
||||
round_inertia = update_probabilities(probabilities, candidates, augmented_inputs, base_score, scores, 1 + 0.01 * epoch)
|
||||
inertia = 0.9 * inertia + 0.1 * round_inertia
|
||||
|
||||
print_probabilities(probabilities)
|
||||
# for candidate in layers:
|
||||
# print(candidate.offsets)
|
||||
max_score = np.max(scores)
|
||||
print(base_score, max_score, round_inertia, inertia)
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
src = candidates[src_index]
|
||||
dest = candidates[dest_index]
|
||||
candidates[dest_index] = src
|
||||
candidates[src_index] = dest
|
||||
|
||||
random_sample(sample_size, inputs, augmented_inputs, expected_outputs)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
epoch += 1
|
||||
|
||||
candidate = probabilities.flatten()
|
||||
# uplift = -1
|
||||
# if not candidate is None:
|
||||
# print(candidate.offsets)
|
||||
# for j in range(0, sample_size):
|
||||
# outputs[0][j] = evaluate(layers, candidate, augmented_inputs[j], augmented_int_scratch)
|
||||
# np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
# np.mod(output_xor, M, output_xor)
|
||||
# score = coherence(output_xor, distances)
|
||||
|
||||
# average_base_score = 0
|
||||
# average_score = 0
|
||||
# for i in range(0, uplift_sample_size):
|
||||
# (inputs, augmented_inputs, expected_outputs) = random_sample(sample_size, N, augment_layers, g, augmented_int_scratch)
|
||||
# populate_distances(inputs, distances, scratch)
|
||||
# (base_score, score) = compute_uplift(candidate, layers, distances, augmented_inputs, outputs, output_xor, expected_outputs, sample_size, augmented_int_scratch)
|
||||
# average_base_score += base_score
|
||||
# average_score += score
|
||||
# average_base_score /= uplift_sample_size
|
||||
# average_score /= uplift_sample_size
|
||||
# uplift = average_score - average_base_score
|
||||
# print(uplift)
|
||||
|
||||
# if uplift <= 0:
|
||||
# layer += 1
|
||||
# # augment_layers = layers[1:]
|
||||
# continue
|
||||
if candidate is None:
|
||||
if probabilities.eliminate_random_known():
|
||||
probabilities.reset()
|
||||
continue
|
||||
layer += 1
|
||||
continue
|
||||
|
||||
layer_id = candidate_str(candidate)
|
||||
if layer_id in unique_layers:
|
||||
if probabilities.eliminate_random_known():
|
||||
if probabilities.eliminate_random_known():
|
||||
probabilities.reset()
|
||||
continue
|
||||
layer += 1
|
||||
continue
|
||||
|
||||
unique_layers.add(layer_id)
|
||||
layers.append(candidate)
|
||||
cached_f = cache_layers(layers)
|
||||
probabilities.eliminate_random_known()
|
||||
probabilities.reset()
|
||||
|
||||
for i in range(0, len(candidate.offsets)):
|
||||
if candidate.offsets[i] == 1:
|
||||
f.write(str(i))
|
||||
f.write(' ')
|
||||
f.write('\n')
|
||||
f.flush()
|
||||
|
||||
# if layer == 1:
|
||||
# layer += 1
|
||||
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
391
mutations12.py
Normal file
@ -0,0 +1,391 @@
|
||||
import bisect
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
|
||||
def evaluate(self, x):
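# Note (added for clarity): an index equal to len(x) (the extra "null" slot,
# actual_N in Probabilities) acts as a sentinel; any candidate containing it
# evaluates to 0.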
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 8
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
|
||||
self.layers = []
|
||||
self.base = None
|
||||
self.rings = []
|
||||
|
||||
self.scratch = np.zeros((self.actual_N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
|
||||
def randomize_inputs(self):
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
self.distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def compute_rings(self):
|
||||
self.rings = []
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
min_distance = self.actual_N
|
||||
indices = []
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
if distance < min_distance:
|
||||
min_distance = distance
|
||||
indices = [j]
|
||||
elif distance == min_distance:
|
||||
indices.append(j)
|
||||
self.rings.append(indices)
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.expected_outputs[i] = sha(self.inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def ring_coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
total = 0
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
indices = self.rings[i]
|
||||
coherence = sum([1 if self.output_xor[j] == y_a else 0 for j in indices]) / len(indices)
|
||||
total += coherence
|
||||
return total / len(self.output_xor)
|
||||
|
||||
def normalize_p(self):
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 100)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
# self.compute_rings()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
# return self.ring_coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def update(self):
|
||||
self.epoch += 1
|
||||
base_coherence = self.random_sample()
|
||||
self.max_coherences.fill(0)
|
||||
for i in range(0, self.actual_N):
|
||||
self.max_candidates[i] = None
|
||||
visited = set()
|
||||
has_candidate = False
|
||||
# np.copyto(self.next_p, self.p)
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in visited:
|
||||
continue
|
||||
visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 1:  # first pass; epoch is incremented at the top of update(), so it is never 0 here
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
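# Note (added for clarity): average_delta_over_null is an exponential moving
# average of the gap between the strongest index and the "null" slot
# self.p[self.actual_N]; the convergence and backtracking decisions below key off
# this gap.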
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if not index is None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
447
mutations13.py
Normal file
@ -0,0 +1,447 @@
|
||||
import bisect
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
self.uplift = 0
|
||||
|
||||
def evaluate(self, x):
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 8
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
|
||||
self.layers = []
|
||||
self.base = None
|
||||
self.rings = []
|
||||
|
||||
self.scratch = np.zeros((self.actual_N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
|
||||
def randomize_inputs(self):
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
self.distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def compute_rings(self):
|
||||
self.rings = []
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
min_distance = self.actual_N
|
||||
indices = []
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
if distance < min_distance:
|
||||
min_distance = distance
|
||||
indices = [j]
|
||||
elif distance == min_distance:
|
||||
indices.append(j)
|
||||
self.rings.append(indices)
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.expected_outputs[i] = sha(self.inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def ring_coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
total = 0
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
indices = self.rings[i]
|
||||
coherence = sum([1 if self.output_xor[j] == y_a else 0 for j in indices]) / len(indices)
|
||||
total += coherence
|
||||
return total / len(self.output_xor)
|
||||
|
||||
def normalize_p(self):
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 100)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
# self.compute_rings()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
# return self.ring_coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
|
||||
|
||||
def seed_candidate_pool(self):
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.candidate_ids:
|
||||
continue
|
||||
self.candidate_pool.append(candidate)
|
||||
self.candidate_ids.add(candidate_id)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def update(self):
|
||||
self.epoch += 1
|
||||
base_coherence = self.random_sample()
|
||||
self.seed_candidate_pool()
|
||||
for candidate in self.candidate_pool:
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
coherence = self.coherence()
|
||||
candidate.uplift += coherence - base_coherence
|
||||
self.candidate_pool.sort(key=lambda x: x.uplift, reverse=True)
|
||||
for drop_candidate in self.candidate_pool[self.num_candidates:]:
|
||||
self.candidate_ids.remove(drop_candidate.id())
|
||||
self.candidate_pool = self.candidate_pool[:self.num_candidates]
|
||||
# print('======')
|
||||
# print(self.epoch, base_coherence)
|
||||
# print('======')
|
||||
|
||||
# if len(self.candidate_pool) == 0:
|
||||
# print(self.p)
|
||||
|
||||
# for i in range(0, min(5, len(self.candidate_pool))):
|
||||
# candidate = self.candidate_pool[i]
|
||||
# print(candidate.id(), candidate.uplift)
|
||||
|
||||
# if self.epoch < 15:
|
||||
# return
|
||||
|
||||
if self.candidate_pool[0].uplift > 0.3:
|
||||
candidate = self.candidate_pool[0]
|
||||
candidate_id = candidate.id()
|
||||
self.candidate_ids.remove(candidate_id)
|
||||
print(candidate_id)
|
||||
self.knowns = candidate.indices
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
|
||||
self.epoch = 0
|
||||
self.num_terms += 1
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
return
|
||||
|
||||
# np.copyto(self.next_p, self.p)
|
||||
# These locals were initialized in the previous version's update(); without them
# the loop below raises NameError on `visited` / `has_candidate`, and
# max_coherences / max_candidates would carry stale values between epochs.
self.max_coherences.fill(0)
for i in range(0, self.actual_N):
    self.max_candidates[i] = None
visited = set()
has_candidate = False
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in visited:
|
||||
continue
|
||||
visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 1:  # first pass; epoch is incremented at the top of update(), so it is never 0 here
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if not index is None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
549
mutations14.py
Normal file
@ -0,0 +1,549 @@
|
||||
import bisect
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
def bin_div(a, b):
|
||||
if a == 0 and b == 0:
|
||||
return 2
|
||||
if a == 1 and b == 0:
|
||||
return -1
|
||||
if a == 0 and b == 1:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
self.uplift = 0
|
||||
|
||||
def evaluate(self, x):
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 16
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.masked_distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.mask = np.zeros((self.sample_size))
|
||||
self.numerators = np.zeros((self.sample_size))
|
||||
self.denominators = np.zeros((self.sample_size))
|
||||
self.coherences = np.zeros((self.sample_size))
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
self.uplifts = np.zeros((self.actual_N))
|
||||
self.subspace_uplifts = np.zeros((self.actual_N))
|
||||
|
||||
self.layers = []
|
||||
self.base = None
|
||||
|
||||
self.scratch = np.zeros((self.actual_N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
self.visited = set()
|
||||
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
|
||||
def randomize_inputs(self):
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
self.distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.expected_outputs[i] = sha(self.inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def mat_coherence(self):
|
||||
np.abs(self.output_xor, self.mask)
|
||||
np.subtract(self.output_xor, self.mask, self.mask)
|
||||
np.divide(self.mask, 2.0, self.mask)
|
||||
np.add(1.0, self.mask, self.mask)
|
||||
self.xor_square.fill(0)
|
||||
np.copyto(self.masked_distances, self.distances)
|
||||
masked_distances_t = self.masked_distances.transpose()
|
||||
for i in range(0, len(self.xor_square)):
|
||||
self.xor_square[i] = self.output_xor
|
||||
np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i])
|
||||
np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i])
|
||||
np.sum(self.masked_distances, axis=0, out=self.denominators)
|
||||
self.xor_square = self.xor_square.transpose()
|
||||
np.logical_xor(self.xor_square, self.output_xor, self.xor_square)
|
||||
np.multiply(self.xor_square, self.masked_distances, self.xor_square)
|
||||
np.sum(self.xor_square, axis=0, out=self.numerators)
|
||||
np.divide(self.numerators, self.denominators, self.coherences)
|
||||
return 1.0 - np.nanmean(self.coherences)
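# Note (added for clarity): mat_coherence is a matrix form of the per-sample loop
# kept in coherence() below. mask is 1 where output_xor >= 0 and 0 for negative
# "don't care" entries, and it zeroes the corresponding rows and columns of the
# distance matrix; xor_square[i][j] = output_xor[i] XOR output_xor[j] marks
# disagreements, so 1 - nanmean of the weighted disagreement rate is the
# agreement score.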
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
return self.mat_coherence()
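# The loops below are the earlier per-pair implementation of the same statistic;
# they are kept for reference but never run because of the return above.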
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
raw_coherence = sum(coherences) / len(coherences)
|
||||
check_coherence = self.mat_coherence()
|
||||
|
||||
return raw_coherence
|
||||
|
||||
def div_coherence(self):
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
if y_a < 0:
|
||||
continue
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
if y_b < 0:
|
||||
continue
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
# if y_a < 0 or y_b < 0:
|
||||
# numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def normalize_p(self):
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
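# normalize_p clamps negative weights to zero, removes indices already in knowns
# (and their complementary rail, i ^ 1), zeroes any index whose addition to knowns
# would recreate a stopped combination, then rescales p to sum to 1.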
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 100)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
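# random_candidate extends knowns by sampling (num_terms - len(knowns)) extra
# indices without replacement from p: the null slot is masked out, each pick also
# zeroes its complementary rail, and indices whose addition would hit a stop are
# suppressed. If the distribution collapses to all zeros it returns None.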
|
||||
|
||||
def seed_candidate_pool(self):
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.candidate_ids:
|
||||
continue
|
||||
self.candidate_pool.append(candidate)
|
||||
self.candidate_ids.add(candidate_id)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def get_distribution(self, candidate, half = 1):
|
||||
count = 0
|
||||
for i in range(0, len(self.inputs)):
|
||||
value = candidate.evaluate(self.inputs[i])
|
||||
if value == half:
|
||||
self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i]
|
||||
count += 1
|
||||
else:
|
||||
self.output_xor[i] = -1
|
||||
return (count, self.mat_coherence())
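# get_distribution restricts the coherence measurement to the subspace where the
# candidate term evaluates to `half`: residual bits (base XOR expected) are kept
# for those samples and everything else is flagged -1 so mat_coherence ignores it.
# It returns the subspace size together with that conditional coherence.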
|
||||
|
||||
def update(self):
|
||||
self.epoch += 1
|
||||
base_coherence = self.random_sample()
|
||||
candidate = Candidate(self.knowns[:])
|
||||
|
||||
index = -1
|
||||
subspace_index = -1
|
||||
bar = 1.0 - (self.epoch / 1000.0)
|
||||
for i in range(0, self.actual_N):
|
||||
if i in self.knowns:
|
||||
continue
|
||||
candidate.indices.append(i)
|
||||
(count_0, subspace_coherence_0) = self.get_distribution(candidate, 0)
|
||||
# (_, subspace_coherence) = self.get_distribution(candidate, 0)
|
||||
# subspace_coherence = subspace_coherence_0 * count_0 / (count_0 + count_1) + subspace_coherence_1 * count_1 / (count_0 + count_1)
|
||||
# subspace_coherence = subspace_coherence_0
|
||||
# delta = (subspace_coherence_0 - base_coherence) * count_0 / (count_0 + count_1) + (subspace_coherence_1 - base_coherence) * count_1 / (count_0 + count_1)
|
||||
delta = (subspace_coherence_0 - base_coherence) * count_0 / len(self.inputs)
|
||||
self.subspace_uplifts[i] += delta
|
||||
if self.subspace_uplifts[i] > bar:
|
||||
if subspace_index < 0 or self.subspace_uplifts[i] > self.subspace_uplifts[subspace_index]:
|
||||
subspace_index = i
|
||||
|
||||
if index_hash(candidate.indices) not in self.stops:
|
||||
for j in range(0, len(self.inputs)):
|
||||
self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j])
|
||||
coherence = self.coherence()
|
||||
delta = coherence - base_coherence
|
||||
# self.uplifts[i] = 0.9 * self.uplifts[i] + 0.1 * coherence
|
||||
self.uplifts[i] += delta
|
||||
if self.uplifts[i] > bar:
|
||||
if index < 0 or self.uplifts[i] > self.uplifts[index]:
|
||||
index = i
|
||||
candidate.indices.pop()
|
||||
|
||||
# print('=====' + str(base_coherence))
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
print(base_coherence)
|
||||
print(self.knowns, bar)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
if subspace_index >= 0:
|
||||
self.knowns.append(subspace_index)
|
||||
print(self.knowns, bar)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
# print('======')
|
||||
# print(self.epoch, base_coherence)
|
||||
# print('======')
|
||||
|
||||
# if len(self.candidate_pool) == 0:
|
||||
# print(self.p)
|
||||
|
||||
# for i in range(0, min(5, len(self.candidate_pool))):
|
||||
# candidate = self.candidate_pool[i]
|
||||
# print(candidate.id(), candidate.uplift)
|
||||
|
||||
# if self.epoch < 15:
|
||||
# return
|
||||
|
||||
if self.candidate_pool[0].uplift > 0.3:
|
||||
candidate = self.candidate_pool[0]
|
||||
candidate_id = candidate.id()
|
||||
self.candidate_ids.remove(candidate_id)
|
||||
print(candidate_id)
|
||||
self.knowns = candidate.indices
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
|
||||
self.epoch = 0
|
||||
self.num_terms += 1
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
return
|
||||
|
||||
# np.copyto(self.next_p, self.p)
|
||||
has_candidate = False
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.visited:
|
||||
continue
|
||||
self.visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 0:
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if index is not None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
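# cache_layers compiles the accepted layers into a single callable by generating
# source for f(x) as the XOR of each layer's product term and exec'ing it. As a
# rough illustration (hypothetical layers [0, 3] and [5]), the generated source is:
#
#   def f(x):
#       result = 0
#       result ^= x[0]*x[3]
#       result ^= x[5]
#       return result
#
# The result becomes self.base, so later terms are fit against the residual
# expected_outputs XOR base_outputs rather than the raw target.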
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
# probabilities.knowns = [14]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [8]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [4]
|
||||
# probabilities.add_layer()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
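# The driver keeps calling update() until num_terms exceeds N, i.e. it stops once
# the search would need product terms wider than the number of logical input bits.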
|
628
mutations15.py
Normal file
628
mutations15.py
Normal file
@ -0,0 +1,628 @@
|
||||
import bisect
|
||||
from email.mime import base
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import statistics
|
||||
|
||||
from pkg_resources import get_distribution
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
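# Target function: encode() packs the bit vector MSB-first into bytes and sha()
# labels it with the least significant bit of the first byte of its SHA-256
# digest, so the model is being asked to predict one output bit of SHA-256.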
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
def bin_div(a, b):
|
||||
if a == 0 and b == 0:
|
||||
return 2
|
||||
if a == 1 and b == 0:
|
||||
return -1
|
||||
if a == 0 and b == 1:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
self.uplift = 0
|
||||
|
||||
def evaluate(self, x):
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 8
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.masked_distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.mask = np.zeros((self.sample_size))
|
||||
self.numerators = np.zeros((self.sample_size))
|
||||
self.denominators = np.zeros((self.sample_size))
|
||||
self.coherences = np.zeros((self.sample_size))
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
self.uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_means = np.zeros((self.actual_N))
|
||||
self.uplift_medians = np.zeros((self.actual_N))
|
||||
self.uplift_convergences = np.zeros((self.actual_N))
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.subspace_uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)]
|
||||
self.uplift_stddevs = np.zeros((self.actual_N))
|
||||
|
||||
self.layers = []
|
||||
self.base = None
|
||||
|
||||
self.scratch = np.zeros((self.actual_N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
self.visited = set()
|
||||
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.has_added_layer = False
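# Compared with the previous file, this version keeps a list of sampled uplift
# deltas per index and tracks their running mean, median, convergence (change in
# the median between epochs), standard deviation and range; term selection below
# is driven by the median stabilising rather than by an accumulated uplift sum.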
|
||||
|
||||
def randomize_inputs(self):
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
self.distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.expected_outputs[i] = sha(self.inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def mat_coherence(self):
|
||||
np.abs(self.output_xor, self.mask)
|
||||
np.subtract(self.output_xor, self.mask, self.mask)
|
||||
np.divide(self.mask, 2.0, self.mask)
|
||||
np.add(1.0, self.mask, self.mask)
|
||||
self.xor_square.fill(0)
|
||||
np.copyto(self.masked_distances, self.distances)
|
||||
masked_distances_t = self.masked_distances.transpose()
|
||||
for i in range(0, len(self.xor_square)):
|
||||
self.xor_square[i] = self.output_xor
|
||||
np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i])
|
||||
np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i])
|
||||
np.sum(self.masked_distances, axis=0, out=self.denominators)
|
||||
self.xor_square = self.xor_square.transpose()
|
||||
np.logical_xor(self.xor_square, self.output_xor, self.xor_square)
|
||||
np.multiply(self.xor_square, self.masked_distances, self.xor_square)
|
||||
np.sum(self.xor_square, axis=0, out=self.numerators)
|
||||
np.divide(self.numerators, self.denominators, self.coherences)
|
||||
return 1.0 - np.nanmean(self.coherences)
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
return self.mat_coherence()
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
raw_coherence = sum(coherences) / len(coherences)
|
||||
check_coherence = self.mat_coherence()
|
||||
|
||||
return raw_coherence
|
||||
|
||||
def div_coherence(self):
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
if y_a < 0:
|
||||
continue
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
if y_b < 0:
|
||||
continue
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
# if y_a < 0 or y_b < 0:
|
||||
# numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def normalize_p(self):
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 1000)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.has_added_layer = True
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
|
||||
|
||||
def seed_candidate_pool(self):
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.candidate_ids:
|
||||
continue
|
||||
self.candidate_pool.append(candidate)
|
||||
self.candidate_ids.add(candidate_id)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def get_distribution(self, candidate, half = 1):
|
||||
count = 0
|
||||
for i in range(0, len(self.inputs)):
|
||||
value = candidate.evaluate(self.inputs[i])
|
||||
if value == half:
|
||||
self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i]
|
||||
count += 1
|
||||
else:
|
||||
self.output_xor[i] = -1
|
||||
return (count, self.mat_coherence())
|
||||
|
||||
def update(self):
|
||||
self.epoch += 1
|
||||
|
||||
index = -1
|
||||
subspace_index = -1
|
||||
# bar = 1.0 - (self.epoch / 10000.0)
|
||||
if self.epoch >= 200:
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
# if len(self.knowns) > 0 and not self.has_added_layer:
|
||||
# self.add_stop()
|
||||
# self.knowns.pop()
|
||||
self.has_added_layer = False
|
||||
if len(self.knowns) == 0:
|
||||
self.num_terms += 1
|
||||
self.stops = set()
|
||||
else:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
self.update()
|
||||
return
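# Restart policy: every 200 epochs the uplift statistics are cleared; if no index
# is currently pending the term width num_terms grows and the stop list resets,
# otherwise the current partial term is recorded as a stop, its last index is
# dropped, and update() is re-entered to continue from the shorter prefix.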
|
||||
|
||||
base_coherence = self.random_sample()
|
||||
candidate = Candidate(self.knowns[:])
|
||||
|
||||
for i in range(0, self.actual_N):
|
||||
# if i in self.knowns:
|
||||
# continue
|
||||
candidate.indices.append(i)
|
||||
try:
|
||||
if i in self.knowns:
|
||||
continue
|
||||
if index_hash(candidate.indices) in self.stops:
|
||||
continue
|
||||
|
||||
if len(candidate.indices) < self.num_terms:
|
||||
(count_0, subspace_coherence_0) = self.get_distribution(candidate, 0)
|
||||
delta_0 = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size
|
||||
(count_1, subspace_coherence_1) = self.get_distribution(candidate, 1)
|
||||
delta_1 = (subspace_coherence_1 - base_coherence) * count_1 / self.sample_size
|
||||
self.uplift_samples[i].append(delta_0)
|
||||
self.uplift_samples[i].append(delta_1)
|
||||
mean = statistics.mean(self.uplift_samples[i])
|
||||
median = statistics.median(self.uplift_samples[i])
|
||||
self.uplift_convergences[i] = abs(self.uplift_medians[i] - median)
|
||||
self.uplift_means[i] = mean
|
||||
self.uplift_medians[i] = median
|
||||
if self.epoch > 20 and self.uplift_convergences[i] < 1e-5 and self.uplift_medians[i] > 0:
|
||||
if subspace_index < 0 or self.uplift_medians[i] > self.uplift_medians[subspace_index]:
|
||||
subspace_index = i
|
||||
# if self.uplift_convergences[i] < 1e-6 and self.uplift_means[i] > 0:
|
||||
# if subspace_index < 0 or self.uplift_means[i] > self.uplift_means[subspace_index]:
|
||||
# subspace_index = i
|
||||
# self.subspace_uplifts[i] += delta
|
||||
# if self.subspace_uplifts[i] > bar:
|
||||
# if subspace_index < 0 or self.subspace_uplifts[i] > self.subspace_uplifts[subspace_index]:
|
||||
# subspace_index = i
|
||||
else:
|
||||
for j in range(0, len(self.inputs)):
|
||||
self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j])
|
||||
coherence = self.coherence()
|
||||
delta = coherence - base_coherence
|
||||
self.uplift_samples[i].append(delta)
|
||||
self.uplift_ranges[i][0] = max(self.uplift_samples[i])
|
||||
self.uplift_ranges[i][1] = min(self.uplift_samples[i])
|
||||
mean = statistics.mean(self.uplift_samples[i])
|
||||
median = statistics.median(self.uplift_samples[i])
|
||||
if len(self.uplift_samples[i]) >= 2:
|
||||
stddev = statistics.stdev(self.uplift_samples[i])
|
||||
self.uplift_stddevs[i] = stddev
|
||||
self.uplift_convergences[i] = abs(self.uplift_medians[i] - median)
|
||||
self.uplift_means[i] = mean
|
||||
self.uplift_medians[i] = median
|
||||
# self.uplifts[i] = 0.9 * self.uplifts[i] + 0.1 * coherence
|
||||
self.uplifts[i] += delta
|
||||
middle = self.uplift_ranges[i][1] + (self.uplift_ranges[i][0] - self.uplift_ranges[i][1]) / 2
|
||||
|
||||
if self.epoch > 20 and self.uplift_convergences[i] < 1e-5 and self.uplift_medians[i] > 0:
|
||||
if index < 0 or self.uplift_medians[i] > self.uplift_medians[index]:
|
||||
index = i
|
||||
# if self.epoch > 100 and max(self.uplift_samples[i]) + min(self.uplift_samples[i]) > 0.01:
|
||||
# if index < 0 or max(self.uplift_samples[i]) + min(self.uplift_samples[i]) > max(self.uplift_samples[index]) + min(self.uplift_samples[index]):
|
||||
# index = i
|
||||
# if self.uplift_convergences[i] < 1e-6 and self.uplift_means[i] > 0:
|
||||
# if index < 0 or self.uplift_means[i] > self.uplift_means[index]:
|
||||
# index = i
|
||||
# if self.uplifts[i] > bar:
|
||||
# if index < 0 or self.uplifts[i] > self.uplifts[index]:
|
||||
# index = i
|
||||
finally:
|
||||
candidate.indices.pop()
|
||||
|
||||
# print('=====' + str(base_coherence))
|
||||
# print(self.uplifts)
|
||||
# print(self.uplift_means)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplift_stddevs)
|
||||
# print(self.uplift_ranges)
|
||||
# print(self.uplift_convergences)
|
||||
# print(self.subspace_uplifts)
|
||||
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
print(base_coherence)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.add_layer()
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
if subspace_index >= 0:
|
||||
self.knowns.append(subspace_index)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
# print('======')
|
||||
# print(self.epoch, base_coherence)
|
||||
# print('======')
|
||||
|
||||
# if len(self.candidate_pool) == 0:
|
||||
# print(self.p)
|
||||
|
||||
# for i in range(0, min(5, len(self.candidate_pool))):
|
||||
# candidate = self.candidate_pool[i]
|
||||
# print(candidate.id(), candidate.uplift)
|
||||
|
||||
# if self.epoch < 15:
|
||||
# return
|
||||
|
||||
if self.candidate_pool[0].uplift > 0.3:
|
||||
candidate = self.candidate_pool[0]
|
||||
candidate_id = candidate.id()
|
||||
self.candidate_ids.remove(candidate_id)
|
||||
print(candidate_id)
|
||||
self.knowns = candidate.indices
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
|
||||
self.epoch = 0
|
||||
self.num_terms += 1
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
return
|
||||
|
||||
# np.copyto(self.next_p, self.p)
|
||||
has_candidate = False
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.visited:
|
||||
continue
|
||||
self.visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 0:
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if index is not None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
# probabilities.knowns = [14]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [8]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [4]
|
||||
# probabilities.add_layer()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
663
mutations16.py
Normal file
663
mutations16.py
Normal file
@ -0,0 +1,663 @@
|
||||
import bisect
|
||||
from cmath import isnan
|
||||
from email.mime import base
|
||||
import matplotlib.pyplot as plt
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import statistics
|
||||
|
||||
from pkg_resources import get_distribution
|
||||
from scipy import stats
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
def bin_div(a, b):
|
||||
if a == 0 and b == 0:
|
||||
return 2
|
||||
if a == 1 and b == 0:
|
||||
return -1
|
||||
if a == 0 and b == 1:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
self.uplift = 0
|
||||
|
||||
def evaluate(self, x):
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 16
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.masked_distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.mask = np.zeros((self.sample_size))
|
||||
self.numerators = np.zeros((self.sample_size))
|
||||
self.denominators = np.zeros((self.sample_size))
|
||||
self.coherences = np.zeros((self.sample_size))
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
self.uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_means = np.zeros((self.actual_N))
|
||||
self.uplift_medians = np.zeros((self.actual_N))
|
||||
self.uplift_convergences = np.zeros((self.actual_N))
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.superspace_uplift_samples = []
|
||||
self.subspace_uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)]
|
||||
self.uplift_stddevs = np.zeros((self.actual_N))
|
||||
|
||||
self.layers = []
|
||||
self.layer_confidence = {}
|
||||
self.base = None
|
||||
|
||||
self.scratch = np.zeros((self.actual_N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
self.visited = set()
|
||||
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.has_added_layer = False
|
||||
|
||||
def randomize_inputs(self):
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
x_a = self.inputs[i]
|
||||
for j in range(0, len(self.inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
self.distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.expected_outputs[i] = sha(self.inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def mat_coherence(self):
|
||||
np.abs(self.output_xor, self.mask)
|
||||
np.subtract(self.output_xor, self.mask, self.mask)
|
||||
np.divide(self.mask, 2.0, self.mask)
|
||||
np.add(1.0, self.mask, self.mask)
|
||||
self.xor_square.fill(0)
|
||||
np.copyto(self.masked_distances, self.distances)
|
||||
masked_distances_t = self.masked_distances.transpose()
|
||||
for i in range(0, len(self.xor_square)):
|
||||
self.xor_square[i] = self.output_xor
|
||||
np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i])
|
||||
np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i])
|
||||
np.sum(self.masked_distances, axis=0, out=self.denominators)
|
||||
self.xor_square = self.xor_square.transpose()
|
||||
np.logical_xor(self.xor_square, self.output_xor, self.xor_square)
|
||||
np.multiply(self.xor_square, self.masked_distances, self.xor_square)
|
||||
np.sum(self.xor_square, axis=0, out=self.numerators)
|
||||
np.divide(self.numerators, self.denominators, self.coherences)
|
||||
mean = np.nanmean(self.coherences)
|
||||
if isnan(mean):
|
||||
mean = 1.0
|
||||
return 1.0 - mean
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
return self.mat_coherence()
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
raw_coherence = sum(coherences) / len(coherences)
|
||||
check_coherence = self.mat_coherence()
|
||||
|
||||
return raw_coherence
|
||||
|
||||
def div_coherence(self):
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
if y_a < 0:
|
||||
continue
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
if y_b < 0:
|
||||
continue
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
# if y_a < 0 or y_b < 0:
|
||||
# numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def normalize_p(self):
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 1000)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.has_added_layer = True
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
|
||||
|
||||
def seed_candidate_pool(self):
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.candidate_ids:
|
||||
continue
|
||||
self.candidate_pool.append(candidate)
|
||||
self.candidate_ids.add(candidate_id)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def get_distribution(self, candidate, half = 1):
|
||||
count = 0
|
||||
for i in range(0, len(self.inputs)):
|
||||
value = candidate.evaluate(self.inputs[i])
|
||||
if value == half:
|
||||
self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i]
|
||||
count += 1
|
||||
else:
|
||||
self.output_xor[i] = -1
|
||||
return (count, self.mat_coherence())
|
||||
|
||||
def update(self):
|
||||
self.epoch += 1
|
||||
|
||||
base_coherence = self.random_sample()
|
||||
candidate = Candidate(self.knowns[:])
|
||||
|
||||
if len(candidate.indices) > 0:
|
||||
index = candidate.indices.pop()
|
||||
try:
|
||||
count_0, superspace_coherence_0 = self.get_distribution(candidate, 0)
|
||||
count_1, superspace_coherence_1 = self.get_distribution(candidate, 1)
|
||||
# delta = (superspace_coherence - base_coherence) * count / self.sample_size
|
||||
delta = superspace_coherence_0 - superspace_coherence_1
|
||||
self.superspace_uplift_samples.append(delta)
|
||||
finally:
|
||||
candidate.indices.append(index)
|
||||
|
||||
for i in range(0, self.actual_N):
|
||||
candidate.indices.append(i)
|
||||
try:
|
||||
if i in self.knowns:
|
||||
continue
|
||||
|
||||
count_0, subspace_coherence_0 = self.get_distribution(candidate, 0)
|
||||
# count_1, subspace_coherence_1 = self.get_distribution(candidate, 1)
|
||||
delta = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size
|
||||
# delta = subspace_coherence_0 - subspace_coherence_1
|
||||
self.subspace_uplift_samples[i].append(delta)
|
||||
|
||||
# if index_hash(candidate.indices) in self.stops:
|
||||
# continue
|
||||
|
||||
for j in range(0, len(self.inputs)):
|
||||
self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j])
|
||||
|
||||
coherence = self.coherence()
|
||||
delta = coherence - base_coherence
|
||||
self.uplift_samples[i].append(delta)
|
||||
finally:
|
||||
candidate.indices.pop()
|
||||
|
||||
if self.epoch >= 100:
|
||||
# for i in range(0, self.actual_N):
|
||||
# parameters = stats.norm.fit(self.uplift_samples[i])
|
||||
# print(i, parameters)
|
||||
# print(i, stats.kstest(self.uplift_samples[i], "norm", parameters))
|
||||
|
||||
# fig, axs = plt.subplots(4, 4)
|
||||
# for i in range(0, 4):
|
||||
# for j in range(0, 4):
|
||||
# n, bins, patches = axs[i][j].hist(self.uplift_samples[i * 4 + j], 50, density=True, facecolor='g', alpha=0.75)
|
||||
# plt.show()
|
||||
|
||||
try:
|
||||
index = -1
|
||||
best_mu = -1
|
||||
confidence = -1
|
||||
for i in range(0, self.actual_N):
|
||||
if len(self.uplift_samples[i]) == 0:
|
||||
continue
|
||||
parameters = stats.norm.fit(self.uplift_samples[i])
|
||||
(mu, _) = parameters
|
||||
# median = statistics.median(self.uplift_samples[i])
|
||||
if mu > 0:
|
||||
result = stats.kstest(self.uplift_samples[i], stats.norm.cdf, parameters)
|
||||
layer_id = index_hash(self.knowns + [i])
|
||||
if layer_id in self.layer_confidence:
|
||||
layer_confidence = self.layer_confidence[layer_id]
|
||||
if layer_confidence >= result.pvalue:
|
||||
continue
|
||||
if index < 0 or mu > best_mu:
|
||||
best_mu = mu
|
||||
index = i
|
||||
confidence = result.pvalue
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
self.layer_confidence[index_hash(self.knowns)] = confidence
|
||||
# num_terms = len(self.knowns)
|
||||
print(self.knowns, best_mu, confidence)
|
||||
print(base_coherence)
|
||||
self.add_layer()
|
||||
# if num_terms > self.num_terms:
|
||||
# self.stops = set()
|
||||
# self.num_terms = num_terms
|
||||
self.knowns = []
|
||||
return
|
||||
|
||||
index = -1
|
||||
best_mu = -1
|
||||
superspace_median = statistics.median(self.superspace_uplift_samples) if len(self.superspace_uplift_samples) > 0 else -1
|
||||
for i in range(0, self.actual_N):
|
||||
if len(self.subspace_uplift_samples[i]) == 0:
|
||||
continue
|
||||
# median = statistics.median(self.subspace_uplift_samples[i])
|
||||
parameters = stats.norm.fit(self.subspace_uplift_samples[i])
|
||||
(mu, _) = parameters
|
||||
if mu > 0:
|
||||
result = stats.kstest(self.subspace_uplift_samples[i], stats.norm.cdf, parameters)
|
||||
# print(i, mu, result.pvalue)
|
||||
if result.pvalue > 0.95:
|
||||
if index < 0 or mu > best_mu:
|
||||
# if median > best_median:
|
||||
best_mu = mu
|
||||
index = i
|
||||
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
print(self.knowns, best_mu)
|
||||
return
|
||||
|
||||
if len(self.knowns) > 0:
|
||||
# self.add_stop()
|
||||
self.knowns = []
|
||||
finally:
|
||||
self.epoch = 0
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.superspace_uplift_samples = []
|
||||
return
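# Every 100 epochs this version fits a normal distribution (stats.norm.fit) to
# each index's uplift samples, accepts the index with the largest positive mean,
# and stores the Kolmogorov-Smirnov p-value as that layer's confidence so a later
# pass must beat it; if no full-width term qualifies it falls back to the subspace
# samples, requiring a KS p-value above 0.95. The code below this return looks
# carried over from mutations15.py: on epochs below 100 it would reference
# index / subspace_index without them being set on this path.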
|
||||
|
||||
# print('=====' + str(base_coherence))
|
||||
# print(self.uplifts)
|
||||
# print(self.uplift_means)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplift_stddevs)
|
||||
# print(self.uplift_ranges)
|
||||
# print(self.uplift_convergences)
|
||||
# print(self.subspace_uplifts)
|
||||
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
print(base_coherence)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.add_layer()
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
if subspace_index >= 0:
|
||||
self.knowns.append(subspace_index)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
# print('======')
|
||||
# print(self.epoch, base_coherence)
|
||||
# print('======')
|
||||
|
||||
# if len(self.candidate_pool) == 0:
|
||||
# print(self.p)
|
||||
|
||||
# for i in range(0, min(5, len(self.candidate_pool))):
|
||||
# candidate = self.candidate_pool[i]
|
||||
# print(candidate.id(), candidate.uplift)
|
||||
|
||||
# if self.epoch < 15:
|
||||
# return
|
||||
|
||||
if self.candidate_pool[0].uplift > 0.3:
|
||||
candidate = self.candidate_pool[0]
|
||||
candidate_id = candidate.id()
|
||||
self.candidate_ids.remove(candidate_id)
|
||||
print(candidate_id)
|
||||
self.knowns = candidate.indices
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
|
||||
self.epoch = 0
|
||||
self.num_terms += 1
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
return
|
||||
|
||||
# np.copyto(self.next_p, self.p)
|
||||
has_candidate = False
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.visited:
|
||||
continue
|
||||
self.visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 0:
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if index is not None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
# probabilities.knowns = [14]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [8]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [4]
|
||||
# probabilities.add_layer()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
669
mutations17.py
Normal file
@ -0,0 +1,669 @@
|
||||
import bisect
|
||||
from cmath import isnan
|
||||
import matplotlib.pyplot as plt
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import statistics
|
||||
|
||||
from pkg_resources import get_distribution
|
||||
from scipy import stats
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
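# Hash-based target: pack the bit vector into bytes, SHA-256 it, and return the lowest
# bit of the first digest byte. Appears to be an alternative, pseudorandom target
# function; compute_expected_outputs below uses xor instead.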
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor(v):
|
||||
# parity of the input bits
return np.sum(v) % 2
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
def bin_div(a, b):
|
||||
if a == 0 and b == 0:
|
||||
return 2
|
||||
if a == 1 and b == 0:
|
||||
return -1
|
||||
if a == 0 and b == 1:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
self.uplift = 0
|
||||
|
||||
def evaluate(self, x):
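# A candidate evaluates to the product (logical AND) of its selected input columns.
# An index equal to len(x) denotes the "null" column, so such candidates always return 0.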
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 8
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
# self.sample_size = self.N ** 2
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.raw_inputs = np.zeros((self.sample_size, self.N)).astype(np.int32)
|
||||
self.masked_distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.mask = np.zeros((self.sample_size))
|
||||
self.numerators = np.zeros((self.sample_size))
|
||||
self.denominators = np.zeros((self.sample_size))
|
||||
self.coherences = np.zeros((self.sample_size))
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
self.uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_means = np.zeros((self.actual_N))
|
||||
self.uplift_medians = np.zeros((self.actual_N))
|
||||
self.uplift_convergences = np.zeros((self.actual_N))
|
||||
# self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.superspace_uplift_samples = []
|
||||
self.subspace_uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)]
|
||||
self.uplift_stddevs = np.zeros((self.actual_N))
|
||||
|
||||
self.samples = 1000
|
||||
# self.samples = 200
|
||||
self.base_coherence_samples = np.zeros((self.samples))
|
||||
self.coherence_samples = np.zeros((self.actual_N, self.samples))
|
||||
self.subspace_uplift_left_samples = np.zeros((self.actual_N, self.samples))
|
||||
self.subspace_uplift_right_samples = np.zeros((self.actual_N, self.samples))
|
||||
|
||||
self.layers = []
|
||||
self.layer_confidence = {}
|
||||
self.base = None
|
||||
|
||||
self.scratch = np.zeros((self.N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
self.visited = set()
|
||||
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.has_added_layer = False
|
||||
|
||||
def randomize_inputs(self):
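# Each raw input bit j is expanded into two columns: inputs[:, 2j] holds the bit and
# inputs[:, 2j+1] its complement, so candidate terms can reference either polarity.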
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.raw_inputs[i][j] = val
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
|
||||
for i in range(0, len(self.raw_inputs)):
|
||||
x_a = self.raw_inputs[i]
|
||||
for j in range(0, len(self.raw_inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.raw_inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
self.distances[i][j] = 1.0 / (2 ** (distance - 1)) if distance > 0 else 0
|
||||
# self.distances[i][j] = 1.0 / (distance ** 2) if distance > 0 else 0
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.raw_inputs)):
|
||||
self.expected_outputs[i] = xor(self.raw_inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def mat_coherence(self):
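# Vectorised coherence of the residuals in output_xor. Entries equal to -1 are masked
# out of both rows and columns of the distance matrix; for the rest, pairwise
# disagreement is weighted by self.distances, and the function returns one minus the
# mean weighted disagreement, i.e. the distance-weighted agreement rate.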
|
||||
np.abs(self.output_xor, self.mask)
|
||||
np.subtract(self.output_xor, self.mask, self.mask)
|
||||
np.divide(self.mask, 2.0, self.mask)
|
||||
np.add(1.0, self.mask, self.mask)
|
||||
self.xor_square.fill(0)
|
||||
np.copyto(self.masked_distances, self.distances)
|
||||
masked_distances_t = self.masked_distances.transpose()
|
||||
for i in range(0, len(self.xor_square)):
|
||||
self.xor_square[i] = self.output_xor
|
||||
np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i])
|
||||
np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i])
|
||||
np.sum(self.masked_distances, axis=0, out=self.denominators)
|
||||
self.xor_square = self.xor_square.transpose()
|
||||
np.logical_xor(self.xor_square, self.output_xor, self.xor_square)
|
||||
np.multiply(self.xor_square, self.masked_distances, self.xor_square)
|
||||
np.sum(self.xor_square, axis=0, out=self.numerators)
|
||||
np.divide(self.numerators, self.denominators, self.coherences)
|
||||
mean = np.nanmean(self.coherences)
|
||||
if isnan(mean):
|
||||
mean = 1.0
|
||||
return 1.0 - mean
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
return self.mat_coherence()
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
raw_coherence = sum(coherences) / len(coherences)
|
||||
check_coherence = self.mat_coherence()
|
||||
|
||||
return raw_coherence
|
||||
|
||||
def div_coherence(self):
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
if y_a < 0:
|
||||
continue
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
if y_b < 0:
|
||||
continue
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
# if y_a < 0 or y_b < 0:
|
||||
# numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def normalize_p(self):
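# Re-normalise the index distribution p: clamp negatives to zero, zero out indices
# already in knowns together with their complement column (i ^ 1), zero out any index
# whose addition to knowns would recreate a combination in self.stops, then rescale
# so the remaining mass sums to one.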
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 1000)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.has_added_layer = True
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
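# Draw a candidate by sampling the remaining (num_terms - len(knowns)) indices from p
# without replacement, excluding the null column, each chosen index's complement, and
# any index that would recreate a stopped combination. Returns None if the
# distribution collapses to zero mass.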
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
|
||||
|
||||
def seed_candidate_pool(self):
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.candidate_ids:
|
||||
continue
|
||||
self.candidate_pool.append(candidate)
|
||||
self.candidate_ids.add(candidate_id)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def get_distribution(self, candidate, half = 1):
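# Restrict the residual (base XOR expected) to the subspace of samples where the
# candidate term evaluates to `half`; the other samples are marked -1 so
# mat_coherence ignores them. Returns the subspace size and its coherence.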
|
||||
count = 0
|
||||
for i in range(0, len(self.inputs)):
|
||||
value = candidate.evaluate(self.inputs[i])
|
||||
if value == half:
|
||||
self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i]
|
||||
count += 1
|
||||
else:
|
||||
self.output_xor[i] = -1
|
||||
return (count, self.mat_coherence())
|
||||
|
||||
def update(self):
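# One epoch: draw a fresh random sample, record the base coherence, and for every
# input column record (a) the coherence when that column is XOR-ed onto the current
# base as an extra term and (b) the coherence of the residual restricted to the
# subspaces where the extended term is 0 or 1. After self.samples epochs the
# per-column distributions are compared against the base distribution with one-sided
# KS tests, the column with the lowest p-value is appended to knowns (and committed
# as a layer in the "flat" case), the histograms are plotted, and the counter resets.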
|
||||
sample = self.epoch
|
||||
self.epoch += 1
|
||||
|
||||
base_coherence = self.random_sample()
|
||||
self.base_coherence_samples[sample] = base_coherence - 0.5
|
||||
candidate = Candidate(self.knowns[:])
|
||||
|
||||
for i in range(0, self.actual_N):
|
||||
candidate.indices.append(i)
|
||||
try:
|
||||
count_0, subspace_coherence_0 = self.get_distribution(candidate, 0)
|
||||
count_1, subspace_coherence_1 = self.get_distribution(candidate, 1)
|
||||
# delta = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size
|
||||
# delta = subspace_coherence_0 - subspace_coherence_1
|
||||
self.subspace_uplift_left_samples[i][sample] = subspace_coherence_0 - 0.5
|
||||
self.subspace_uplift_right_samples[i][sample] = subspace_coherence_1 - 0.5
|
||||
|
||||
# if index_hash(candidate.indices) in self.stops:
|
||||
# continue
|
||||
|
||||
for j in range(0, len(self.inputs)):
|
||||
self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j])
|
||||
|
||||
coherence = self.coherence()
|
||||
self.coherence_samples[i][sample] = coherence - 0.5
|
||||
finally:
|
||||
candidate.indices.pop()
|
||||
|
||||
if self.epoch >= self.samples:
|
||||
# for i in range(0, self.actual_N):
|
||||
# parameters = stats.norm.fit(self.uplift_samples[i])
|
||||
# print(i, parameters)
|
||||
# print(i, stats.kstest(self.uplift_samples[i], "norm", parameters))
|
||||
|
||||
added = False
|
||||
# parameters = stats.norm.fit(self.base_coherence_samples)
|
||||
# (base_mu, _) = parameters
|
||||
|
||||
try:
|
||||
index = -1
|
||||
lowest_pvalue = -1
|
||||
is_subspace = False
|
||||
for i in range(0, self.actual_N):
|
||||
if i in self.knowns:
|
||||
continue
|
||||
result = stats.kstest(self.base_coherence_samples, self.coherence_samples[i], alternative='greater')
|
||||
print(i, result)
|
||||
# value = result.pvalue * (1 - result.statistic)
|
||||
if index < 0 or result.pvalue < lowest_pvalue:
|
||||
# if index < 0 or value < lowest_pvalue:
|
||||
index = i
|
||||
lowest_pvalue = result.pvalue
|
||||
|
||||
for i in range(0, self.actual_N):
|
||||
if i in self.knowns:
|
||||
continue
|
||||
result = stats.kstest(self.base_coherence_samples, self.subspace_uplift_left_samples[i], alternative='greater')
|
||||
# result = stats.kstest(self.subspace_uplift_left_samples[i], self.subspace_uplift_right_samples[i], alternative='greater')
|
||||
print(i, result)
|
||||
# value = result.pvalue * (1 - result.statistic)
|
||||
if index < 0 or result.pvalue < lowest_pvalue:
|
||||
# if index < 0 or value < lowest_pvalue:
|
||||
index = i
|
||||
lowest_pvalue = result.pvalue
|
||||
is_subspace = True
|
||||
|
||||
# if result.pvalue > 0.95:
|
||||
# index = i
|
||||
# parameters = stats.norm.fit(self.subspace_uplift_samples[i])
|
||||
# (mu, _) = parameters
|
||||
# if mu > base_mu:
|
||||
# if index < 0 or mu > highest_mu:
|
||||
# index = i
|
||||
# highest_mu = mu
|
||||
|
||||
if index >= 0:
|
||||
if is_subspace:
|
||||
# print('subspace')
|
||||
self.knowns.append(index)
|
||||
print(self.knowns, lowest_pvalue)
|
||||
else:
|
||||
# print('flat')
|
||||
self.knowns.append(index)
|
||||
# self.layer_confidence[index_hash(self.knowns)] = confidence
|
||||
# num_terms = len(self.knowns)
|
||||
print(self.knowns, lowest_pvalue)
|
||||
print(base_coherence)
|
||||
self.add_layer()
|
||||
# if num_terms > self.num_terms:
|
||||
# self.stops = set()
|
||||
# self.num_terms = num_terms
|
||||
self.knowns = []
|
||||
return
|
||||
|
||||
# if len(self.knowns) > 0:
|
||||
# # self.add_stop()
|
||||
# self.knowns = []
|
||||
finally:
|
||||
fig, axs = plt.subplots(4, 4)
|
||||
for i in range(0, 4):
|
||||
for j in range(0, 4):
|
||||
axs[i][j].hist(self.base_coherence_samples, 50, density=True, facecolor='r', alpha=0.5)
|
||||
n, bins, patches = axs[i][j].hist(self.coherence_samples[i * 4 + j], 50, density=True, facecolor='g', alpha=0.5)
|
||||
n, bins, patches = axs[i][j].hist(self.subspace_uplift_left_samples[i * 4 + j], 50, density=True, facecolor='b', alpha=0.5)
|
||||
# n, bins, patches = axs[i][j].hist(self.subspace_uplift_right_samples[i * 4 + j], 50, density=True, facecolor='b', alpha=0.5)
|
||||
plt.show()
|
||||
self.epoch = 0
|
||||
|
||||
return
|
||||
|
||||
# print('=====' + str(base_coherence))
|
||||
# print(self.uplifts)
|
||||
# print(self.uplift_means)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplift_stddevs)
|
||||
# print(self.uplift_ranges)
|
||||
# print(self.uplift_convergences)
|
||||
# print(self.subspace_uplifts)
|
||||
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
print(base_coherence)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.add_layer()
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
if subspace_index >= 0:
|
||||
self.knowns.append(subspace_index)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
# print('======')
|
||||
# print(self.epoch, base_coherence)
|
||||
# print('======')
|
||||
|
||||
# if len(self.candidate_pool) == 0:
|
||||
# print(self.p)
|
||||
|
||||
# for i in range(0, min(5, len(self.candidate_pool))):
|
||||
# candidate = self.candidate_pool[i]
|
||||
# print(candidate.id(), candidate.uplift)
|
||||
|
||||
# if self.epoch < 15:
|
||||
# return
|
||||
|
||||
if self.candidate_pool[0].uplift > 0.3:
|
||||
candidate = self.candidate_pool[0]
|
||||
candidate_id = candidate.id()
|
||||
self.candidate_ids.remove(candidate_id)
|
||||
print(candidate_id)
|
||||
self.knowns = candidate.indices
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
|
||||
self.epoch = 0
|
||||
self.num_terms += 1
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
return
|
||||
|
||||
# np.copyto(self.next_p, self.p)
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in visited:
|
||||
continue
|
||||
visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 0:
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if index is not None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
# probabilities.knowns = [14]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [8]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [4]
|
||||
# probabilities.add_layer()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
845
mutations18.py
Normal file
@ -0,0 +1,845 @@
|
||||
import bisect
|
||||
from cmath import isnan
|
||||
import matplotlib.pyplot as plt
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import statistics
|
||||
|
||||
from pkg_resources import get_distribution
|
||||
from scipy import optimize, stats
|
||||
from astropy import modeling
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def xor(v):
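# Target function for this version: parity of all bits except bit 0, so the first
# input column is irrelevant to the output (presumably to check that the search does
# not select it).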
|
||||
return np.sum(v[1:]) % 2
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def index_hash(indices):
|
||||
return ','.join([str(index) for index in sorted(indices)])
|
||||
|
||||
def bin_div(a, b):
|
||||
if a == 0 and b == 0:
|
||||
return 2
|
||||
if a == 1 and b == 0:
|
||||
return -1
|
||||
if a == 0 and b == 1:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
class Candidate():
|
||||
def __init__(self, indices):
|
||||
self.indices = indices[:]
|
||||
self.uplift = 0
|
||||
|
||||
def evaluate(self, x):
|
||||
if len(x) in self.indices:
|
||||
return 0
|
||||
value = 1
|
||||
for index in self.indices:
|
||||
value *= x[index]
|
||||
return value
|
||||
|
||||
def id(self):
|
||||
return index_hash(self.indices)
|
||||
|
||||
def eval_str(self):
|
||||
parts = []
|
||||
for index in self.indices:
|
||||
parts.append('x[' + str(index) + ']')
|
||||
return '*'.join(parts)
|
||||
|
||||
class Probabilities():
|
||||
def __init__(self):
|
||||
self.N = 16
|
||||
self.actual_N = self.N * 2
|
||||
self.num_terms = 1
|
||||
self.num_candidates = 100
|
||||
# self.sample_size = self.N ** 2
|
||||
self.sample_size = 64
|
||||
self.p = np.zeros((self.actual_N + 1,))
|
||||
self.p_temp = np.empty_like(self.p)
|
||||
self.next_p = np.empty_like(self.p)
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
|
||||
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
|
||||
self.raw_inputs = np.zeros((self.sample_size, self.N)).astype(np.int32)
|
||||
self.masked_distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.distances = np.zeros((self.sample_size, self.sample_size))
|
||||
self.xor_square = np.zeros((self.sample_size, self.sample_size))
|
||||
self.nn = np.zeros((self.sample_size, self.sample_size)).astype(np.int32)
|
||||
self.nn_distances = np.zeros((self.sample_size, 2)).astype(np.int32)
|
||||
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
|
||||
self.mask = np.zeros((self.sample_size))
|
||||
self.numerators = np.zeros((self.sample_size))
|
||||
self.denominators = np.zeros((self.sample_size))
|
||||
self.coherences = np.zeros((self.sample_size))
|
||||
self.max_coherences = np.zeros((self.actual_N + 1))
|
||||
self.max_candidates = [None for _ in range(0, self.actual_N)]
|
||||
self.uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_means = np.zeros((self.actual_N))
|
||||
self.uplift_medians = np.zeros((self.actual_N))
|
||||
self.uplift_convergences = np.zeros((self.actual_N))
|
||||
# self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.superspace_uplift_samples = []
|
||||
self.subspace_uplifts = np.zeros((self.actual_N))
|
||||
self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)]
|
||||
self.uplift_stddevs = np.zeros((self.actual_N))
|
||||
|
||||
self.last_index = -1
|
||||
self.last_pvalue = -1
|
||||
self.left_half = True
|
||||
|
||||
self.samples = 10
|
||||
self.num_bins = 1000
|
||||
# self.samples = 200
|
||||
self.base_coherence_samples = np.zeros((self.samples))
|
||||
self.coherence_samples = np.zeros((self.actual_N, self.samples))
|
||||
self.subspace_uplift_samples = np.zeros((self.actual_N, self.samples))
|
||||
self.subspace_uplift_weights = np.zeros((self.actual_N, self.samples))
|
||||
|
||||
self.layers = []
|
||||
self.layer_confidence = {}
|
||||
self.base = None
|
||||
|
||||
self.scratch = np.zeros((self.N,))
|
||||
|
||||
self.last_value = -1
|
||||
self.rounds = 0
|
||||
self.average_delta_over_null = 0
|
||||
self.visited = set()
|
||||
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.has_added_layer = False
|
||||
|
||||
def randomize_inputs(self):
|
||||
for i in range(0, self.sample_size):
|
||||
for j in range(0, self.N):
|
||||
val = random.randint(0, 1)
|
||||
self.raw_inputs[i][j] = val
|
||||
self.inputs[i][j * 2] = val
|
||||
self.inputs[i][j * 2 + 1] = val ^ 1
|
||||
|
||||
def populate_distances(self):
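# Besides the inverse-power distance weights (now 1 / distance**12, much steeper than
# the earlier 1 / 2**(distance - 1)), this version records each sample's nearest
# neighbours: nn_distances[i] holds the minimum Hamming distance and the neighbour
# count, and nn[i][:count] the indices of the neighbours at that distance.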
|
||||
self.nn.fill(-1)
|
||||
self.nn_distances.fill(-1)
|
||||
for i in range(0, len(self.raw_inputs)):
|
||||
x_a = self.raw_inputs[i]
|
||||
for j in range(0, len(self.raw_inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = self.raw_inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, self.scratch)
|
||||
if (self.nn_distances[i][0] < 0 or distance < self.nn_distances[i][0]) and distance > 0:
|
||||
self.nn_distances[i][0] = distance
|
||||
self.nn_distances[i][1] = 1
|
||||
self.nn[i][0] = j
|
||||
elif distance == self.nn_distances[i][0]:
|
||||
count = self.nn_distances[i][1]
|
||||
self.nn_distances[i][1] = count + 1
|
||||
self.nn[i][count] = j
|
||||
# self.distances[i][j] = 1.0 / (2 ** (distance - 1)) if distance > 0 else 0
|
||||
self.distances[i][j] = 1.0 / (distance ** 12) if distance > 0 else 0
|
||||
|
||||
def compute_expected_outputs(self):
|
||||
for i in range(0, len(self.raw_inputs)):
|
||||
self.expected_outputs[i] = xor(self.raw_inputs[i])
|
||||
|
||||
def compute_base_outputs(self):
|
||||
if self.base is None:
|
||||
self.base_outputs.fill(0)
|
||||
return
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.base_outputs[i] = self.base(self.inputs[i])
|
||||
|
||||
def mat_coherence(self):
|
||||
np.abs(self.output_xor, self.mask)
|
||||
np.subtract(self.output_xor, self.mask, self.mask)
|
||||
np.divide(self.mask, 2.0, self.mask)
|
||||
np.add(1.0, self.mask, self.mask)
|
||||
self.xor_square.fill(0)
|
||||
np.copyto(self.masked_distances, self.distances)
|
||||
masked_distances_t = self.masked_distances.transpose()
|
||||
for i in range(0, len(self.xor_square)):
|
||||
self.xor_square[i] = self.output_xor
|
||||
np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i])
|
||||
np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i])
|
||||
np.sum(self.masked_distances, axis=0, out=self.denominators)
|
||||
self.xor_square = self.xor_square.transpose()
|
||||
np.logical_xor(self.xor_square, self.output_xor, self.xor_square)
|
||||
np.multiply(self.xor_square, self.masked_distances, self.xor_square)
|
||||
np.sum(self.xor_square, axis=0, out=self.numerators)
|
||||
np.divide(self.numerators, self.denominators, self.coherences)
|
||||
mean = np.nanmean(self.coherences)
|
||||
if isnan(mean):
|
||||
mean = 1.0
|
||||
return 1.0 - mean
|
||||
|
||||
def nn_coherence(self):
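# Nearest-neighbour coherence: for each sample, the fraction of its minimum-distance
# neighbours whose residual matches its own (both 0 or both 1), averaged over all
# samples. Note that -1 "excluded" entries are not masked here; they simply never
# match, unlike mat_coherence.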
|
||||
for i in range(0, len(self.output_xor)):
|
||||
total = 0
|
||||
y_a = self.output_xor[i]
|
||||
[distance, count] = self.nn_distances[i]
|
||||
for index in range(0, count):
|
||||
j = self.nn[i][index]
|
||||
y_b = self.output_xor[j]
|
||||
total += 1 if y_a == 1 and y_b == 1 or y_a == 0 and y_b == 0 else 0
|
||||
self.coherences[i] = total / count
|
||||
return np.mean(self.coherences)
|
||||
|
||||
def coherence(self, outputs=None):
|
||||
if outputs is None:
|
||||
outputs = self.outputs
|
||||
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
|
||||
return self.nn_coherence()
|
||||
# return self.mat_coherence()
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
|
||||
raw_coherence = sum(coherences) / len(coherences)
|
||||
check_coherence = self.mat_coherence()
|
||||
|
||||
return raw_coherence
|
||||
|
||||
def div_coherence(self):
|
||||
coherences = []
|
||||
for i in range(0, len(self.output_xor)):
|
||||
y_a = self.output_xor[i]
|
||||
if y_a < 0:
|
||||
continue
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(self.output_xor)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = self.output_xor[j]
|
||||
if y_b < 0:
|
||||
continue
|
||||
weight = self.distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
# if y_a < 0 or y_b < 0:
|
||||
# numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
if len(coherences) == 0:
|
||||
return 1.0
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def normalize_p(self):
|
||||
check = self.knowns[:]
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] < 0:
|
||||
self.p[i] = 0
|
||||
for i in range(0, len(self.p)):
|
||||
if i in self.knowns:
|
||||
flip = i ^ 0b1
|
||||
self.p[i] = 0.0
|
||||
self.p[flip] = 0.0
|
||||
else:
|
||||
check.append(i)
|
||||
stop_id = index_hash(check)
|
||||
check.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p[i] = 0.0
|
||||
total = np.sum(self.p)
|
||||
if total > 0:
|
||||
for i in range(0, len(self.p)):
|
||||
self.p[i] = self.p[i] / total
|
||||
|
||||
def reset_p(self):
|
||||
self.p.fill(1.0)
|
||||
self.normalize_p()
|
||||
|
||||
def threshold(self):
|
||||
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
|
||||
return 1.0 - (self.epoch / 1000)
|
||||
|
||||
def get_converged_index(self):
|
||||
for i in range(0, len(self.p)):
|
||||
if self.p[i] > self.threshold():
|
||||
return i
|
||||
return None
|
||||
|
||||
def add_layer(self):
|
||||
self.has_added_layer = True
|
||||
self.add_stop()
|
||||
layer = Candidate(self.knowns)
|
||||
self.layers.append(layer)
|
||||
self.base = self.cache_layers()
|
||||
self.knowns.pop()
|
||||
self.reset_p()
|
||||
|
||||
def random_sample(self):
|
||||
self.randomize_inputs()
|
||||
self.populate_distances()
|
||||
self.compute_expected_outputs()
|
||||
self.compute_base_outputs()
|
||||
return self.coherence(self.base_outputs)
|
||||
|
||||
def random_candidate(self):
|
||||
indices = self.knowns[:]
|
||||
np.copyto(self.p_temp, self.p)
|
||||
self.p_temp[self.actual_N] = 0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
for _ in range(0, self.num_terms - len(self.knowns)):
|
||||
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
|
||||
indices.append(index)
|
||||
flip = index ^ 0b1
|
||||
self.p_temp[index] = 0
|
||||
self.p_temp[flip] = 0
|
||||
for i in range(0, len(self.p_temp)):
|
||||
if i not in indices:
|
||||
indices.append(i)
|
||||
stop_id = index_hash(indices)
|
||||
indices.pop()
|
||||
if stop_id in self.stops:
|
||||
self.p_temp[i] = 0.0
|
||||
total = np.sum(self.p_temp)
|
||||
if total == 0:
|
||||
return None
|
||||
np.divide(self.p_temp, total, self.p_temp)
|
||||
return Candidate(indices)
|
||||
|
||||
def seed_candidate_pool(self):
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in self.candidate_ids:
|
||||
continue
|
||||
self.candidate_pool.append(candidate)
|
||||
self.candidate_ids.add(candidate_id)
|
||||
|
||||
def add_stop(self):
|
||||
stop_id = index_hash(self.knowns)
|
||||
self.stops.add(stop_id)
|
||||
|
||||
def get_distribution(self, candidate, half = 1):
|
||||
count = 0
|
||||
for i in range(0, len(self.inputs)):
|
||||
value = candidate.evaluate(self.inputs[i])
|
||||
if value == half:
|
||||
self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i]
|
||||
count += 1
|
||||
else:
|
||||
self.output_xor[i] = -1
|
||||
# return (count, self.mat_coherence())
|
||||
return (count, self.nn_coherence())
|
||||
|
||||
def err(self, fitted_model, bins, hist):
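# Mean squared error between a fitted model evaluated at the bin edges and the
# histogram values; appears intended for the commented-out Gaussian-fit scoring below.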
|
||||
err = 0
|
||||
for i in range(0, self.num_bins):
|
||||
x = bins[i + 1]
|
||||
y = hist[i]
|
||||
delta = fitted_model(x) - y
|
||||
err += delta * delta
|
||||
return err / self.num_bins
|
||||
|
||||
def update(self):
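# One epoch: record the base coherence, each column's coherence delta when XOR-ed in
# as an extra term, and each column's subspace uplift (with the subspace size as a
# weight). After self.samples epochs, one-sample t-tests (alternative='greater')
# score each column's deltas; depending on self.left_half the highest- or lowest-
# p-value column is chosen, and left_half is toggled when the same column repeats
# with little change in p-value. The chosen column is appended to knowns and
# committed as a layer.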
|
||||
sample = self.epoch
|
||||
self.epoch += 1
|
||||
|
||||
base_coherence = self.random_sample()
|
||||
self.base_coherence_samples[sample] = base_coherence
|
||||
candidate = Candidate(self.knowns[:])
|
||||
|
||||
for i in range(0, self.actual_N):
|
||||
candidate.indices.append(i)
|
||||
try:
|
||||
count_0, subspace_coherence_0 = self.get_distribution(candidate, 0)
|
||||
# count_1, subspace_coherence_1 = self.get_distribution(candidate, 1)
|
||||
# delta = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size
|
||||
# delta = subspace_coherence_0 - subspace_coherence_1
|
||||
self.subspace_uplift_samples[i][sample] = subspace_coherence_0 - base_coherence
|
||||
self.subspace_uplift_weights[i][sample] = count_0 / self.sample_size
|
||||
# self.subspace_uplift_left_samples[i][sample] = subspace_coherence_0
|
||||
# self.subspace_uplift_right_samples[i][sample] = subspace_coherence_1 - base_coherence
|
||||
|
||||
# if index_hash(candidate.indices) in self.stops:
|
||||
# continue
|
||||
|
||||
for j in range(0, len(self.inputs)):
|
||||
self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j])
|
||||
|
||||
coherence = self.coherence()
|
||||
self.coherence_samples[i][sample] = coherence - base_coherence
|
||||
# self.coherence_samples[i][sample] = coherence
|
||||
finally:
|
||||
candidate.indices.pop()
|
||||
|
||||
if self.epoch >= self.samples:
|
||||
# for i in range(0, self.actual_N):
|
||||
# parameters = stats.norm.fit(self.uplift_samples[i])
|
||||
# print(i, parameters)
|
||||
# print(i, stats.kstest(self.uplift_samples[i], "norm", parameters))
|
||||
|
||||
added = False
|
||||
# parameters = stats.norm.fit(self.base_coherence_samples)
|
||||
# (base_mu, _) = parameters
|
||||
|
||||
# (hist, bins) = np.histogram(self.base_coherence_samples, self.num_bins, density=True)
|
||||
# fitter = modeling.fitting.LevMarLSQFitter()
|
||||
# model = modeling.models.Gaussian1D()
|
||||
# fitted_model = fitter(model, bins[1:], hist)
|
||||
# print('Base', fitted_model.mean.value, self.err(fitted_model, bins, hist))
|
||||
|
||||
# x = np.linspace(0, 1.0, 10000)
|
||||
# density = stats.gaussian_kde(self.base_coherence_samples)(x)
|
||||
# mode = x[np.argsort(density)[-1]]
|
||||
# print(mode)
|
||||
|
||||
# for i in range(0, self.actual_N):
|
||||
# count = 0
|
||||
# for j in range(0, self.samples):
|
||||
# for k in range(0, self.samples):
|
||||
# if self.coherence_samples[i][j] > self.base_coherence_samples[k]:
|
||||
# count += 1
|
||||
# print(i, count)
|
||||
|
||||
try:
|
||||
index = -1
|
||||
lowest_index = -1
|
||||
lowest_pvalue = -1
|
||||
highest_index = -1
|
||||
highest_pvalue = -1
|
||||
best_pvalue = -1
|
||||
pvalue_sum = 0
|
||||
pvalue_denom = 0
|
||||
is_subspace = False
|
||||
|
||||
for i in range(0, self.actual_N):
|
||||
if i in self.knowns:
|
||||
continue
|
||||
try:
|
||||
result = stats.ttest_1samp(self.coherence_samples[i], 0, alternative='greater')
|
||||
print(i, result)
|
||||
# (hist, bins) = np.histogram(self.coherence_samples[i], 20, range=(-0.01, 0.01))
|
||||
# total = 0
|
||||
# for j in range(0, 20):
|
||||
# total += hist[j] * (bins[j] + bins[j + 1]) / 2
|
||||
# mode = total / sum(hist)
|
||||
|
||||
# fitter = modeling.fitting.LevMarLSQFitter()
|
||||
# model = modeling.models.Gaussian1D()
|
||||
# fitted_model = fitter(model, bins[1:], hist)
|
||||
# mode = fitted_model.mean.value
|
||||
# print(i, total)
|
||||
|
||||
# result = stats.kstest(self.base_coherence_samples, self.coherence_samples[i], alternative='greater')
|
||||
# print(i, result)
|
||||
# value = result.pvalue * (1 - result.statistic)
|
||||
# parameters = stats.norm.fit(self.coherence_samples[i])
|
||||
# (mu, _) = parameters
|
||||
# density = stats.gaussian_kde(self.coherence_samples[i])(x)
|
||||
# mode = x[np.argsort(density)[-1]]
|
||||
# print(i, mode)
|
||||
# print(i, mu)
|
||||
if not isnan(result.pvalue):
|
||||
if i == self.last_index:
|
||||
delta = abs(result.pvalue - self.last_pvalue)
|
||||
if delta < 0.1:
|
||||
print('Low delta!')
|
||||
print(self.last_index, delta)
|
||||
# self.last_index = -1
|
||||
self.left_half = not self.left_half
|
||||
# self.layers.pop()
|
||||
# self.base = self.cache_layers()
|
||||
# return
|
||||
|
||||
pvalue_sum += result.pvalue
|
||||
pvalue_denom += 1
|
||||
if lowest_index < 0 or result.pvalue < lowest_pvalue:
|
||||
lowest_index = i
|
||||
lowest_pvalue = result.pvalue
|
||||
if highest_index < 0 or result.pvalue > highest_pvalue:
|
||||
highest_index = i
|
||||
highest_pvalue = result.pvalue
|
||||
except Exception as e:
|
||||
print(e)
|
||||
pass
|
||||
average_pvalue = pvalue_sum / pvalue_denom
|
||||
print(average_pvalue)
|
||||
index = highest_index if self.left_half else lowest_index
|
||||
best_pvalue = highest_pvalue if self.left_half else lowest_pvalue
|
||||
|
||||
self.last_index = index
|
||||
self.last_pvalue = best_pvalue
|
||||
# if average_pvalue < 0.5:
|
||||
# index = lowest_index
|
||||
# best_pvalue = lowest_pvalue
|
||||
# else:
|
||||
# index = highest_index
|
||||
# best_pvalue = highest_pvalue
|
||||
# print(e)
|
||||
|
||||
# for i in range(0, self.actual_N):
|
||||
# if i in self.knowns:
|
||||
# continue
|
||||
# # result = stats.kstest(self.base_coherence_samples, self.subspace_uplift_left_samples[i], alternative='greater')
|
||||
# # # result = stats.kstest(self.subspace_uplift_left_samples[i], self.subspace_uplift_right_samples[i], alternative='greater')
|
||||
# # print(i, result)
|
||||
# # value = result.pvalue * (1 - result.statistic)
|
||||
# # parameters = stats.norm.fit(self.subspace_uplift_left_samples[i])
|
||||
# # (mu, _) = parameters
|
||||
# try:
|
||||
# result = stats.ttest_1samp(self.subspace_uplift_samples[i], 0, alternative='greater')
|
||||
# print(i, result)
|
||||
# # (hist, bins) = np.histogram(self.subspace_uplift_samples[i], 20, range=(-0.01, 0.01))
|
||||
# # bin_index = np.argsort(hist)[-1]
|
||||
# # mode = (bins[bin_index] + bins[bin_index + 1]) / 2
|
||||
# # fitter = modeling.fitting.LevMarLSQFitter()
|
||||
# # model = modeling.models.Gaussian1D()
|
||||
# # fitted_model = fitter(model, bins[1:], hist)
|
||||
# # mode = fitted_model.mean.value
|
||||
# # print(i, mode)
|
||||
# # density = stats.gaussian_kde(self.subspace_uplift_samples[i], weights=self.subspace_uplift_weights[i])(x)
|
||||
# # density = stats.gaussian_kde(self.subspace_uplift_samples[i])(x)
|
||||
# # mode = x[np.argsort(density)[-1]]
|
||||
# # print(i, mode)
|
||||
# # print(i, mu)
|
||||
# if (index < 0 or result.pvalue < lowest_pvalue) and not isnan(result.pvalue):
|
||||
# # if index < 0 or value < lowest_pvalue:
|
||||
# index = i
|
||||
# lowest_pvalue = result.pvalue
|
||||
# is_subspace = True
|
||||
|
||||
# # if result.pvalue > 0.95:
|
||||
# # index = i
|
||||
# # parameters = stats.norm.fit(self.subspace_uplift_samples[i])
|
||||
# # (mu, _) = parameters
|
||||
# # if mu > base_mu:
|
||||
# # if index < 0 or mu > highest_mu:
|
||||
# # index = i
|
||||
# # highest_mu = mu
|
||||
# except Exception as e:
|
||||
# print(e)
|
||||
# pass
|
||||
# # print(e)
|
||||
|
||||
if index >= 0:
|
||||
if is_subspace:
|
||||
# print('subspace')
|
||||
self.knowns.append(index)
|
||||
print(self.knowns, best_pvalue)
|
||||
else:
|
||||
# print('flat')
|
||||
self.knowns.append(index)
|
||||
# self.layer_confidence[index_hash(self.knowns)] = confidence
|
||||
# num_terms = len(self.knowns)
|
||||
print(self.knowns, best_pvalue)
|
||||
print(base_coherence)
|
||||
self.add_layer()
|
||||
# if num_terms > self.num_terms:
|
||||
# self.stops = set()
|
||||
# self.num_terms = num_terms
|
||||
self.knowns = []
|
||||
return
|
||||
else:
|
||||
self.knowns = []
|
||||
# else:
|
||||
# self.knowns = []
|
||||
|
||||
# if len(self.knowns) > 0:
|
||||
# # self.add_stop()
|
||||
# self.knowns = []
|
||||
finally:
|
||||
# fig, axs = plt.subplots(int(self.actual_N / 4), 4)
|
||||
# x_eval = np.linspace(-1.0, 1.0, num=1000)
|
||||
# for i in range(0, int(self.actual_N / 4)):
|
||||
# for j in range(0, 4):
|
||||
# # (hist, bins) = np.histogram(self.base_coherence_samples, self.num_bins, density=True)
|
||||
# # fitter = modeling.fitting.LevMarLSQFitter()
|
||||
# # model = modeling.models.Gaussian1D()
|
||||
# # fitted_model = fitter(model, bins[1:], hist)
|
||||
# # axs[i][j].scatter(bins[1:], hist, s=1, color='r', alpha=0.5)
|
||||
# # axs[i][j].plot(x_eval, fitted_model(x_eval), color='r')
|
||||
|
||||
# (hist, bins) = np.histogram(self.coherence_samples[i * 4 + j], self.num_bins, density=True)
|
||||
# # fitter = modeling.fitting.LevMarLSQFitter()
|
||||
# # model = modeling.models.Gaussian1D()
|
||||
# # fitted_model = fitter(model, bins[1:], hist)
|
||||
# axs[i][j].scatter(bins[1:], hist, s=1, color='g', alpha=0.5)
|
||||
# # axs[i][j].plot(x_eval, fitted_model(x_eval), color='g')
|
||||
|
||||
# (hist, bins) = np.histogram(self.subspace_uplift_samples[i * 4 + j], self.num_bins, density=True)
|
||||
# # fitter = modeling.fitting.LevMarLSQFitter()
|
||||
# # model = modeling.models.Gaussian1D()
|
||||
# # fitted_model = fitter(model, bins[1:], hist)
|
||||
# axs[i][j].scatter(bins[1:], hist, s=1, color='b', alpha=0.5)
|
||||
# # axs[i][j].plot(x_eval, fitted_model(x_eval), color='b')
|
||||
|
||||
# # kde0 = stats.gaussian_kde(self.base_coherence_samples)
|
||||
# kde1 = stats.gaussian_kde(self.coherence_samples[i * 4 + j])
|
||||
# # kde2 = stats.gaussian_kde(self.subspace_uplift_samples[i * 4 + j], weights=self.subspace_uplift_weights[i])
|
||||
# kde2 = stats.gaussian_kde(self.subspace_uplift_samples[i * 4 + j])
|
||||
# # axs[i][j].plot(x_eval, kde0(x_eval), color='r')
|
||||
# axs[i][j].plot(x_eval, kde1(x_eval), color='g')
|
||||
# axs[i][j].plot(x_eval, kde2(x_eval), color='b')
|
||||
# # n, bins, patches = axs[i][j].hist(self.base_coherence_samples, 50, density=True, facecolor='r', alpha=0.5)
|
||||
# # n, bins, patches = axs[i][j].hist(self.coherence_samples[i * 4 + j], 50, density=True, facecolor='g', alpha=0.5)
|
||||
# # n, bins, patches = axs[i][j].hist(self.subspace_uplift_samples[i * 4 + j], 50, density=True, facecolor='b', alpha=0.5)
|
||||
# plt.show()
|
||||
self.epoch = 0
|
||||
|
||||
return
|
||||
|
||||
# print('=====' + str(base_coherence))
|
||||
# print(self.uplifts)
|
||||
# print(self.uplift_means)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplift_stddevs)
|
||||
# print(self.uplift_ranges)
|
||||
# print(self.uplift_convergences)
|
||||
# print(self.subspace_uplifts)
|
||||
|
||||
if index >= 0:
|
||||
self.knowns.append(index)
|
||||
print(base_coherence)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplift_medians)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.add_layer()
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
if subspace_index >= 0:
|
||||
self.knowns.append(subspace_index)
|
||||
print(self.knowns, self.epoch)
|
||||
# print(self.uplifts)
|
||||
# print(self.subspace_uplifts)
|
||||
self.uplifts.fill(0)
|
||||
self.subspace_uplifts.fill(0)
|
||||
self.uplift_medians.fill(0)
|
||||
self.uplift_convergences.fill(0)
|
||||
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
|
||||
self.epoch = 0
|
||||
return
|
||||
|
||||
# print('======')
|
||||
# print(self.epoch, base_coherence)
|
||||
# print('======')
|
||||
|
||||
# if len(self.candidate_pool) == 0:
|
||||
# print(self.p)
|
||||
|
||||
# for i in range(0, min(5, len(self.candidate_pool))):
|
||||
# candidate = self.candidate_pool[i]
|
||||
# print(candidate.id(), candidate.uplift)
|
||||
|
||||
# if self.epoch < 15:
|
||||
# return
|
||||
|
||||
if self.candidate_pool[0].uplift > 0.3:
|
||||
candidate = self.candidate_pool[0]
|
||||
candidate_id = candidate.id()
|
||||
self.candidate_ids.remove(candidate_id)
|
||||
print(candidate_id)
|
||||
self.knowns = candidate.indices
|
||||
self.add_layer()
|
||||
self.knowns = []
|
||||
self.reset_p()
|
||||
self.epoch = 0
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
|
||||
self.epoch = 0
|
||||
self.num_terms += 1
|
||||
self.candidate_pool = []
|
||||
self.candidate_ids = set()
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
return
|
||||
|
||||
# np.copyto(self.next_p, self.p)
|
||||
for _ in range(0, self.num_candidates):
|
||||
candidate = self.random_candidate()
|
||||
if candidate is None:
|
||||
continue
|
||||
candidate_id = candidate.id()
|
||||
if candidate_id in visited:
|
||||
continue
|
||||
visited.add(candidate_id)
|
||||
if self.actual_N in candidate.indices:
|
||||
continue
|
||||
has_candidate = True
|
||||
for i in range(0, len(self.inputs)):
|
||||
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
|
||||
# coherence = self.ring_coherence()
|
||||
coherence = self.coherence()
|
||||
# if coherence <= base_coherence:
|
||||
# continue
|
||||
# for index in candidate.indices:
|
||||
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
|
||||
# self.p_temp[index] += 0
|
||||
for index in candidate.indices:
|
||||
if coherence > self.max_coherences[index]:
|
||||
self.max_coherences[index] = coherence
|
||||
self.max_candidates[index] = candidate
|
||||
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
|
||||
# np.copyto(self.p, self.next_p)
|
||||
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
for i in range(0, self.actual_N):
|
||||
candidate = self.max_candidates[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
for index in candidate.indices:
|
||||
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
|
||||
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
|
||||
self.normalize_p()
|
||||
# print(self.p)
|
||||
|
||||
# np.subtract(self.p_temp, self.p, self.p_temp)
|
||||
# np.abs(self.p_temp, self.p_temp)
|
||||
# delta = np.sum(self.p_temp) / len(self.p_temp)
|
||||
# print(delta, np.argmax(self.p))
|
||||
# np.copyto(self.p_temp, self.p)
|
||||
# for i in range(0, len(self.p_temp)):
|
||||
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
|
||||
# print(self.p_temp)
|
||||
|
||||
index = np.argmax(self.p)
|
||||
delta_over_null = self.p[index] - self.p[self.actual_N]
|
||||
if self.epoch == 0:
|
||||
self.average_delta_over_null = delta_over_null
|
||||
else:
|
||||
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
|
||||
diff = self.num_terms - len(self.knowns)
|
||||
|
||||
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
|
||||
|
||||
# Always iterate for a minimum number of epochs
|
||||
if self.epoch < 15:
|
||||
return
|
||||
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
|
||||
return
|
||||
if self.average_delta_over_null < 0.001:
|
||||
index = self.actual_N
|
||||
else:
|
||||
index = np.argmax(self.p)
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
# if index == self.last_value:
|
||||
# self.rounds += 1
|
||||
# else:
|
||||
# self.rounds = 0
|
||||
# self.last_value = index
|
||||
|
||||
# if self.rounds < 10 and self.epoch < 100:
|
||||
# return
|
||||
|
||||
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
|
||||
# return
|
||||
|
||||
# index = np.argmax(self.p)
|
||||
|
||||
# print(self.p)
|
||||
# print(self.threshold())
|
||||
# print(self.p)
|
||||
# index = self.get_converged_index()
|
||||
if index is not None or not has_candidate:
|
||||
# print(index, delta, np.argmax(self.p))
|
||||
self.epoch = 0
|
||||
if index == self.actual_N or not has_candidate:
|
||||
if len(self.knowns) > 0:
|
||||
self.add_stop()
|
||||
self.knowns.pop()
|
||||
print('Backtrack: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
return
|
||||
self.num_terms += 1
|
||||
self.knowns = []
|
||||
self.stops = set()
|
||||
self.reset_p()
|
||||
print(self.num_terms)
|
||||
return
|
||||
self.knowns.append(index)
|
||||
# bisect.insort(self.knowns, index)
|
||||
if len(self.knowns) == self.num_terms:
|
||||
print('Add layer: ' + str(self.knowns))
|
||||
self.add_layer()
|
||||
else:
|
||||
print('Found term: ' + str(self.knowns))
|
||||
self.reset_p()
|
||||
print(base_coherence)
|
||||
return
|
||||
|
||||
def cache_layers(self):
|
||||
expr = 'def f(x):\n\tresult=0\n'
|
||||
for layer in self.layers:
|
||||
expr += '\tresult^=' + layer.eval_str() + '\n'
|
||||
expr += '\treturn result\n'
|
||||
scope = {}
|
||||
exec(expr, scope)
|
||||
return scope['f']
|
||||
|
||||
def main():
|
||||
probabilities = Probabilities()
|
||||
# probabilities.knowns = [14]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [8]
|
||||
# probabilities.add_layer()
|
||||
# probabilities.knowns = [4]
|
||||
# probabilities.add_layer()
|
||||
while probabilities.num_terms <= probabilities.N:
|
||||
probabilities.update()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
1052
mutations19.py
Normal file
File diff suppressed because it is too large
570
mutations2.py
Normal file
@ -0,0 +1,570 @@
|
||||
import hashlib
|
||||
import math
|
||||
from matplotlib import offsetbox
|
||||
import numpy as np
|
||||
import random
|
||||
from struct import pack, pack_into, unpack_from
|
||||
import secrets
|
||||
|
||||
from numpy import hamming
|
||||
|
||||
N = 32
|
||||
M = 2
|
||||
|
||||
def bit_at_index(buffer, index):
|
||||
offset = (index >> 3) % len(buffer)
|
||||
return buffer[offset] & (1 << (index & 0b111)) != 0
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def encode_f(f, buffer, offset=0):
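# Serialise a tree f = (inverted, flips, child): a 4-byte inverted flag, then for each
# flip index an opcode 0 followed by the index; a leaf terminates with opcode 1, while
# an internal gate writes opcode 2 (or 3 when the gate is inverted) followed by the
# encodings of its left and right subtrees.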
|
||||
(inverted, flips, child) = f
|
||||
pack_into('I', buffer, offset, inverted)
|
||||
offset += 4
|
||||
for index in flips:
|
||||
pack_into('I', buffer, offset, 0)
|
||||
offset += 4
|
||||
pack_into('I', buffer, offset, index)
|
||||
offset += 4
|
||||
if child is None:
|
||||
pack_into('I', buffer, offset, 1)
|
||||
offset += 4
|
||||
return offset
|
||||
(inverted, left, right) = child
|
||||
pack_into('I', buffer, offset, 2 if not inverted else 3)
|
||||
offset += 4
|
||||
offset = encode_f(left, buffer, offset)
|
||||
offset = encode_f(right, buffer, offset)
|
||||
return offset
|
||||
|
||||
def generate_random_branch(p_mutation):
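# Build a random node: a random inversion flag, a run of random flip indices added
# while a coin keeps landing under p_add_indices, and a run of recursively generated
# (inverted, left, right) child gates added while a coin lands under p_add_children.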
|
||||
global N
|
||||
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
inverted = random.randint(0, 1)
|
||||
indices = set()
|
||||
children = []
|
||||
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in indices]
|
||||
if len(available_indices) == 1:
|
||||
indices.add(available_indices[0])
|
||||
continue
|
||||
indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_add_children)
|
||||
right = generate_random_branch(p_add_children)
|
||||
children.append((child_inverted, left, right))
|
||||
return (inverted, indices, children)
|
||||
|
||||
def mutate_f(f, p_mutation):
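# Return a mutated copy of f = (inverted, indices, children): possibly flip the
# inversion, drop or add flip indices, drop, clone or recursively mutate child gates,
# and append freshly generated random branches, each step gated by its own probability
# derived from p_mutation.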
|
||||
global N
|
||||
(inverted, indices, children) = f
|
||||
mutated_indices = set(indices)
|
||||
mutated_children = children[:]
|
||||
|
||||
p_invert = p_mutation * random.random()
|
||||
p_drop_indices = p_mutation * random.random()
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_drop_children = p_mutation * random.random()
|
||||
p_mutate_child = p_mutation * random.random()
|
||||
p_clone_child = p_mutation * random.random()
|
||||
p_invert_child = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
# randomly invert
|
||||
if random.random() < p_invert:
|
||||
inverted ^= 1
|
||||
# randomly drop indices
|
||||
while random.random() < p_drop_indices and len(mutated_indices) > 0:
|
||||
mutated_indices.pop()
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(mutated_indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in mutated_indices]
|
||||
if len(available_indices) == 1:
|
||||
mutated_indices.add(available_indices[0])
|
||||
continue
|
||||
mutated_indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly drop children
|
||||
while random.random() < p_drop_children and len(mutated_children) > 0:
|
||||
if len(mutated_children) == 1:
|
||||
del mutated_children[0]
|
||||
break
|
||||
del mutated_children[random.randint(0, len(mutated_children) - 1)]
|
||||
# randomly clone children
|
||||
while random.random() < p_clone_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
clone = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
mutated_children.append(clone)
|
||||
# randomly mutate children
|
||||
while random.random() < p_mutate_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
mutated_children[index] = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_mutation)
|
||||
right = generate_random_branch(p_mutation)
|
||||
mutated_children.append((child_inverted, left, right))
|
||||
return (inverted, mutated_indices, mutated_children)
|
||||
|
||||
def decode_f(buffer, mutate = False, offset = 0, skip_invert = False):
|
||||
global N
|
||||
inverted = 0
|
||||
if not skip_invert:
|
||||
[inverted] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
# random invert
|
||||
if mutate and random.random() < 0.01:
|
||||
inverted ^= 1
|
||||
inverted &= 0b1
|
||||
flips = set()
|
||||
# random add flip
|
||||
while mutate and random.random() < 0.5 and len(flips) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in flips]
|
||||
if len(available_indices) == 1:
|
||||
flips.add(available_indices[0])
|
||||
continue
|
||||
flips.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
while offset < len(buffer):
|
||||
# random create branch
|
||||
if mutate and random.random() < 0.01:
|
||||
gate_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(0.01)  # assumed mutation rate, matching the 1% branch-creation probability used in this block
|
||||
(offset, right) = decode_f(buffer, mutate, offset, True)
|
||||
return (offset, (inverted, flips, (gate_inverted, left, right)))
|
||||
[opcode] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
opcode &= 0b11
|
||||
if opcode == 0:
|
||||
[index] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
# random skip flip
|
||||
if mutate and random.random() < 0.01:
|
||||
continue
|
||||
if index in flips:
|
||||
flips.remove(index)
|
||||
else:
|
||||
flips.add(index)
|
||||
elif opcode == 1:
|
||||
return (offset, (inverted, flips, None))
|
||||
else:
|
||||
(offset, left) = decode_f(buffer, mutate, offset)
|
||||
(offset, right) = decode_f(buffer, mutate, offset)
|
||||
gate_inverted = 0 if opcode == 2 else 1
|
||||
# random invert
|
||||
if mutate and random.random() < 0.01:
|
||||
gate_inverted ^= 1
|
||||
# random skip branch
|
||||
if mutate and random.random() < 0.01:
|
||||
return (offset, (inverted, flips, None))
|
||||
return (offset, (inverted, flips, (gate_inverted, left, right)))
|
||||
return (offset, (inverted, [], None))
|
||||
|
||||
def generate_program(model, output_var='output'):
|
||||
global N, M
|
||||
(constant, indices, child) = model
|
||||
|
||||
statement = 'multiply(' + np.array2string(indices, separator=',') + ', x, temp)\n\t'
|
||||
statement += output_var + '=' + str(constant) + '+sum(temp)\n\t'
|
||||
|
||||
if not child is None:
|
||||
left_output = output_var + '0'
|
||||
right_output = output_var + '1'
|
||||
(left, right) = child
|
||||
statement += generate_program(left, left_output)
|
||||
statement += generate_program(right, right_output)
|
||||
statement += output_var + '+=' + left_output + '*' + right_output + '\n\t'
|
||||
statement += output_var + '%=' + str(M) + '\n\t'
|
||||
return statement
|
||||
|
||||
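# Illustrative sketch (not from the original source): for a leaf model such as
# (1, np.array([1., 0., 0., ...]), None) with M = 2, generate_program/compile build and
# exec roughly the following source, with multiply/sum bound to np.multiply/np.sum:
#
#   def f(x, temp):
#   	multiply([1.,0.,0., ...], x, temp)
#   	output=1+sum(temp)
#   	output%=2
#   	return output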
def compile(model):
|
||||
program = 'def f(x, temp):\n\t' + generate_program(model) + 'return output'
|
||||
scope = {'multiply': np.multiply, 'sum': np.sum}
|
||||
exec(program, scope)
|
||||
return scope['f']
|
||||
|
||||
def evaluate(model, x, value = 0):
|
||||
(inverted, indices, children) = model
|
||||
for i in indices:
|
||||
if bit_at_index(x, i) != 0:
|
||||
value ^= 1
|
||||
for child in children:
|
||||
(child_inverted, left, right) = child
|
||||
left = evaluate(left, x)
|
||||
right = evaluate(right, x)
|
||||
if left & right != child_inverted:
|
||||
value ^= 1
|
||||
if inverted:
|
||||
value ^= 1
|
||||
return value
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for n in x:
|
||||
num_one_bits += count_one_bits(n)
|
||||
return num_one_bits % 2
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n))
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
return inputs
|
||||
|
||||
def update_sample(sample, index):
|
||||
global N
|
||||
for j in range(0, N):
|
||||
sample[index][j] = random.randint(0, 1)
|
||||
|
||||
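# Coherence score: every other sample is weighted by 2**-hamming_distance from the point
# under consideration; a point's coherence is the weighted fraction of samples sharing its
# output, and the function returns the mean over all points (1.0 = outputs fully consistent
# with input proximity).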
def coherence(inputs, outputs, scratch):
|
||||
coherences = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
y_b = outputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
weight = 1.0 / (2 ** distance)
|
||||
denominator += weight
|
||||
if y_a == y_b:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
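# Precomputes, per point, a truncated neighbourhood for the coherence score: neighbours are
# sorted by Hamming distance, zero distances are skipped, weights are 2**-(distance - nearest
# nonzero distance), and the scan stops 8 steps past the nearest neighbour. fast_coherence()
# then re-scores any output vector from these cached (denominator, [(weight, j), ...]) models
# without recomputing distances, at the cost of being an approximation of coherence().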
def build_coherence_models(inputs, scratch):
|
||||
coherence_models = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
distances = [hamming_distance(x_a, inputs[j], scratch) for j in range(0, len(inputs))]
|
||||
indices = sorted(range(len(distances)), key=lambda i: distances[i])
|
||||
lowest = -1
|
||||
denominator = 0
|
||||
components = []
|
||||
for index in range(0, len(indices)):
|
||||
j = indices[index]
|
||||
if distances[j] == 0:
|
||||
continue
|
||||
if lowest < 0:
|
||||
lowest = distances[j]
|
||||
distance = distances[j] - lowest
|
||||
if distance >= 8:
|
||||
break
|
||||
weight = 2 ** -distance
|
||||
denominator += weight
|
||||
components.append((weight, j))
|
||||
coherence_models.append((denominator, components))
|
||||
return coherence_models
|
||||
|
||||
def fast_coherence(coherence_models, outputs):
|
||||
coherences = []
|
||||
for i in range(0, len(coherence_models)):
|
||||
(denominator, components) = coherence_models[i]
|
||||
numerator = 0
|
||||
for component in components:
|
||||
(weight, j) = component
|
||||
if outputs[i] == outputs[j]:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def score(f, sample, distances):
|
||||
residuals = [(f(x, np.zeros((N,))) - y) % M for (x, y) in sample]
return coherence([x for (x, _) in sample], residuals, np.zeros((N,)))
|
||||
|
||||
def compute_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
a = inputs[i]
|
||||
for j in range(i, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def update_distances(inputs, distances, i, scratch):
|
||||
a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
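# Iterative (stack-based) evaluation of a layered model over the whole sample: a leaf layer
# XORs its selected inputs (matmul then mod 2) into its scratch buffer; an internal layer
# waits until both children are marked touched, ANDs their scratch buffers (using `output`
# as a temporary), optionally inverts, and XORs the product into its own XOR term. The
# root's scratch is finally copied into `output` and reset_model() clears the touched flags.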
def evaluate_sample(model, sample, output):
|
||||
stack = [model]
|
||||
(_, _, _, root_scratch, _) = model
|
||||
while len(stack) > 0:
|
||||
layer = stack.pop()
|
||||
(inverted, xors, child, scratch, touched) = layer
|
||||
if child is None:
|
||||
np.matmul(sample, xors, scratch)
|
||||
np.mod(scratch, 2, scratch)
|
||||
if inverted == 1:
|
||||
np.logical_xor(1, scratch, scratch)
|
||||
touched[0] = 1
|
||||
else:
|
||||
(child_inverted, left, right) = child
|
||||
(_, _, _, left_scratch, left_touched) = left
|
||||
(_, _, _, right_scratch, right_touched) = right
|
||||
if left_touched[0] and right_touched[0]:
|
||||
np.multiply(left_scratch, right_scratch, output)
|
||||
np.matmul(sample, xors, scratch)
|
||||
np.mod(scratch, 2, scratch)
|
||||
if inverted:
|
||||
np.logical_xor(scratch, 1, scratch)
|
||||
if child_inverted:
|
||||
np.logical_xor(output, 1, output)
|
||||
np.logical_xor(scratch, output, scratch)
|
||||
touched[0] = 1
|
||||
else:
|
||||
stack.insert(0, layer)
|
||||
stack.insert(0, left)
|
||||
stack.insert(0, right)
|
||||
np.copyto(output, root_scratch)
|
||||
reset_model(model)
|
||||
|
||||
def reset_model(model):
|
||||
stack = [model]
|
||||
while len(stack) > 0:
|
||||
layer = stack.pop()
|
||||
(_, _, child, _, touched) = layer
|
||||
touched[0] = 0
|
||||
if not child is None:
|
||||
(_, left, right) = child
|
||||
stack.append(left)
|
||||
stack.append(right)
|
||||
|
||||
def clone_model(model, p_mutation):
|
||||
global N, M
|
||||
|
||||
p_constant = p_mutation * random.random()
|
||||
p_flip = p_mutation * random.random()
|
||||
p_add_child = p_mutation * random.random()
|
||||
p_drop_child = p_mutation * random.random()
|
||||
|
||||
(constant, xors, child) = model
|
||||
if random.random() < p_constant:
|
||||
constant += random.randint(0, M - 1)
|
||||
constant %= M
|
||||
clone_xors = np.zeros((N,))
|
||||
np.copyto(clone_xors, xors)
|
||||
for i in range(0, N):
|
||||
if random.random() < p_flip:
|
||||
offset = 1 if M == 2 else random.randint(1, M - 1)
|
||||
clone_xors[i] += offset
|
||||
clone_xors[i] %= M
|
||||
if child is None:
|
||||
if random.random() < p_add_child:
|
||||
left = random_child(p_mutation)
|
||||
right = random_child(p_mutation)
|
||||
return (constant, clone_xors, (left, right))
|
||||
return (constant, clone_xors, None)
|
||||
if random.random() < p_drop_child:
|
||||
return (constant, clone_xors, None)
|
||||
(left, right) = child
|
||||
clone_left = clone_model(left, p_mutation)
|
||||
clone_right = clone_model(right, p_mutation)
|
||||
return (constant, clone_xors, (clone_left, clone_right))
|
||||
|
||||
def random_child(p_mutation):
|
||||
global N, M
|
||||
constant = random.randint(0, M - 1)
|
||||
xors = np.zeros((N,))
|
||||
|
||||
p_flip = p_mutation * random.random()
|
||||
p_child = p_mutation * random.random()
|
||||
|
||||
index = random.randint(0, N - 1)
|
||||
xors[index] = 1 if M == 2 else random.randint(1, M - 1)
|
||||
for i in range(0, N):
|
||||
if i != index and random.random() < p_flip:
|
||||
xors[i] = 1 if M == 2 else random.randint(1, M - 1)
|
||||
# if random.random() < p_child:
|
||||
# left = random_child(p_mutation * random.random())
|
||||
# right = random_child(p_mutation * random.random())
|
||||
# return (constant, xors, (left, right))
|
||||
return (constant, xors, None)
|
||||
|
||||
def null_candidate():
|
||||
global N
|
||||
return (0, np.zeros((N,)), None)
|
||||
|
||||
def size(model):
|
||||
(_, xors, child) = model
|
||||
xor_size = np.sum(xors)
|
||||
if not child is None:
|
||||
(left, right) = child
|
||||
return xor_size + size(left) * size(right)
|
||||
return xor_size
|
||||
|
||||
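# Evolutionary loop: every epoch each candidate model is compiled and run on the sample, its
# residual against the sha() target (difference mod M) is scored with fast_coherence, the top
# num_survivors are kept and each spawns num_offspring clones mutated at a random rate.
# p_mutation creeps up by 0.01 while the best score stagnates and resets to 0.5 on
# improvement; after the best score repeats for 4 epochs the sample and coherence models are
# redrawn.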
def main():
|
||||
global N, M
|
||||
epochs = 10000
|
||||
num_survivors = 100
|
||||
num_offspring = 10
|
||||
num_candidates = num_survivors + num_survivors * num_offspring
|
||||
sample_size = 128
|
||||
eval_size = 100
|
||||
p_mutation = 0.5
|
||||
g = sha
|
||||
current_generation = [null_candidate() for _ in range(0, num_candidates)]
|
||||
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
output_equality = np.zeros((sample_size, sample_size))
|
||||
inputs = random_sample(sample_size, N)
|
||||
scratch = np.zeros(N,)
|
||||
# compute_distances(inputs, distances, scratch)
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((sample_size,))
|
||||
output_xor = np.zeros((sample_size,))
|
||||
ones = np.ones((sample_size,))
|
||||
numerators = np.zeros((sample_size,))
|
||||
denominators = np.zeros((sample_size,))
|
||||
coherences = np.zeros((sample_size,))
|
||||
np.matmul(ones, distances, denominators)
|
||||
scores = np.zeros((num_candidates,))
|
||||
max_score = 0
|
||||
last_score = 0
|
||||
streak = 0
|
||||
|
||||
coherence_models = build_coherence_models(inputs, scratch)
|
||||
|
||||
for epoch in range(0, epochs):
|
||||
for i in range(0, num_candidates):
|
||||
candidate = current_generation[i]
|
||||
f = compile(candidate)
|
||||
for j in range(0, sample_size):
|
||||
outputs[j] = f(inputs[j], scratch)
|
||||
np.subtract(outputs, expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
# for p in range(0, sample_size):
|
||||
# for q in range(0, sample_size):
|
||||
# m = int(output_xor[p])
|
||||
# n = int(output_xor[q])
|
||||
# distance = abs(m - n)
|
||||
# if distance > M / 2:
|
||||
# distance = M - distance
|
||||
# distance /= (M / 2)
|
||||
# distance **= 2
|
||||
# output_equality[p][q] = distance
|
||||
# # output_equality[p][q] = 1 if m == n else 0
|
||||
# np.multiply(output_equality, distances, output_equality)
|
||||
# np.matmul(ones, output_equality, numerators)
|
||||
# np.divide(numerators, denominators, coherences)
|
||||
# score = np.average(coherences)
|
||||
score = fast_coherence(coherence_models, output_xor)
|
||||
# if random.random() < 0.1:
|
||||
# check = coherence(inputs, output_xor, scratch)
|
||||
# if check - score > 1e-3:
|
||||
# print('not equal')
|
||||
scores[i] = score
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
survivors = [current_generation[index] for index in top_n]
|
||||
|
||||
# f = lambda x: evaluate(current_generation[0], x)
|
||||
# correct = 0
|
||||
# for i in range(0, eval_size):
|
||||
# x = random_input()
|
||||
# if f(x) == g(x):
|
||||
# correct += 1
|
||||
|
||||
top_score = scores[top_n[-1]]
|
||||
print(epoch, top_score, size(survivors[-1]))
|
||||
if top_score <= max_score:
|
||||
p_mutation += 0.01
|
||||
else:
|
||||
p_mutation = 0.5
|
||||
max_score = top_score
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
current_generation[i] = survivors[i]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
candidate = survivors[i]
|
||||
for j in range(0, num_offspring):
|
||||
index = num_survivors + j * num_survivors + i
|
||||
current_generation[index] = clone_model(candidate, random.random())
|
||||
|
||||
# inputs = random_sample(sample_size, N)
|
||||
# coherence_models = build_coherence_models(inputs, scratch)
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
|
||||
# while random.random() < 0.5:
|
||||
if last_score == top_score:
|
||||
streak += 1
|
||||
else:
|
||||
streak = 0
|
||||
if streak >= 4:
|
||||
inputs = random_sample(sample_size, N)
|
||||
coherence_models = build_coherence_models(inputs, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
# inputs = random_sample(sample_size, N)
|
||||
# coherence_models = build_coherence_models(inputs, scratch)
|
||||
# # compute_distances(inputs, distances, scratch)
|
||||
# # np.matmul(ones, distances, denominators)
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# streak = 0
|
||||
# expected_outputs = np.zeros((sample_size,))
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# index = random.randint(0, sample_size - 1)
|
||||
# update_sample(inputs, index)
|
||||
# expected_outputs[index] = g(inputs[index])
|
||||
# update_distances(inputs, distances, index, scratch)
|
||||
# np.matmul(ones, distances, denominators)
|
||||
last_score = top_score
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
316
mutations20.py
Normal file
316
mutations20.py
Normal file
@ -0,0 +1,316 @@
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ''.join([str(x) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
self.coherent = self.a.y == self.b.y
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b, flips):
|
||||
distance = 0
|
||||
for i in range(0, len(a.x)):
|
||||
if i in flips:
|
||||
continue
|
||||
distance += 1 if a.x[i] != b.x[i] else 0
|
||||
return distance
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor(x):
|
||||
return np.sum(x[16:]) % 2
|
||||
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent:
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent:
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# score_sum = 0
|
||||
# for j, j_influences in dof_map.items():
|
||||
# if j in lowest_dof.dof:
|
||||
# continue
|
||||
# double_score = 0
|
||||
# double_totals = [0, 0, 0, 0, 0, 0]
|
||||
# for influence in i_influences:
|
||||
# if influence in j_influences:
|
||||
# weight = 1.0 / ((len(influence.dof) - 2) ** 2)
|
||||
# if influence.coherent:
|
||||
# double_score += weight
|
||||
# double_totals[0] += 1
|
||||
# else:
|
||||
# double_score -= weight
|
||||
# double_totals[3] += 1
|
||||
# else:
|
||||
# weight = 1.0 / ((len(influence.dof) - 1) ** 2)
|
||||
# if influence.coherent:
|
||||
# double_score -= weight
|
||||
# double_totals[4] += 1
|
||||
# else:
|
||||
# double_score += weight
|
||||
# double_totals[1] += 1
|
||||
# for influence in j_influences:
|
||||
# if influence in i_influences:
|
||||
# continue
|
||||
# weight = 1.0 / ((len(influence.dof) - 1) ** 2)
|
||||
# if influence.coherent:
|
||||
# double_score -= weight
|
||||
# double_totals[5] += 1
|
||||
# else:
|
||||
# double_score += weight
|
||||
# double_totals[2] += 1
|
||||
|
||||
# score = double_score
|
||||
# score_sum += score
|
||||
# # print((i, j), score, single_totals, double_totals)
|
||||
|
||||
# if flip is None or score_sum > highest_score:
|
||||
# highest_score = score_sum
|
||||
# flip = [i]
|
||||
# print(i, score_sum)
|
||||
|
||||
# if flip is None:
|
||||
# return None
|
||||
# print('Chose', flip, 'from', lowest_dof.dof, highest_score)
|
||||
# for i in flip:
|
||||
# remove_dof(dof_map, i, True)
|
||||
# return flip
|
||||
|
||||
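# Randomized search for a set of bit flips that makes every pair of sampled points coherent:
# each pair is an Influence holding the bit positions where the points differ and whether
# their labels currently agree. The search repeatedly extends from the incoherent influence
# with the fewest remaining degrees of freedom that still has an unvisited successor state,
# picks the next bit with probability proportional to p_dist, and backtracks by dropping a
# random bit from the state when it gets stuck. Bits appearing in a surviving non-empty
# state are reinforced in p_dist for later epochs.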
def main():
|
||||
N = 32
|
||||
sample_size = 32
|
||||
p_dist = np.ones(N)
|
||||
p_dist.fill(0.5)
|
||||
epoch = 0
|
||||
|
||||
while True:
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(sha(x))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
|
||||
influences = []
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(i + 1, len(samples)):
|
||||
b = samples[j]
|
||||
influences.append(Influence(a, b))
|
||||
|
||||
visited = set()
|
||||
state = []
|
||||
|
||||
iterations = 0
|
||||
while sum([0 if influence.coherent else 1 for influence in influences]) > 0:
|
||||
# if iterations > 5000:
|
||||
# state = []
|
||||
# break
|
||||
iterations += 1
|
||||
# print(state)
|
||||
lowest_dof = None
|
||||
num_influences = -1
|
||||
for influence in influences:
|
||||
if influence.coherent:
|
||||
continue
|
||||
|
||||
if lowest_dof is not None and len(influence.dof) >= num_influences:
|
||||
continue
|
||||
|
||||
has_unvisited_state = False
|
||||
for i in influence.dof:
|
||||
state_id = get_state_id(state + [i])
|
||||
if state_id not in visited:
|
||||
has_unvisited_state = True
|
||||
break
|
||||
|
||||
if not has_unvisited_state:
|
||||
continue
|
||||
|
||||
if lowest_dof is None or len(influence.dof) < num_influences:
|
||||
lowest_dof = influence
|
||||
num_influences = len(influence.dof)
|
||||
|
||||
added = False
|
||||
if lowest_dof is not None:
|
||||
valid_choices = []
|
||||
for i in lowest_dof.dof:
|
||||
state_id = get_state_id(state + [i])
|
||||
if state_id in visited:
|
||||
continue
|
||||
valid_choices.append(i)
|
||||
|
||||
if len(valid_choices) > 0:
|
||||
i = valid_choices[0]
|
||||
if len(valid_choices) > 1:
|
||||
p_partial = np.zeros(len(valid_choices))
|
||||
index = 0
|
||||
for j in valid_choices:
|
||||
p_partial[index] = p_dist[j]
index += 1
|
||||
np.divide(p_partial, np.sum(p_partial), p_partial)
|
||||
i = np.random.choice(valid_choices, p=p_partial)
|
||||
|
||||
state_id = get_state_id(state + [i])
|
||||
visited.add(state_id)
|
||||
state.append(i)
|
||||
added = True
|
||||
|
||||
revert = False
|
||||
if added:
|
||||
i = state[-1]
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
if len(influence.dof) == 1 and influence.coherent:
|
||||
revert = True
|
||||
influence.coherent = not influence.coherent
|
||||
influence.dof.remove(i)
|
||||
|
||||
if revert or not added:
|
||||
if len(state) == 0:
|
||||
break
|
||||
i = state.pop(random.randrange(len(state)))
|
||||
for influence in influences:
|
||||
if i in influence.original_dof and not i in influence.dof:
|
||||
influence.coherent = not influence.coherent
|
||||
influence.dof.add(i)
|
||||
|
||||
if len(state) > 0:
|
||||
epoch += 1
|
||||
p_dist -= 0.0001 * (sample_size ** 2)
|
||||
for i in state:
|
||||
p_dist[i] += 0.0002 * (sample_size ** 2)
|
||||
# sample_size += 1
|
||||
print(p_dist)
|
||||
else:
|
||||
# sample_size -= 1
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
368
mutations21.py
Normal file
368
mutations21.py
Normal file
@ -0,0 +1,368 @@
|
||||
from cmath import isnan
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ','.join([str(int(x)) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def id(self):
|
||||
return ','.join(sorted([self.a.id(), self.b.id()]))
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a.x, b.x))
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor(x):
|
||||
# return sum(x[:4]) % 2
|
||||
return sum(x) % 2
|
||||
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent:
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent:
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 1st row (n+1 choose k+1) * (1-(k mod 2))
|
||||
# pseudopascal to compute the follow-on rows
|
||||
# assuming solvability, we want to maximize the probability that our current state and our state with
|
||||
# a particular single flip are one order apart in the correct direction
|
||||
|
||||
|
||||
|
||||
# 2, 0
|
||||
# 2, 2, 0
|
||||
# 2, 4, 2, 0
|
||||
# 2, 6, 6, 2, 0
|
||||
# 2, 8,12, 8, 2, 0
|
||||
# 2,10,20,20,10, 2, 0
|
||||
|
||||
# 3,-9,19,-33,51,-73,99
|
||||
# 3,-6,10,-14,18,-22,26
|
||||
# 3,-3, 4, -4, 4, -4, 4
|
||||
# 3, 0, 1, 0, 0, 0, 0
|
||||
# 3, 3, 1, 1, 0, 0, 0
|
||||
# 3, 6, 4, 2, 1, 0, 0
|
||||
# 3, 9,10, 6, 3, 1, 0
|
||||
|
||||
# 4, 0, 4, 0
|
||||
# 4, 4, 4, 4, 0
|
||||
# 4, 8, 8, 8, 4, 0
|
||||
# 4,12,16,16,12, 4, 0
|
||||
|
||||
# 5, 0,10, 0, 1
|
||||
# 5, 5,10,10, 1, 1
|
||||
# 5,
|
||||
# 5,
|
||||
|
||||
|
||||
|
||||
# 3
|
||||
#
|
||||
# @1 [1, 2, 1]
|
||||
# @2 [2, 2, 0]
|
||||
# @3 [3, 0, 1]
|
||||
|
||||
# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
|
||||
#
|
||||
# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
|
||||
# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
|
||||
# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
|
||||
# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -
|
||||
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -
|
||||
|
||||
# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
|
||||
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
|
||||
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
|
||||
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
|
||||
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]
|
||||
|
||||
# 6
|
||||
#
|
||||
# @1 [1, 5, 10, 10, 5, 1]
|
||||
# @2 [2, 8, 12, 8, 2, 0]
|
||||
# @3 [3, 9, 10, 6, 3, 1]
|
||||
# @4 [4, 8, 8, 8, 4, 0]
|
||||
# @5 [5, 5, 10, 10, 1, 1]
|
||||
# @6 [6, 0, 20, 0, 6, 0]
|
||||
|
||||
# last row, 1 if odd, 0 if even
|
||||
# second to last, subtract 2 on odds, add 2 on evens
|
||||
|
||||
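# Worked example (added for clarity), N = 3: the unnormalized rows produced below are
#   @1 [1, 2, 1], @2 [2, 2, 0], @3 [3, 0, 1]
# (matching the tables above), and dividing column j by C(3, j+1) = [3, 3, 1] gives
#   @1 [1/3, 2/3, 1], @2 [2/3, 2/3, 0], @3 [1, 0, 1]
# i.e. dist[k-1][d-1] appears to be the fraction of input pairs at Hamming distance d whose
# outputs disagree when exactly k of the N bits feed the parity.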
def compute_distributions(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
denom = math.comb(N, j+1)
|
||||
dist[i][j] /= denom
|
||||
return dist
|
||||
|
||||
|
||||
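# Greedy flip search: for the no-flip baseline and every candidate single-bit flip, count
# coherent vs incoherent sample pairs at each Hamming distance, compare the observed
# incoherence fraction per distance with every theoretical row of dist (the expected profile
# if k bits were relevant), turn the per-row errors into a 1/err-weighted average order
# estimate, and apply the flip whose estimate drops furthest below the baseline (halting
# early if some flip makes every pair coherent, or when no flip improves the estimate).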
def main():
|
||||
N = 32
|
||||
sample_size = 2048
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
dist = compute_distributions(N)
|
||||
print(dist)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(xor(x))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
|
||||
# for i in range(0, 2**N):
|
||||
# x = decode(i, N)
|
||||
# y = int(xor(x))
|
||||
# samples.append(Point(x,y))
|
||||
|
||||
base = np.zeros(N)
|
||||
current = np.zeros(N)
|
||||
|
||||
for _ in range(0, N):
|
||||
lowest_err = -1
|
||||
use_flip = -1
|
||||
for flip in range(-1, N):
|
||||
coherent_distances = {}
|
||||
incoherent_distances = {}
|
||||
all_coherent = True
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(i + 1, len(samples)):
|
||||
# if i == j:
|
||||
# continue
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
if distance not in coherent_distances:
|
||||
coherent_distances[distance] = 0
|
||||
if distance not in incoherent_distances:
|
||||
incoherent_distances[distance] = 0
|
||||
is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
|
||||
if is_coherent:
|
||||
coherent_distances[distance] += 1
|
||||
else:
|
||||
incoherent_distances[distance] += 1
|
||||
all_coherent = False
|
||||
if all_coherent:
|
||||
print('Flip and halt', flip)
|
||||
return
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
|
||||
for k in range(0, N):
|
||||
known_incoherence_at_k = dist[k]
|
||||
err = 0
|
||||
# denom = 0
|
||||
for i in range(0, N):
|
||||
if i not in coherent_distances:
|
||||
continue
|
||||
est_incoherence = incoherent_distances[i] / (coherent_distances[i] + incoherent_distances[i])
|
||||
confidence = 1.0
|
||||
# print(k, i, est_incoherence)
|
||||
err += confidence * abs(est_incoherence - known_incoherence_at_k[i - 1])  # / ((est_incoherence + known_incoherence_at_k[i - 1]) / 2)
|
||||
# denom += 1
|
||||
# print(flip, k, err)
|
||||
# err /= denom
|
||||
if flip < 0:
|
||||
base[k] = err
|
||||
else:
|
||||
current[k] = err
|
||||
if flip >= 0:
|
||||
# np.divide(current, np.max(current), current)
|
||||
# print(flip, current)
|
||||
index = -1
|
||||
base_sum = 0
|
||||
current_sum = 0
|
||||
base_total = 0
|
||||
current_total = 0
|
||||
for k in range(0, N):
|
||||
if base[k] > 0:
|
||||
base_sum += k / base[k]
|
||||
base_total += 1.0 / base[k]
|
||||
else:
|
||||
base_sum += k * 1e6
|
||||
base_total += 1e6
|
||||
if current[k] > 0:
|
||||
current_sum += k / current[k]
|
||||
current_total += 1.0 / current[k]
|
||||
else:
|
||||
current_sum += k * 1e6
|
||||
current_total += 1e6
|
||||
# print(base_sum, base_total, current_sum, current_total)
|
||||
# print(current_sum / current_total, base_sum / base_total)
|
||||
rel_to_base = (current_sum / current_total) - (base_sum / base_total)
|
||||
|
||||
# print(base_sum, base_total)
|
||||
# print(base_sum / base_total, current_sum / current_total)
|
||||
|
||||
# for k in range(0, N - 2):
|
||||
# # err = base[k + 1] * current[k] * 1.0 / (base[k + 1] * current[k + 2])
|
||||
# err = base[k + 1] * current[k]
|
||||
# if rel_to_base < 0 or err < rel_to_base:
|
||||
# rel_to_base = err
|
||||
# index = k
|
||||
|
||||
if use_flip < 0 or rel_to_base < lowest_err:
|
||||
lowest_err = rel_to_base
|
||||
use_flip = flip
|
||||
print(flip, rel_to_base)
|
||||
else:
|
||||
pass
|
||||
# np.divide(base, np.max(base), base)
|
||||
# print(flip, base)
|
||||
|
||||
if lowest_err > 0:
|
||||
return
|
||||
print('Flip', use_flip, lowest_err)
|
||||
for p in samples:
|
||||
if p.x[use_flip]:
|
||||
p.y ^= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
405
mutations22.py
Normal file
405
mutations22.py
Normal file
@ -0,0 +1,405 @@
|
||||
from cmath import isnan
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ','.join([str(int(x)) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def id(self):
|
||||
return ','.join(sorted([self.a.id(), self.b.id()]))
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a.x, b.x))
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor(x):
|
||||
# return sum(x[:4]) % 2
|
||||
return sum(x) % 2
|
||||
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent:
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent:
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 1st row (n+1 choose k+1) * (1-(k mod 2))
|
||||
# pseudopascal to compute the follow-on rows
|
||||
# assuming solvability, we want to maximize the probability that our current state and our state with
|
||||
# a particular single flip are one order apart in the correct direction
|
||||
|
||||
|
||||
|
||||
# 2, 0
|
||||
# 2, 2, 0
|
||||
# 2, 4, 2, 0
|
||||
# 2, 6, 6, 2, 0
|
||||
# 2, 8,12, 8, 2, 0
|
||||
# 2,10,20,20,10, 2, 0
|
||||
|
||||
# 3,-9,19,-33,51,-73,99
|
||||
# 3,-6,10,-14,18,-22,26
|
||||
# 3,-3, 4, -4, 4, -4, 4
|
||||
# 3, 0, 1, 0, 0, 0, 0
|
||||
# 3, 3, 1, 1, 0, 0, 0
|
||||
# 3, 6, 4, 2, 1, 0, 0
|
||||
# 3, 9,10, 6, 3, 1, 0
|
||||
|
||||
# 4, 0, 4, 0
|
||||
# 4, 4, 4, 4, 0
|
||||
# 4, 8, 8, 8, 4, 0
|
||||
# 4,12,16,16,12, 4, 0
|
||||
|
||||
# 5, 0,10, 0, 1
|
||||
# 5, 5,10,10, 1, 1
|
||||
# 5,
|
||||
# 5,
|
||||
|
||||
|
||||
|
||||
# 3
|
||||
#
|
||||
# @1 [1, 2, 1]
|
||||
# @2 [2, 2, 0]
|
||||
# @3 [3, 0, 1]
|
||||
|
||||
# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
|
||||
#
|
||||
# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
|
||||
# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
|
||||
# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
|
||||
# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -
|
||||
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -
|
||||
|
||||
# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
|
||||
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
|
||||
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
|
||||
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
|
||||
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]
|
||||
|
||||
# 6
|
||||
#
|
||||
# @1 [1, 5, 10, 10, 5, 1]
|
||||
# @2 [2, 8, 12, 8, 2, 0]
|
||||
# @3 [3, 9, 10, 6, 3, 1]
|
||||
# @4 [4, 8, 8, 8, 4, 0]
|
||||
# @5 [5, 5, 10, 10, 1, 1]
|
||||
# @6 [6, 0, 20, 0, 6, 0]
|
||||
|
||||
# last row, 1 if odd, 0 if even
|
||||
# second to last, subtract 2 on odds, add 2 on evens
|
||||
|
||||
def compute_distributions(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
print(dist)
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
denom = math.comb(N, j+1)
|
||||
dist[i][j] /= denom
|
||||
return dist
|
||||
|
||||
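# Raised-cosine window used as a crude likelihood: zero outside [u - s, u + s], peaks at
# 1/s when x == u, and integrates to 1 over its support, e.g. raised_cosine(u, u, s) == 1/s
# and raised_cosine(u + s, u, s) == 0.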
def raised_cosine(x, u, s):
|
||||
if x < (u - s):
|
||||
return 0
|
||||
if x > (u + s):
|
||||
return 0
|
||||
return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s))
|
||||
|
||||
def average_index(x):
|
||||
total = 0
|
||||
for k in range(0, len(x)):
|
||||
total += k * x[k]
|
||||
return total / np.sum(x)
|
||||
|
||||
# sample_size used for each N (sample_size = 2 ** (N // 2 + 1)):
# 8, 32, 2^5
|
||||
# 10, 64, 2^6
|
||||
# 12, 128, 2^7
|
||||
# 14, 256, 2^8
|
||||
# 16, 512, 2^9
|
||||
# 18, 1024, 2^10
|
||||
# 20, 2048, 2^11
|
||||
# 22, 4096, 2^12
|
||||
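# Probabilistic variant of the flip search: for each candidate flip the per-distance
# incoherence estimates are converted into a probability for every possible order k (a
# product of raised-cosine windows around the feasible range of the true incoherence),
# normalized, and compared against cumulative_probability, the running belief over the
# current order, which is shifted down by one after each accepted flip. The flip with the
# largest p_forward - p_backward (most likely to have reduced the order) is applied to the
# sample labels and the loop repeats.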
def main():
|
||||
N = 16
|
||||
sample_size = 128
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
dist = compute_distributions(N)
|
||||
print(dist)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(xor(x))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
total_sample_count = len(samples)
|
||||
|
||||
# for i in range(0, 2**N):
|
||||
# x = decode(i, N)
|
||||
# y = int(xor(x))
|
||||
# samples.append(Point(x,y))
|
||||
|
||||
base = np.zeros(N)
|
||||
current = np.zeros(N)
|
||||
cumulative_probability = np.ones(N)
|
||||
|
||||
for _ in range(0, N):
|
||||
lowest_err = -1
|
||||
use_flip = -1
|
||||
for flip in range(-1, N):
|
||||
coherent_distances = np.zeros(N+1)
|
||||
incoherent_distances = np.zeros(N+1)
|
||||
all_coherent = True
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
|
||||
if is_coherent:
|
||||
coherent_distances[distance] += 1
|
||||
else:
|
||||
incoherent_distances[distance] += 1
|
||||
all_coherent = False
|
||||
if all_coherent:
|
||||
print('Flip and halt', flip)
|
||||
return
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances))
|
||||
# print(est_incoherence)
|
||||
|
||||
for k in range(0, N):
|
||||
known_incoherence_at_k = dist[k]
|
||||
err = 0
|
||||
# denom = 0
|
||||
probability = 1.0
|
||||
for i in range(1, N + 1):
|
||||
if isnan(est_incoherence[i]):
|
||||
continue
|
||||
sample_size = coherent_distances[i] + incoherent_distances[i]
|
||||
full_size = math.comb(N, i) * (2 ** N)
|
||||
num_unknowns = full_size - sample_size
|
||||
min_true_value = incoherent_distances[i] / full_size
|
||||
max_true_value = (incoherent_distances[i] + num_unknowns) / full_size
|
||||
s = max(abs(est_incoherence[i] - min_true_value), abs(est_incoherence[i] - max_true_value))
|
||||
u = est_incoherence[i]
|
||||
known_incoherence = known_incoherence_at_k[i - 1]
|
||||
err = raised_cosine(known_incoherence, u, s)
|
||||
probability *= err
|
||||
|
||||
# print(k, i, min_true_value, max_true_value)
|
||||
|
||||
# confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative
|
||||
# err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence
|
||||
# denom += 1
|
||||
# print(flip, k, err)
|
||||
# err /= denom
|
||||
if flip < 0:
|
||||
base[k] = probability
|
||||
else:
|
||||
current[k] = probability
|
||||
|
||||
if flip >= 0:
|
||||
if np.sum(current) == 0:
|
||||
continue
|
||||
np.divide(current, np.sum(current), current)
|
||||
# print(current)
|
||||
# temp = np.roll(cumulative_probability, -1)
|
||||
# temp[-1] = 1.0
|
||||
# np.multiply(current, temp, current)
|
||||
# np.divide(current, np.sum(current), current)
|
||||
p_forward = 0
|
||||
p_backward = 0
|
||||
for i in range(1, N):
|
||||
p_forward += cumulative_probability[i] * current[i - 1]
|
||||
for i in range(0, N - 1):
|
||||
p_backward += cumulative_probability[i] * current[i + 1]
|
||||
|
||||
# base_index = average_index(cumulative_probability)
|
||||
# new_index = average_index(current)
|
||||
# if isnan(new_index):
|
||||
# continue
|
||||
# np.divide(current, np.sum(current), current)
|
||||
# np.subtract(1, current, current)
|
||||
print(flip,p_forward,p_backward,current)
|
||||
delta = p_forward - p_backward
|
||||
if use_flip < 0 or delta > lowest_err:
|
||||
use_flip = flip
|
||||
lowest_err = delta
|
||||
|
||||
# for k in range(0, N - 1):
|
||||
# value = current[k] * cumulative_probability[k + 1]
|
||||
# if use_flip < 0 or value > lowest_err:
|
||||
# use_flip = flip
|
||||
# lowest_err = value
|
||||
# print(flip, highest_value)
|
||||
else:
|
||||
np.divide(base, np.sum(base), base)
|
||||
# np.subtract(1, base, base)
|
||||
# print(cumulative_probability)
|
||||
cumulative_probability = np.roll(cumulative_probability, -1)
|
||||
cumulative_probability[-1] = 1.0
|
||||
# print(cumulative_probability)
|
||||
# print(base)
|
||||
np.multiply(base, cumulative_probability, cumulative_probability)
|
||||
np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
print(cumulative_probability)
|
||||
|
||||
if use_flip < 0:
|
||||
return
|
||||
|
||||
print('Flip', use_flip, lowest_err)
|
||||
for p in samples:
|
||||
if p.x[use_flip]:
|
||||
p.y ^= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
761
mutations23.py
Normal file
761
mutations23.py
Normal file
@ -0,0 +1,761 @@
|
||||
from cmath import isnan
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ','.join([str(int(x)) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def id(self):
|
||||
return ','.join(sorted([self.a.id(), self.b.id()]))
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a.x, b.x))
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor(x):
|
||||
# return sum(x) % 2
|
||||
half = int(len(x) / 2)
|
||||
return sum(x[:half]) % 2
|
||||
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent:
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent:
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 1st row (n+1 choose k+1) * (1-(k mod 2))
|
||||
# pseudopascal to compute the follow-on rows
|
||||
# assuming solvability, we want to maximize the probability that our current state and our state with
|
||||
# a particular single flip are one order apart in the correct direction
|
||||
|
||||
|
||||
|
||||
# 2, 0
|
||||
# 2, 2, 0
|
||||
# 2, 4, 2, 0
|
||||
# 2, 6, 6, 2, 0
|
||||
# 2, 8,12, 8, 2, 0
|
||||
# 2,10,20,20,10, 2, 0
|
||||
|
||||
# 3,-9,19,-33,51,-73,99
|
||||
# 3,-6,10,-14,18,-22,26
|
||||
# 3,-3, 4, -4, 4, -4, 4
|
||||
# 3, 0, 1, 0, 0, 0, 0
|
||||
# 3, 3, 1, 1, 0, 0, 0
|
||||
# 3, 6, 4, 2, 1, 0, 0
|
||||
# 3, 9,10, 6, 3, 1, 0
|
||||
|
||||
# 4, 0, 4, 0
|
||||
# 4, 4, 4, 4, 0
|
||||
# 4, 8, 8, 8, 4, 0
|
||||
# 4,12,16,16,12, 4, 0
|
||||
|
||||
# 5, 0,10, 0, 1
|
||||
# 5, 5,10,10, 1, 1
|
||||
# 5,
|
||||
# 5,
|
||||
|
||||
|
||||
|
||||
# 3
|
||||
#
|
||||
# @1 [1, 2, 1]
|
||||
# @2 [2, 2, 0]
|
||||
# @3 [3, 0, 1]
|
||||
|
||||
# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
|
||||
#
|
||||
# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
|
||||
# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
|
||||
# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
|
||||
# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -
|
||||
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -
|
||||
|
||||
# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
|
||||
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
|
||||
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
|
||||
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
|
||||
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]
|
||||
|
||||
# 6
|
||||
#
|
||||
# @1 [1, 5, 10, 10, 5, 1]
|
||||
# @2 [2, 8, 12, 8, 2, 0]
|
||||
# @3 [3, 9, 10, 6, 3, 1]
|
||||
# @4 [4, 8, 8, 8, 4, 0]
|
||||
# @5 [5, 5, 10, 10, 1, 1]
|
||||
# @6 [6, 0, 20, 0, 6, 0]
|
||||
|
||||
# last row, 1 if odd, 0 if even
|
||||
# second to last, subtract 2 on odds, add 2 on evens
|
||||
|
||||
def compute_pseudopascal(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
return dist
|
||||
|
||||
def compute_distributions(N):
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
denom = math.comb(N, j+1)
|
||||
dist[i][j] /= denom
|
||||
return dist
|
||||
|
||||
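# Builds an N x N matrix whose (i, j) entry approximates the probability that, given the
# per-distance sample sizes actually drawn, the incoherence profile of a true order-i parity
# could be confused with order j: for each distance the feasible range [i_min, i_max] of the
# unobserved true incoherence is formed from the counts, and order j's expected value is
# scored with a raised-cosine window centred on that range.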
def confusion_probabilities(N, samples):
|
||||
sample_sizes = np.zeros(N)
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
if i == j:
|
||||
continue
|
||||
distance = hamming_distance(a, b)
|
||||
sample_sizes[distance - 1] += 1
|
||||
|
||||
confusion = np.zeros((N, N))
|
||||
dist = compute_pseudopascal(N)
|
||||
np.multiply(dist, 2 ** N, dist)
|
||||
# These are the probabilities that we might mix up any two orders given a particular sample size
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
probability = 1.0
|
||||
for k in range(0, N):
|
||||
full_size = math.comb(N, k+1) * (2 ** N)
|
||||
sample_size = sample_sizes[k]
|
||||
num_unknowns = full_size - sample_size
|
||||
i_incoherent = dist[i][k]
|
||||
# Worst case, we sample only the coherent points,
|
||||
i_min = max(i_incoherent - num_unknowns, 0) / full_size
|
||||
i_max = min(sample_size, i_incoherent) / full_size
|
||||
u = (i_min + i_max) / 2  # midpoint of the feasible incoherence range
|
||||
s = (i_max - i_min) / 2
|
||||
probability *= raised_cosine(dist[j][k] / full_size, u, s)
|
||||
confusion[i][j] = probability
|
||||
return confusion
|
||||
|
||||
def raised_cosine(x, u, s):
|
||||
if x < (u - s):
|
||||
return 0
|
||||
if x > (u + s):
|
||||
return 0
|
||||
return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s))
|
||||
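# raised_cosine(x, u, s) above is a unit-area bump supported on [u - s, u + s].
# A minimal midpoint-rule check of that normalization (a sketch; the helper
# name is illustrative):
def _example_raised_cosine_area(u=0.5, s=0.25, steps=10000):
    dx = 2 * s / steps
    total = 0.0
    for i in range(steps):
        x = (u - s) + (i + 0.5) * dx
        total += raised_cosine(x, u, s) * dx
    return total  # approximately 1.0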
|
||||
# Probability of getting k red balls after drawing n from a bag with m total balls and j red balls in it
|
||||
# (n choose k) * p^k * (1-p)^(n-k)
|
||||
|
||||
# p/m chance of getting a red ball
|
||||
# (1 - p/m) chance of not getting a red ball
|
||||
|
||||
# One way (p/m) * ((p-1)/(m-1)) * ((p-2)/(m-2))
|
||||
# (1 - (p/m))
|
||||
|
||||
def p_bernoulli(n, k, m, j):
|
||||
probabilities = np.zeros((n + 1, n + 1))
|
||||
probabilities.fill(-1)
|
||||
# if n == k:
|
||||
# return 1.0
|
||||
# if k > p:
|
||||
# return 0.0
|
||||
stack = [(0,0)]
|
||||
while len(stack) > 0:
|
||||
(a, b) = stack.pop()
|
||||
if a + b == n:
|
||||
probabilities[a][b] = 1 if a == k else 0
|
||||
elif a > j:
|
||||
probabilities[a][b] = 0
|
||||
elif b > (m - j):
|
||||
probabilities[a][b] = 0
|
||||
else:
|
||||
p_left = probabilities[a + 1][b]
|
||||
p_right = probabilities[a][b + 1]
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
p = (j - a) / (m - a - b)
|
||||
probabilities[a][b] = p_left * p + p_right * (1 - p)
|
||||
else:
|
||||
stack.append((a, b))
|
||||
if p_left < 0:
|
||||
stack.append((a + 1, b))
|
||||
if p_right < 0:
|
||||
stack.append((a, b + 1))
|
||||
return probabilities[0][0]
|
||||
|
||||
# P = 1.0
|
||||
# p_k = 0
|
||||
# p_nk = 0
|
||||
# for i in range(1, k + 1):
|
||||
# P *= (n + 1 - i) / i
|
||||
# while P > 1.0 and p_k < k:
|
||||
# P *= p
|
||||
# p_k += 1
|
||||
# while P > 1.0 and p_nk < (n - k):
|
||||
# P *= (1 - p)
|
||||
# p_nk += 1
|
||||
# while p_k < k:
|
||||
# P *= p
|
||||
# p_k += 1
|
||||
# while (p_nk < (n - k)):
|
||||
# P *= (1 - p)
|
||||
# p_nk += 1
|
||||
# return P
|
||||
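# Despite its name, p_bernoulli above computes the hypergeometric probability of
# drawing exactly k "red" items in n draws without replacement from m items of
# which j are red. A closed-form cross-check (a sketch; the helper name is
# illustrative):
def _example_hypergeometric_pmf(n, k, m, j):
    if k > n or k > j or (n - k) > (m - j):
        return 0.0
    return math.comb(j, k) * math.comb(m - j, n - k) / math.comb(m, n)

# e.g. _example_hypergeometric_pmf(4, 2, 10, 5) and p_bernoulli(4, 2, 10, 5)
# both give 10 * 10 / 210, roughly 0.476.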
|
||||
def average_index(x):
|
||||
total = 0
|
||||
for k in range(0, len(x)):
|
||||
total += k * x[k]
|
||||
return total / np.sum(x)
|
||||
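# average_index is the probability-weighted mean position of a distribution,
# e.g. average_index([0.25, 0.5, 0.25]) == (0 * 0.25 + 1 * 0.5 + 2 * 0.25) / 1.0 == 1.0.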
|
||||
def compute_cumulative_probability(N, bases, p_n):
|
||||
# p_n = np.zeros(N)
|
||||
# p_n.fill(0.5)
|
||||
states = [[]]
|
||||
flips = set()
|
||||
for i in range(1, len(bases)):
|
||||
# (base, _) = bases[i]
|
||||
(_, flip) = bases[i]
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for k in range(0, N - 1):
|
||||
# p_forward += base[k + 1] * next_p[k]
|
||||
# p_backward += base[k] * next_p[k + 1]
|
||||
if flip in flips:
|
||||
# p_n[flip] -= p_forward
|
||||
# p_n[flip] += p_backward
|
||||
flips.remove(flip)
|
||||
else:
|
||||
# p_n[flip] += p_forward
|
||||
# p_n[flip] -= p_backward
|
||||
flips.add(flip)
|
||||
states.append(flips.copy())
|
||||
# np.clip(p_n, 0, 1, p_n)
|
||||
# print('Contribution probabilities', p_n)
|
||||
|
||||
min_p_n = np.min(p_n)
|
||||
max_p_n = np.max(p_n)
|
||||
|
||||
|
||||
p_k = np.zeros(N)
|
||||
for k in range(0, N):
|
||||
stack = [(k, len(bases) - 1)]
|
||||
probabilities = np.zeros((N, len(bases)))
|
||||
probabilities.fill(-1)
|
||||
while len(stack) > 0:
|
||||
(i, base_index) = stack.pop()
|
||||
(base, flip) = bases[base_index]
|
||||
if base_index == 0:
|
||||
probabilities[i, 0] = base[i]
|
||||
else:
|
||||
left = i - 1
|
||||
right = i + 1
|
||||
state = states[base_index - 1]
|
||||
p_flip = max(min(p_n[flip] + 0.5, 1.0), 0)
|
||||
if flip in state:
|
||||
p_flip = 1 - p_flip
|
||||
p_left = probabilities[left, base_index - 1] if left >= 0 else 0
|
||||
p_right = probabilities[right, base_index - 1] if right < N else 0
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
probabilities[i, base_index] = base[i] * p_left * (1 - p_flip) + base[i] * p_right * p_flip
|
||||
else:
|
||||
stack.append((i, base_index))
|
||||
if p_left < 0:
|
||||
stack.append((left, base_index - 1))
|
||||
if p_right < 0:
|
||||
stack.append((right, base_index - 1))
|
||||
p_k[k] = probabilities[k][-1]
|
||||
np.divide(p_k, np.sum(p_k), p_k)
|
||||
return p_k
|
||||
|
||||
# 8, 32, 2^5
|
||||
# 10, 64, 2^6
|
||||
# 12, 128, 2^7
|
||||
# 14, 256, 2^8
|
||||
# 16, 512, 2^9
|
||||
# 18, 1024, 2^10
|
||||
# 20, 2048, 2^11
|
||||
# 22, 4096, 2^12
|
||||
def main():
|
||||
N = 8
|
||||
sample_size = 16
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(xor(x))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
# confusion = confusion_probabilities(N, samples)
|
||||
# print(confusion)
|
||||
# return
|
||||
|
||||
# for i in range(0, 2**N):
|
||||
# x = decode(i, N)
|
||||
# y = int(xor(x))
|
||||
# samples.append(Point(x,y))
|
||||
|
||||
base = np.zeros(N)
|
||||
current = np.zeros(N)
|
||||
cumulative_probability = np.ones(N)
|
||||
flip_likelihood = np.zeros(N)
|
||||
cumulative_deltas = np.zeros(N)
|
||||
direction = -1
|
||||
flips = set()
|
||||
bases = []
|
||||
last_flip = -1
|
||||
|
||||
for _ in range(0, 2 ** N):
|
||||
lowest_err = -1
|
||||
use_flip = -1
|
||||
for flip in range(-1, N):
|
||||
coherent_distances = np.zeros((len(samples), N+1))
|
||||
incoherent_distances = np.zeros((len(samples), N+1))
|
||||
all_coherent = True
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
|
||||
if is_coherent:
|
||||
coherent_distances[i][distance] += 1
|
||||
else:
|
||||
incoherent_distances[i][distance] += 1
|
||||
all_coherent = False
|
||||
if all_coherent:
|
||||
print('Flip and halt', flip)
|
||||
return
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
# est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances))
|
||||
# print(est_incoherence)
|
||||
|
||||
probability = np.ones(N)
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
components = []
|
||||
for i in range(0, len(samples)):
|
||||
for j in range(1, N + 1):
|
||||
p_k = np.zeros(N)
|
||||
# confusion = np.zeros((N, N))
|
||||
n = coherent_distances[i][j] + incoherent_distances[i][j]
|
||||
if n == 0:
|
||||
continue
|
||||
a = incoherent_distances[i][j]
|
||||
t = math.comb(N, j)
|
||||
# for k in range(0, N):
|
||||
# p = dist[k][j - 1]
|
||||
# a_ideal = round(p * n / t)
|
||||
# # base_prob = p_bernoulli(int(n), a_ideal, t, int(p))
|
||||
# for q in range(0, N):
|
||||
# u = dist[q][j - 1]
|
||||
# p_ratio = p / t
|
||||
# u_ratio = u / t
|
||||
# confusion[k][q] = p_bernoulli(int(n), a_ideal, t, int(u))
|
||||
# np.divide(confusion, np.max(confusion, axis=0), confusion)
|
||||
|
||||
for k in range(0, N):
|
||||
p = dist[k][j - 1]
|
||||
a_ideal = round(p * n / t)
|
||||
# How likely are we to correctly identify an ideal sample?
|
||||
# for q in range(0, N):
|
||||
p_ideal = p_bernoulli(int(n), a_ideal, t, int(p))
|
||||
# P = math.comb(int(n), int(a)) * math.pow(p, int(a)) * math.pow(1 - p, int(n - a))
|
||||
p_k[k] = p_bernoulli(int(n), int(a), t, int(p))# * (n / t)
|
||||
# p_bernoulli(int(n), int(a), math.comb(N, j), int(p))
|
||||
# probability *= P
|
||||
components.append(p_k)
|
||||
np.divide(p_k, np.sum(p_k), p_k)
|
||||
np.multiply(probability, p_k, probability)
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
|
||||
# p_cross_k is the probability that we correctly identified at k
|
||||
# plus the probabilities that we misidentify at q when it is actually k
|
||||
|
||||
# probability of drawing from sample k = p_bernoulli
|
||||
|
||||
# p_cross_k = np.zeros(N)
|
||||
# for k in range(0, N):
|
||||
# for q in range(0, N):
|
||||
# p_cross_k[k] += p_k[q] * confusion[k][q]
|
||||
# if k == q:
|
||||
# continue
|
||||
# p_cross_k[k] += (1 - p_k[k]) * p_k[q] * confusion[k][q]
|
||||
# p_cross_k[k] -= (1 - p_k[q]) * p_k[k] * confusion[q][k]
|
||||
|
||||
# if q == k:
|
||||
# continue
|
||||
# p_cross_k[k] += (1 - p_k[k]) * p_k[q] * confusion[k][q]
|
||||
# p_cross_k[k] -= (1 - p_k[k])
|
||||
# p_cross_k[k] -= p_k[k] * (1 - confusion[k][k]) * confusion[q][k]
|
||||
|
||||
|
||||
# for k in range(0, N):
|
||||
# P = p_k[k]
|
||||
# for m in range(0, N):
|
||||
# if m == k:
|
||||
# continue
|
||||
# if p_k[m] == 0:
|
||||
# continue
|
||||
# P /= p_k[m]
|
||||
# p_cross_k[k] = P
|
||||
# min_value = np.min(p_cross_k)
|
||||
# np.subtract(p_cross_k, min_value, p_cross_k)
|
||||
# np.add(probability, p_cross_k, probability)
|
||||
# total = np.sum(p_k)
|
||||
# if total > 0:
|
||||
# np.divide(p_k, total, p_k)
|
||||
# np.multiply(p_k, probability, probability)
|
||||
# np.divide(probability, np.sum(probability), probability)
|
||||
# print(probability)
|
||||
|
||||
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
if flip < 0:
|
||||
np.copyto(base, probability)
|
||||
else:
|
||||
np.copyto(current, probability)
|
||||
|
||||
|
||||
# print(k, i, min_true_value, max_true_value)
|
||||
|
||||
# confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative
|
||||
# err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence
|
||||
# denom += 1
|
||||
# print(flip, k, err)
|
||||
# err /= denom
|
||||
# if flip < 0:
|
||||
# base[k] = probability
|
||||
# else:
|
||||
# current[k] = probability
|
||||
|
||||
if flip >= 0:
|
||||
if np.sum(current) == 0:
|
||||
continue
|
||||
np.divide(current, np.sum(current), current)
|
||||
|
||||
base_mean_index = average_index(base)
|
||||
base_variance = 0
|
||||
for i in range(0, N):
|
||||
base_variance += base[i] * (base_mean_index - i) ** 2
|
||||
base_err = 0
|
||||
norm = np.zeros(N)
|
||||
for i in range(0, N):
|
||||
norm[i] = 1 / (base_variance * math.sqrt(2 * math.pi)) * math.exp(-1 / 2 * ((i - base_mean_index) / base_variance) ** 2)
|
||||
np.divide(norm, np.sum(norm), norm)
|
||||
for i in range(0, N):
|
||||
base_err += (base[i] - norm[i]) ** 2
|
||||
|
||||
current_mean_index = average_index(current)
|
||||
current_variance = 0
|
||||
for i in range(0, N):
|
||||
current_variance += current[i] * (current_mean_index - i) ** 2
|
||||
current_err = 0
|
||||
for i in range(0, N):
|
||||
norm[i] = 1 / (current_variance * math.sqrt(2 * math.pi)) * math.exp(-1 / 2 * ((i - current_mean_index) / current_variance) ** 2)
|
||||
np.divide(norm, np.sum(norm), norm)
|
||||
for i in range(0, N):
|
||||
current_err += (current[i] - norm[i]) ** 2
|
||||
|
||||
delta = abs(1 - (base_mean_index - current_mean_index))
|
||||
print(flip, current)
|
||||
print('Mean', current_mean_index, base_mean_index)
|
||||
print('Variance', current_variance, base_variance)
|
||||
print('Err', current_err, base_err)
|
||||
score = current_variance
|
||||
|
||||
# base_score = 0
|
||||
# for i in range(0, N):
|
||||
# base_score += (base[round(base_mean_index)] - base[i]) ** 2
|
||||
|
||||
# score = 0
|
||||
# for i in range(0, N):
|
||||
# score += (current[round(current_mean_index)] - current[i]) ** 2
|
||||
# print('Score', score, base_score)
|
||||
|
||||
# print(current)
|
||||
# temp = np.roll(cumulative_probability, -1)
|
||||
# temp[-1] = 1.0
|
||||
# np.multiply(current, temp, current)
|
||||
# np.divide(current, np.sum(current), current)
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for i in range(1, N):
|
||||
# p_forward += base[i] * current[i - 1]
|
||||
# for i in range(0, N - 1):
|
||||
# p_backward += base[i] * current[i + 1]
|
||||
# scale = 0.01
|
||||
# if flip in flips:
|
||||
# flip_likelihood[flip] += scale * p_backward
|
||||
# flip_likelihood[flip] -= scale * p_forward
|
||||
# else:
|
||||
# flip_likelihood[flip] -= scale * p_backward
|
||||
# flip_likelihood[flip] += scale * p_forward
|
||||
# delta = p_forward - p_backward
|
||||
# print(flip, current, p_forward, p_backward)
|
||||
# base_index = average_index(base)
|
||||
# current_index = average_index(current)
|
||||
# err = abs(1 - (base_index - current_index))
|
||||
# print(base_index, current_index, err)
|
||||
|
||||
# base_index = average_index(cumulative_probability)
|
||||
# new_index = average_index(current)
|
||||
# if isnan(new_index):
|
||||
# continue
|
||||
# np.divide(current, np.sum(current), current)
|
||||
# np.subtract(1, current, current)
|
||||
# print(flip,p_forward,p_backward,current)
|
||||
if use_flip < 0 or delta < lowest_err:
|
||||
use_flip = flip
|
||||
lowest_err = score
|
||||
|
||||
# cumulative_deltas[flip] += 0
|
||||
|
||||
# for k in range(0, N - 1):
|
||||
# value = current[k] * cumulative_probability[k + 1]
|
||||
# if use_flip < 0 or value > lowest_err:
|
||||
# use_flip = flip
|
||||
# lowest_err = value
|
||||
# print(flip, highest_value)
|
||||
else:
|
||||
# p_next = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# P = 0.0
|
||||
# for j in range(0, N):
|
||||
# if i == j:
|
||||
# continue
|
||||
# P += base[i] * (1 - base[j])
|
||||
# p_next[i] = P
|
||||
# base = p_next
|
||||
|
||||
# base[0] = 0
|
||||
np.divide(base, np.sum(base), base)
|
||||
bases.append((base.copy(), last_flip))
|
||||
# bases.insert(0, base.copy())
|
||||
# cumulative_probability = compute_cumulative_probability(N, bases)
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for i in range(1, N):
|
||||
# p_forward += cumulative_probability[i] * base[i - 1]
|
||||
# for i in range(0, N - 1):
|
||||
# p_backward += cumulative_probability[i] * base[i + 1]
|
||||
print('Base', base)
|
||||
# # # np.subtract(1, base, base)
|
||||
# # # print(cumulative_probability)
|
||||
# shift_left = np.roll(cumulative_probability, -1)
|
||||
# shift_left[-1] = 0.0
|
||||
# # # # print('Shift Left', p_forward, shift_left)
|
||||
# shift_right = np.roll(cumulative_probability, 1)
|
||||
# shift_right[0] = 0.0
|
||||
# # # # print('Shift Right', p_backward, shift_right)
|
||||
# p_next = np.add(np.multiply(shift_left, 0.5), np.multiply(shift_right, 0.5))
|
||||
# p_next[0] = 0
|
||||
# np.divide(p_next, np.sum(p_next), p_next)
|
||||
# # # # print('Next', p_next)
|
||||
# # # # # print(cumulative_probability)
|
||||
# # # # # print(base)
|
||||
# np.multiply(base, p_next, cumulative_probability)
|
||||
# cumulative_probability[0] = 0
|
||||
# # # # # np.multiply(cumulative_probability, shift_right, cumulative_probability)
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
cumulative_probability = compute_cumulative_probability(N, bases, flip_likelihood)
|
||||
print('Cumulative', cumulative_probability)
|
||||
print('Likelihood', flip_likelihood)
|
||||
|
||||
# cumulative_probability[0] = 0
|
||||
# use_flip = -1
|
||||
# if direction < 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] < 0:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# direction = 1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# else:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] > 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# direction = -1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# if direction < 0:
|
||||
# cumulative_probability[0] = 0
|
||||
# else:
|
||||
# cumulative_probability[-1] = 0
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
# print(cumulative_deltas)
|
||||
|
||||
# use_flip = -1
|
||||
# highest_p = 0
|
||||
# for i in range(0, N):
|
||||
# p = flip_likelihood[i]
|
||||
# if i in flips:
|
||||
# p = -p
|
||||
# if use_flip < 0 or p > highest_p:
|
||||
# use_flip = i
|
||||
# highest_p = p
|
||||
# if not use_flip in flips and highest_p < 0 or use_flip in flips and highest_p > 0:
|
||||
# flip_likelihood[use_flip] *= -1.0
|
||||
|
||||
if use_flip < 0:
|
||||
return
|
||||
last_flip = use_flip
|
||||
if use_flip in flips:
|
||||
flips.remove(use_flip)
|
||||
else:
|
||||
flips.add(use_flip)
|
||||
print('Flip', use_flip, lowest_err)
|
||||
print(flips)
|
||||
cumulative_deltas[use_flip] = -cumulative_deltas[use_flip]
|
||||
for p in samples:
|
||||
if p.x[use_flip]:
|
||||
p.y ^= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
656
mutations24.py
Normal file
@ -0,0 +1,656 @@
|
||||
from cmath import isnan
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ','.join([str(int(x)) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def id(self):
|
||||
return ','.join(sorted([self.a.id(), self.b.id()]))
|
||||
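# A small illustration of the two classes above (hypothetical values):
#   a = Point(np.array([0, 1, 1, 0]), 0)
#   b = Point(np.array([1, 1, 0, 0]), 1)
#   inf = Influence(a, b)
#   inf.dof == {0, 2}         bit positions where a.x and b.x differ
#   inf.coherent() == False   the pair disagrees on y, so some bit in dof must matter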
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a.x, b.x))
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor(x):
|
||||
# return sum(x) % 2
|
||||
half = int(len(x) / 2)
|
||||
return sum(x[:half]) % 2
|
||||
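# The hidden target in this file is the parity of the first half of the bits,
# e.g. xor(np.array([1, 0, 1, 1])) == (1 + 0) % 2 == 1; only the first
# len(x) // 2 positions actually influence y.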
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent():
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent() and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent:
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 1st row (n+1 choose k+1) * (1-(k mod 2))
|
||||
# pseudopascal to compute the follow-on rows
|
||||
# assuming solvability, we want to maximize the probability that our current state and our state with
|
||||
# a particular single flip are one order apart in the correct direction
|
||||
|
||||
|
||||
|
||||
# 2, 0
|
||||
# 2, 2, 0
|
||||
# 2, 4, 2, 0
|
||||
# 2, 6, 6, 2, 0
|
||||
# 2, 8,12, 8, 2, 0
|
||||
# 2,10,20,20,10, 2, 0
|
||||
|
||||
# 3,-9,19,-33,51,-73,99
|
||||
# 3,-6,10,-14,18,-22,26
|
||||
# 3,-3, 4, -4, 4, -4, 4
|
||||
# 3, 0, 1, 0, 0, 0, 0
|
||||
# 3, 3, 1, 1, 0, 0, 0
|
||||
# 3, 6, 4, 2, 1, 0, 0
|
||||
# 3, 9,10, 6, 3, 1, 0
|
||||
|
||||
# 4, 0, 4, 0
|
||||
# 4, 4, 4, 4, 0
|
||||
# 4, 8, 8, 8, 4, 0
|
||||
# 4,12,16,16,12, 4, 0
|
||||
|
||||
# 5, 0,10, 0, 1
|
||||
# 5, 5,10,10, 1, 1
|
||||
# 5,
|
||||
# 5,
|
||||
|
||||
|
||||
|
||||
# 3
|
||||
#
|
||||
# @1 [1, 2, 1]
|
||||
# @2 [2, 2, 0]
|
||||
# @3 [3, 0, 1]
|
||||
|
||||
# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
|
||||
#
|
||||
# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
|
||||
# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
|
||||
# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
|
||||
# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -
|
||||
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -
|
||||
|
||||
# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
|
||||
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
|
||||
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
|
||||
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
|
||||
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]
|
||||
|
||||
# 6
|
||||
#
|
||||
# @1 [1, 5, 10, 10, 5, 1]
|
||||
# @2 [2, 8, 12, 8, 2, 0]
|
||||
# @3 [3, 9, 10, 6, 3, 1]
|
||||
# @4 [4, 8, 8, 8, 4, 0]
|
||||
# @5 [5, 5, 10, 10, 1, 1]
|
||||
# @6 [6, 0, 20, 0, 6, 0]
|
||||
|
||||
# last row, 1 if odd, 0 if even
|
||||
# second to last, subtract 2 on odds, add 2 on evens
|
||||
|
||||
def compute_pseudopascal(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
return dist
|
||||
|
||||
def compute_distributions(N):
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
denom = math.comb(N, j+1)
|
||||
dist[i][j] /= denom
|
||||
return dist
|
||||
|
||||
def confusion_probabilities(N, samples):
|
||||
sample_sizes = np.zeros(N)
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
if i == j:
|
||||
continue
|
||||
distance = hamming_distance(a, b)
|
||||
sample_sizes[distance - 1] += 1
|
||||
|
||||
confusion = np.zeros((N, N))
|
||||
dist = compute_pseudopascal(N)
|
||||
np.multiply(dist, 2 ** N, dist)
|
||||
# These are the probabilities that we might mix up any two orders given a particular sample size
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
probability = 1.0
|
||||
for k in range(0, N):
|
||||
full_size = math.comb(N, k+1) * (2 ** N)
|
||||
sample_size = sample_sizes[k]
|
||||
num_unknowns = full_size - sample_size
|
||||
i_incoherent = dist[i][k]
|
||||
# Worst case, we sample only the coherent points,
|
||||
i_min = max(i_incoherent - num_unknowns, 0) / full_size
|
||||
i_max = min(sample_size, i_incoherent) / full_size
|
||||
u = (i_min + i_max) / 2  # midpoint of the feasible incoherence range
|
||||
s = (i_max - i_min) / 2
|
||||
probability *= raised_cosine(dist[j][k] / full_size, u, s)
|
||||
confusion[i][j] = probability
|
||||
return confusion
|
||||
|
||||
def raised_cosine(x, u, s):
|
||||
if x < (u - s):
|
||||
return 0
|
||||
if x > (u + s):
|
||||
return 0
|
||||
return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s))
|
||||
|
||||
# Probability of getting k red balls after drawing n from a bag with m total balls and j red balls in it
|
||||
# (n choose k) * p^k * (1-p)^(n-k)
|
||||
|
||||
# p/m chance of getting a red ball
|
||||
# (1 - p/m) chance of not getting a red ball
|
||||
|
||||
# One way (p/m) * ((p-1)/(m-1)) * ((p-2)/(m-2))
|
||||
# (1 - (p/m))
|
||||
|
||||
def p_bernoulli(n, k, m, j):
|
||||
# probabilities = np.zeros((n + 1, n + 1))
|
||||
# probabilities.fill(-1)
|
||||
# # if n == k:
|
||||
# # return 1.0
|
||||
# # if k > p:
|
||||
# # return 0.0
|
||||
# stack = [(0,0)]
|
||||
# while len(stack) > 0:
|
||||
# (a, b) = stack.pop()
|
||||
# if a + b == n:
|
||||
# probabilities[a][b] = 1 if a == k else 0
|
||||
# elif a > j:
|
||||
# probabilities[a][b] = 0
|
||||
# elif b > (m - j):
|
||||
# probabilities[a][b] = 0
|
||||
# else:
|
||||
# p_left = probabilities[a + 1][b]
|
||||
# p_right = probabilities[a][b + 1]
|
||||
# if p_left >= 0 and p_right >= 0:
|
||||
# p = (j - a) / (m - a - b)
|
||||
# probabilities[a][b] = p_left * p + p_right * (1 - p)
|
||||
# else:
|
||||
# stack.append((a, b))
|
||||
# if p_left < 0:
|
||||
# stack.append((a + 1, b))
|
||||
# if p_right < 0:
|
||||
# stack.append((a, b + 1))
|
||||
# return probabilities[0][0]
|
||||
|
||||
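# In this version the exact without-replacement recursion is commented out above;
# the code below approximates it with a binomial draw at p = j / m, building up
# comb(n, k) * p**k * (1 - p)**(n - k) while interleaving the factors to keep the
# running product from overflowing.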
p = j / m
|
||||
P = 1.0
|
||||
p_k = 0
|
||||
p_nk = 0
|
||||
for i in range(1, k + 1):
|
||||
P *= (n + 1 - i) / i
|
||||
while P > 1.0 and p_k < k:
|
||||
P *= p
|
||||
p_k += 1
|
||||
while P > 1.0 and p_nk < (n - k):
|
||||
P *= (1 - p)
|
||||
p_nk += 1
|
||||
while p_k < k:
|
||||
P *= p
|
||||
p_k += 1
|
||||
while (p_nk < (n - k)):
|
||||
P *= (1 - p)
|
||||
p_nk += 1
|
||||
return P
|
||||
|
||||
def average_index(x):
|
||||
total = 0
|
||||
for k in range(0, len(x)):
|
||||
total += k * x[k]
|
||||
return total / np.sum(x)
|
||||
|
||||
def compute_cumulative_probability(N, bases, p_n):
|
||||
# p_n = np.zeros(N)
|
||||
# p_n.fill(0.5)
|
||||
states = [[]]
|
||||
flips = set()
|
||||
for i in range(1, len(bases)):
|
||||
# (base, _) = bases[i]
|
||||
(_, flip) = bases[i]
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for k in range(0, N - 1):
|
||||
# p_forward += base[k + 1] * next_p[k]
|
||||
# p_backward += base[k] * next_p[k + 1]
|
||||
if flip in flips:
|
||||
# p_n[flip] -= p_forward
|
||||
# p_n[flip] += p_backward
|
||||
flips.remove(flip)
|
||||
else:
|
||||
# p_n[flip] += p_forward
|
||||
# p_n[flip] -= p_backward
|
||||
flips.add(flip)
|
||||
states.append(flips.copy())
|
||||
# np.clip(p_n, 0, 1, p_n)
|
||||
# print('Contribution probabilities', p_n)
|
||||
|
||||
min_p_n = np.min(p_n)
|
||||
max_p_n = np.max(p_n)
|
||||
|
||||
|
||||
p_k = np.zeros(N)
|
||||
for k in range(0, N):
|
||||
stack = [(k, len(bases) - 1)]
|
||||
probabilities = np.zeros((N, len(bases)))
|
||||
probabilities.fill(-1)
|
||||
while len(stack) > 0:
|
||||
(i, base_index) = stack.pop()
|
||||
(base, flip) = bases[base_index]
|
||||
if base_index == 0:
|
||||
probabilities[i, 0] = base[i]
|
||||
else:
|
||||
left = i - 1
|
||||
right = i + 1
|
||||
state = states[base_index - 1]
|
||||
p_flip = max(min(p_n[flip] + 0.5, 1.0), 0)
|
||||
if flip in state:
|
||||
p_flip = 1 - p_flip
|
||||
p_left = probabilities[left, base_index - 1] if left >= 0 else 0
|
||||
p_right = probabilities[right, base_index - 1] if right < N else 0
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
probabilities[i, base_index] = base[i] * p_left * (1 - p_flip) + base[i] * p_right * p_flip
|
||||
else:
|
||||
stack.append((i, base_index))
|
||||
if p_left < 0:
|
||||
stack.append((left, base_index - 1))
|
||||
if p_right < 0:
|
||||
stack.append((right, base_index - 1))
|
||||
p_k[k] = probabilities[k][-1]
|
||||
np.divide(p_k, np.sum(p_k), p_k)
|
||||
return p_k
|
||||
|
||||
# 8, 32, 2^5
|
||||
# 10, 64, 2^6
|
||||
# 12, 128, 2^7
|
||||
# 14, 256, 2^8
|
||||
# 16, 512, 2^9
|
||||
# 18, 1024, 2^10
|
||||
# 20, 2048, 2^11
|
||||
# 22, 4096, 2^12
|
||||
def main():
|
||||
N = 16
|
||||
sample_size = 128
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(xor(x))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
# confusion = confusion_probabilities(N, samples)
|
||||
# print(confusion)
|
||||
# return
|
||||
|
||||
# for i in range(0, 2**N):
|
||||
# x = decode(i, N)
|
||||
# y = int(xor(x))
|
||||
# samples.append(Point(x,y))
|
||||
|
||||
base = np.zeros(N)
|
||||
current = np.zeros(N)
|
||||
cumulative_probability = np.ones(N)
|
||||
flip_likelihood = np.zeros(N)
|
||||
cumulative_deltas = np.zeros(N)
|
||||
direction = -1
|
||||
flips = set()
|
||||
bases = []
|
||||
last_flip = -1
|
||||
|
||||
for _ in range(0, 2 ** N):
|
||||
lowest_err = -1
|
||||
use_flip = -1
|
||||
for flip in range(-1, N):
|
||||
coherent_distances = np.zeros(N+1)
|
||||
incoherent_distances = np.zeros(N+1)
|
||||
all_coherent = True
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
|
||||
if is_coherent:
|
||||
coherent_distances[distance] += 1
|
||||
else:
|
||||
incoherent_distances[distance] += 1
|
||||
all_coherent = False
|
||||
if all_coherent:
|
||||
print('Flip and halt', flip)
|
||||
return
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
# est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances))
|
||||
# print(est_incoherence)
|
||||
|
||||
probability = np.ones(N)
|
||||
# np.divide(probability, np.sum(probability), probability)
|
||||
for j in range(1, N + 1):
|
||||
n = coherent_distances[j] + incoherent_distances[j]
|
||||
if n == 0:
|
||||
continue
|
||||
for k in range(0, N):
|
||||
a = incoherent_distances[j]
|
||||
t = math.comb(N, j) * (2 ** N)
|
||||
p = dist[k][j - 1] * (2 ** N)
|
||||
prob = p_bernoulli(int(n), int(a), t, p)
|
||||
probability[k] *= prob
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
|
||||
if flip < 0:
|
||||
np.copyto(base, probability)
|
||||
else:
|
||||
np.copyto(current, probability)
|
||||
|
||||
|
||||
# print(k, i, min_true_value, max_true_value)
|
||||
|
||||
# confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative
|
||||
# err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence
|
||||
# denom += 1
|
||||
# print(flip, k, err)
|
||||
# err /= denom
|
||||
# if flip < 0:
|
||||
# base[k] = probability
|
||||
# else:
|
||||
# current[k] = probability
|
||||
|
||||
if flip >= 0:
|
||||
if np.sum(current) == 0:
|
||||
continue
|
||||
np.divide(current, np.sum(current), current)
|
||||
# print(current)
|
||||
# temp = np.roll(cumulative_probability, -1)
|
||||
# temp[-1] = 1.0
|
||||
# np.multiply(current, temp, current)
|
||||
# np.divide(current, np.sum(current), current)
|
||||
p_forward = 0
|
||||
p_backward = 0
|
||||
for i in range(1, N):
|
||||
p_forward += base[i] * current[i - 1]
|
||||
for i in range(0, N - 1):
|
||||
p_backward += base[i] * current[i + 1]
|
||||
scale = 0.01
|
||||
if flip in flips:
|
||||
flip_likelihood[flip] += scale * p_backward
|
||||
flip_likelihood[flip] -= scale * p_forward
|
||||
else:
|
||||
flip_likelihood[flip] -= scale * p_backward
|
||||
flip_likelihood[flip] += scale * p_forward
|
||||
delta = p_forward - p_backward
|
||||
print(flip, current, p_forward, p_backward)
|
||||
base_index = average_index(base)
|
||||
current_index = average_index(current)
|
||||
err = abs(1 - (base_index - current_index))
|
||||
print(base_index, current_index, err)
|
||||
|
||||
# base_index = average_index(cumulative_probability)
|
||||
# new_index = average_index(current)
|
||||
# if isnan(new_index):
|
||||
# continue
|
||||
# np.divide(current, np.sum(current), current)
|
||||
# np.subtract(1, current, current)
|
||||
# print(flip,p_forward,p_backward,current)
|
||||
if delta > 0 and (use_flip < 0 or delta > lowest_err):
|
||||
use_flip = flip
|
||||
lowest_err = delta
|
||||
|
||||
# cumulative_deltas[flip] += 0
|
||||
|
||||
# for k in range(0, N - 1):
|
||||
# value = current[k] * cumulative_probability[k + 1]
|
||||
# if use_flip < 0 or value > lowest_err:
|
||||
# use_flip = flip
|
||||
# lowest_err = value
|
||||
# print(flip, highest_value)
|
||||
else:
|
||||
# p_next = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# P = 0.0
|
||||
# for j in range(0, N):
|
||||
# if i == j:
|
||||
# continue
|
||||
# P += base[i] * (1 - base[j])
|
||||
# p_next[i] = P
|
||||
# base = p_next
|
||||
|
||||
# base[0] = 0
|
||||
np.divide(base, np.sum(base), base)
|
||||
bases.append((base.copy(), last_flip))
|
||||
# bases.insert(0, base.copy())
|
||||
# cumulative_probability = compute_cumulative_probability(N, bases)
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for i in range(1, N):
|
||||
# p_forward += cumulative_probability[i] * base[i - 1]
|
||||
# for i in range(0, N - 1):
|
||||
# p_backward += cumulative_probability[i] * base[i + 1]
|
||||
print('Base', base)
|
||||
# # # np.subtract(1, base, base)
|
||||
# # # print(cumulative_probability)
|
||||
# shift_left = np.roll(cumulative_probability, -1)
|
||||
# shift_left[-1] = 0.0
|
||||
# # # # print('Shift Left', p_forward, shift_left)
|
||||
# shift_right = np.roll(cumulative_probability, 1)
|
||||
# shift_right[0] = 0.0
|
||||
# # # # print('Shift Right', p_backward, shift_right)
|
||||
# p_next = np.add(np.multiply(shift_left, 0.5), np.multiply(shift_right, 0.5))
|
||||
# p_next[0] = 0
|
||||
# np.divide(p_next, np.sum(p_next), p_next)
|
||||
# # # # print('Next', p_next)
|
||||
# # # # # print(cumulative_probability)
|
||||
# # # # # print(base)
|
||||
# np.multiply(base, p_next, cumulative_probability)
|
||||
# cumulative_probability[0] = 0
|
||||
# # # # # np.multiply(cumulative_probability, shift_right, cumulative_probability)
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
cumulative_probability = compute_cumulative_probability(N, bases, flip_likelihood)
|
||||
print('Cumulative', cumulative_probability)
|
||||
print('Likelihood', flip_likelihood)
|
||||
|
||||
# cumulative_probability[0] = 0
|
||||
# use_flip = -1
|
||||
# if direction < 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] < 0:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# direction = 1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# else:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] > 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# direction = -1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# if direction < 0:
|
||||
# cumulative_probability[0] = 0
|
||||
# else:
|
||||
# cumulative_probability[-1] = 0
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
# print(cumulative_deltas)
|
||||
|
||||
# use_flip = -1
|
||||
# highest_p = 0
|
||||
# for i in range(0, N):
|
||||
# p = flip_likelihood[i]
|
||||
# if i in flips:
|
||||
# p = -p
|
||||
# if use_flip < 0 or p > highest_p:
|
||||
# use_flip = i
|
||||
# highest_p = p
|
||||
# if not use_flip in flips and highest_p < 0 or use_flip in flips and highest_p > 0:
|
||||
# flip_likelihood[use_flip] *= -1.0
|
||||
|
||||
if use_flip < 0:
|
||||
return
|
||||
last_flip = use_flip
|
||||
if use_flip in flips:
|
||||
flips.remove(use_flip)
|
||||
else:
|
||||
flips.add(use_flip)
|
||||
print('Flip', use_flip, lowest_err)
|
||||
print(flips)
|
||||
cumulative_deltas[use_flip] = -cumulative_deltas[use_flip]
|
||||
for p in samples:
|
||||
if p.x[use_flip]:
|
||||
p.y ^= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
791
mutations25.py
Normal file
@ -0,0 +1,791 @@
|
||||
from cmath import isnan
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ','.join([str(int(x)) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def id(self):
|
||||
return ','.join(sorted([self.a.id(), self.b.id()]))
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a.x, b.x))
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor(x):
|
||||
# return sum(x) % 2
|
||||
half = int(len(x) * 3 / 4)
|
||||
return sum(x[:half]) % 2
|
||||
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent():
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent() and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent:
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 1st row (n+1 choose k+1) * (1-(k mod 2))
|
||||
# pseudopascal to compute the follow-on rows
|
||||
# assuming solvability, we want to maximize the probability that our current state and our state with
|
||||
# a particular single flip are one order apart in the correct direction
|
||||
|
||||
|
||||
|
||||
# 2, 0
|
||||
# 2, 2, 0
|
||||
# 2, 4, 2, 0
|
||||
# 2, 6, 6, 2, 0
|
||||
# 2, 8,12, 8, 2, 0
|
||||
# 2,10,20,20,10, 2, 0
|
||||
|
||||
# 3,-9,19,-33,51,-73,99
|
||||
# 3,-6,10,-14,18,-22,26
|
||||
# 3,-3, 4, -4, 4, -4, 4
|
||||
# 3, 0, 1, 0, 0, 0, 0
|
||||
# 3, 3, 1, 1, 0, 0, 0
|
||||
# 3, 6, 4, 2, 1, 0, 0
|
||||
# 3, 9,10, 6, 3, 1, 0
|
||||
|
||||
# 4, 0, 4, 0
|
||||
# 4, 4, 4, 4, 0
|
||||
# 4, 8, 8, 8, 4, 0
|
||||
# 4,12,16,16,12, 4, 0
|
||||
|
||||
# 5, 0,10, 0, 1
|
||||
# 5, 5,10,10, 1, 1
|
||||
# 5,
|
||||
# 5,
|
||||
|
||||
|
||||
|
||||
# 3
|
||||
#
|
||||
# @1 [1, 2, 1]
|
||||
# @2 [2, 2, 0]
|
||||
# @3 [3, 0, 1]
|
||||
|
||||
# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
|
||||
#
|
||||
# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
|
||||
# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
|
||||
# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
|
||||
# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -
|
||||
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -
|
||||
|
||||
# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
|
||||
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
|
||||
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
|
||||
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
|
||||
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]
|
||||
|
||||
# 6
|
||||
#
|
||||
# @1 [1, 5, 10, 10, 5, 1]
|
||||
# @2 [2, 8, 12, 8, 2, 0]
|
||||
# @3 [3, 9, 10, 6, 3, 1]
|
||||
# @4 [4, 8, 8, 8, 4, 0]
|
||||
# @5 [5, 5, 10, 10, 1, 1]
|
||||
# @6 [6, 0, 20, 0, 6, 0]
|
||||
|
||||
# last row, 1 if odd, 0 if even
|
||||
# second to last, subtract 2 on odds, add 2 on evens
|
||||
|
||||
def compute_pseudopascal(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
return dist
|
||||
|
||||
def compute_distributions(N):
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
denom = math.comb(N, j+1)
|
||||
dist[i][j] /= denom
|
||||
return dist
|
||||
|
||||
def compute_pyramids(N):
|
||||
num_orders = max(int(N / 2), 1)
|
||||
pyramids = np.zeros((num_orders, N, N)).astype(np.int32)
|
||||
# 1st order can be filled in as multiplication and forms the base case
|
||||
for i in range(0, N):
|
||||
for j in range(0, i + 1):
|
||||
pyramids[0][i][j] = (i - j + 1) * (j + 1)
|
||||
for order in range(1, num_orders):
|
||||
offset = order * 2
|
||||
|
||||
# fill in the LHS and diagonal
|
||||
for i in range(0, N - offset):
|
||||
value = math.comb(2 * (order + 1) + i - 1, i)
|
||||
pyramids[order][i + offset][0] = value
|
||||
# mirror
|
||||
pyramids[order][i + offset][i + offset] = value
|
||||
|
||||
# accumulate along the diagonals
|
||||
for i in range(1, N):
|
||||
value = pyramids[order][i][0]
|
||||
acc = value
|
||||
for j in range(1, N - i):
|
||||
value += acc
|
||||
pyramids[order][i + j][j] = value
|
||||
acc += pyramids[order - 1][i + j - 1][j - 1]
|
||||
|
||||
return pyramids
|
||||
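# compute_pyramids(N) returns an integer array of shape (max(N // 2, 1), N, N),
# one lower-triangular "pyramid" per order. A small sketch of the first-order
# slab, which is just the multiplication table (i - j + 1) * (j + 1); the helper
# name is illustrative and the values are traced from the code as written:
def _example_first_order_pyramid():
    pyramids = compute_pyramids(5)
    # pyramids[0][3][:4] == [4, 6, 6, 4]
    # pyramids[0][4][:5] == [5, 8, 9, 8, 5]
    return pyramids[0]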
|
||||
def get_total_band_count(distance, band_distance, N):
|
||||
if band_distance % 2 == 1:
|
||||
return 0
|
||||
order = int(band_distance / 2) - 1
|
||||
if order < 0:
|
||||
return 0
|
||||
if distance < order + 1:
|
||||
return 0
|
||||
if distance > N - order - 1:
|
||||
return 0
|
||||
order_root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2
|
||||
scale = math.comb(N - (order + 1) * 2, distance - order - 1)
|
||||
value = math.comb(2 * (order + 1) + N - 2 * (order + 1), N - 2 * (order + 1))
|
||||
return order_root * scale * value
|
||||
|
||||
def get_incoherent_band_count(pyramids, distance, band_distance, k, N):
|
||||
if k == 0 or k == N or band_distance % 2 == 1:
|
||||
return 0
|
||||
order = int(band_distance / 2) - 1
|
||||
if order < 0:
|
||||
return 0
|
||||
if distance < order + 1:
|
||||
return 0
|
||||
if distance > N - order - 1:
|
||||
return 0
|
||||
order_root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2
|
||||
scale = math.comb(N - (order + 1) * 2, distance - order - 1)
|
||||
value = pyramids[order][N - 2][k - 1]
|
||||
return order_root * scale * value
|
||||
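# A hedged usage sketch for the two band-count helpers above; the helper name is
# illustrative and the concrete numbers come from tracing the code as written
# for N = 4, distance = 1, band_distance = 2 (odd band distances return 0):
def _example_band_counts():
    N = 4
    pyramids = compute_pyramids(N + 1)  # main() below also builds pyramids with N + 1
    total = get_total_band_count(1, 2, N)  # 2 * comb(2, 0) * comb(4, 2) == 12.0
    incoherent = get_incoherent_band_count(pyramids, 1, 2, 1, N)  # 2 * 1 * pyramids[0][2][0] == 6.0
    return total, incoherent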
|
||||
def confusion_probabilities(N, samples):
|
||||
sample_sizes = np.zeros(N)
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
if i == j:
|
||||
continue
|
||||
distance = hamming_distance(a, b)
|
||||
sample_sizes[distance - 1] += 1
|
||||
|
||||
confusion = np.zeros((N, N))
|
||||
dist = compute_pseudopascal(N)
|
||||
np.multiply(dist, 2 ** N, dist)
|
||||
# These are the probabilities that we might mix up any two orders given a particular sample size
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
probability = 1.0
|
||||
for k in range(0, N):
|
||||
full_size = math.comb(N, k+1) * (2 ** N)
|
||||
sample_size = sample_sizes[k]
|
||||
num_unknowns = full_size - sample_size
|
||||
i_incoherent = dist[i][k]
|
||||
# Worst case, we sample only the coherent points,
|
||||
i_min = max(i_incoherent - num_unknowns, 0) / full_size
|
||||
i_max = min(sample_size, i_incoherent) / full_size
|
||||
u = (i_min + i_max) / 2  # midpoint of the feasible incoherence range
|
||||
s = (i_max - i_min) / 2
|
||||
probability *= raised_cosine(dist[j][k] / full_size, u, s)
|
||||
confusion[i][j] = probability
|
||||
return confusion
|
||||
|
||||
def raised_cosine(x, u, s):
|
||||
if x < (u - s):
|
||||
return 0
|
||||
if x > (u + s):
|
||||
return 0
|
||||
return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s))
|
||||
|
||||
# Probability of getting k red balls after drawing n from a bag with m total balls and j red balls in it
|
||||
# (n choose k) * p^k * (1-p)^(n-k)
|
||||
|
||||
# p/m chance of getting a red ball
|
||||
# (1 - p/m) chance of not getting a red ball
|
||||
|
||||
# One way (p/m) * ((p-1)/(m-1)) * ((p-2)/(m-2))
|
||||
# (1 - (p/m))
|
||||
|
||||
cache = {}
|
||||
hits = 0
|
||||
misses = 0
|
||||
def p_bernoulli(n, k, m, j):
|
||||
global hits, misses
|
||||
key = (n, k, m, j)
|
||||
if key in cache:
|
||||
hits += 1
|
||||
return cache[key]
|
||||
misses += 1
|
||||
probabilities = np.zeros((n + 1, n + 1))
|
||||
probabilities.fill(-1)
|
||||
stack = [(0,0)]
|
||||
while len(stack) > 0:
|
||||
(a, b) = stack.pop()
|
||||
if a + b == n:
|
||||
probabilities[a][b] = 1 if a == k else 0
|
||||
elif a > j:
|
||||
probabilities[a][b] = 0
|
||||
elif b > (m - j):
|
||||
probabilities[a][b] = 0
|
||||
else:
|
||||
p_left = probabilities[a + 1][b]
|
||||
p_right = probabilities[a][b + 1]
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
p = (j - a) / (m - a - b)
|
||||
probabilities[a][b] = p_left * p + p_right * (1 - p)
|
||||
else:
|
||||
stack.append((a, b))
|
||||
if p_left < 0:
|
||||
stack.append((a + 1, b))
|
||||
if p_right < 0:
|
||||
stack.append((a, b + 1))
|
||||
cache[key] = probabilities[0][0]
|
||||
# if len(cache) % 100 == 0:
|
||||
# print('Cache size: ', len(cache), math.floor(10000 * hits / (hits + misses)) / 100, '%')
|
||||
return probabilities[0][0]
|
||||
|
||||
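# Note: everything below this point is unreachable because the function returns
# above; it is the older binomial approximation, kept for reference.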
p = j / m
|
||||
if n == k:
|
||||
return 1.0
|
||||
if k > p:
|
||||
return 0.0
|
||||
P = 1.0
|
||||
p_k = 0
|
||||
p_nk = 0
|
||||
for i in range(1, k + 1):
|
||||
P *= (n + 1 - i) / i
|
||||
while P > 1.0 and p_k < k:
|
||||
P *= p
|
||||
p_k += 1
|
||||
while P > 1.0 and p_nk < (n - k):
|
||||
P *= (1 - p)
|
||||
p_nk += 1
|
||||
while p_k < k:
|
||||
P *= p
|
||||
p_k += 1
|
||||
while (p_nk < (n - k)):
|
||||
P *= (1 - p)
|
||||
p_nk += 1
|
||||
return P
|
||||
|
||||
def average_index(x):
|
||||
total = 0
|
||||
for k in range(0, len(x)):
|
||||
total += k * x[k]
|
||||
return total / np.sum(x)
|
||||
|
||||
def compute_cumulative_probability(N, bases, p_n):
|
||||
# p_n = np.zeros(N)
|
||||
# p_n.fill(0.5)
|
||||
states = [[]]
|
||||
flips = set()
|
||||
for i in range(1, len(bases)):
|
||||
# (base, _) = bases[i]
|
||||
(_, flip) = bases[i]
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for k in range(0, N - 1):
|
||||
# p_forward += base[k + 1] * next_p[k]
|
||||
# p_backward += base[k] * next_p[k + 1]
|
||||
if flip in flips:
|
||||
# p_n[flip] -= p_forward
|
||||
# p_n[flip] += p_backward
|
||||
flips.remove(flip)
|
||||
else:
|
||||
# p_n[flip] += p_forward
|
||||
# p_n[flip] -= p_backward
|
||||
flips.add(flip)
|
||||
states.append(flips.copy())
|
||||
# np.clip(p_n, 0, 1, p_n)
|
||||
# print('Contribution probabilities', p_n)
|
||||
|
||||
min_p_n = np.min(p_n)
|
||||
max_p_n = np.max(p_n)
|
||||
|
||||
|
||||
p_k = np.zeros(N)
|
||||
for k in range(0, N):
|
||||
stack = [(k, len(bases) - 1)]
|
||||
probabilities = np.zeros((N, len(bases)))
|
||||
probabilities.fill(-1)
|
||||
while len(stack) > 0:
|
||||
(i, base_index) = stack.pop()
|
||||
(base, flip) = bases[base_index]
|
||||
if base_index == 0:
|
||||
probabilities[i, 0] = base[i]
|
||||
else:
|
||||
left = i - 1
|
||||
right = i + 1
|
||||
state = states[base_index - 1]
|
||||
p_flip = max(min(p_n[flip] + 0.5, 1.0), 0)
|
||||
if flip in state:
|
||||
p_flip = 1 - p_flip
|
||||
p_left = probabilities[left, base_index - 1] if left >= 0 else 0
|
||||
p_right = probabilities[right, base_index - 1] if right < N else 0
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
probabilities[i, base_index] = base[i] * p_left * (1 - p_flip) + base[i] * p_right * p_flip
|
||||
else:
|
||||
stack.append((i, base_index))
|
||||
if p_left < 0:
|
||||
stack.append((left, base_index - 1))
|
||||
if p_right < 0:
|
||||
stack.append((right, base_index - 1))
|
||||
p_k[k] = probabilities[k][-1]
|
||||
np.divide(p_k, np.sum(p_k), p_k)
|
||||
return p_k
|
||||
|
||||
# 8, 32, 2^5
|
||||
# 10, 64, 2^6
|
||||
# 12, 128, 2^7
|
||||
# 14, 256, 2^8
|
||||
# 16, 512, 2^9
|
||||
# 18, 1024, 2^10
|
||||
# 20, 2048, 2^11
|
||||
# 22, 4096, 2^12
|
||||
def main():
|
||||
N = 10
|
||||
sample_size = 32
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
dist = compute_pseudopascal(N)
|
||||
pyramids = compute_pyramids(N + 1)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(xor(x))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
# confusion = confusion_probabilities(N, samples)
|
||||
# print(confusion)
|
||||
# return
|
||||
|
||||
# for i in range(0, 2**N):
|
||||
# x = decode(i, N)
|
||||
# y = int(xor(x))
|
||||
# samples.append(Point(x,y))
|
||||
|
||||
base = np.zeros(N)
|
||||
current = np.zeros(N)
|
||||
cumulative_probability = np.ones(N)
|
||||
flip_likelihood = np.zeros(N)
|
||||
cumulative_deltas = np.zeros(N)
|
||||
direction = -1
|
||||
flips = set()
|
||||
bases = []
|
||||
last_flip = -1
|
||||
max_base_index = -1
|
||||
scores = np.zeros(N)
|
||||
indices = []
|
||||
|
||||
for _ in range(0, 2 ** N):
|
||||
lowest_err = -1
|
||||
use_flip = -1
|
||||
for flip in range(-1, N):
|
||||
coherent_distances = np.zeros(N+1)
|
||||
incoherent_distances = np.zeros(N+1)
|
||||
probability = np.ones(N)
|
||||
all_coherent = True
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
bands = [[] for _ in range(0, N + 1)]
|
||||
for j in range(0, len(samples)):
|
||||
if i == j:
|
||||
continue
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
bands[distance].append(b)
|
||||
is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
|
||||
if is_coherent:
|
||||
coherent_distances[distance] += 1
|
||||
else:
|
||||
incoherent_distances[distance] += 1
|
||||
all_coherent = False
|
||||
for distance in range(0, N + 1):
|
||||
band = bands[distance]
|
||||
if len(band) < 2:
|
||||
continue
|
||||
coherent_bands = np.zeros(N + 1)
|
||||
incoherent_bands = np.zeros(N + 1)
|
||||
for j in range(0, len(band)):
|
||||
c = band[j]
|
||||
for k in range(0, len(band)):
|
||||
if j == k:
|
||||
continue
|
||||
d = band[k]
|
||||
band_distance = hamming_distance(c, d)
|
||||
is_coherent = ((flip < 0 or c.x[flip] == d.x[flip]) and c.y == d.y) or ((flip >= 0 and c.x[flip] != d.x[flip]) and c.y != d.y)
|
||||
if is_coherent:
|
||||
coherent_bands[band_distance] += 1
|
||||
else:
|
||||
incoherent_bands[band_distance] += 1
|
||||
for band_distance in range(1, N + 1):
|
||||
n = coherent_bands[band_distance] + incoherent_bands[band_distance]
|
||||
if n == 0:
|
||||
continue
|
||||
t = get_total_band_count(distance, band_distance, N)
|
||||
if t == 0:
|
||||
continue
|
||||
a = incoherent_bands[band_distance]
|
||||
for k in range(0, N):
|
||||
p = get_incoherent_band_count(pyramids, distance, band_distance, k + 1, N)
|
||||
prob = p_bernoulli(int(n), int(a), t, p)
|
||||
# if prob == 0 and k == 5:
|
||||
# p = get_incoherent_band_count(pyramids, distance, band_distance, k, N)
|
||||
# print('test')
|
||||
probability[k] *= prob
|
||||
if np.sum(probability) == 0:
|
||||
print('Warning: probability vector collapsed to zero')
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
|
||||
if all_coherent:
|
||||
print('Flip and halt', flip)
|
||||
return
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
# est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances))
|
||||
# print(est_incoherence)
|
||||
# np.divide(probability, np.sum(probability), probability)
|
||||
for j in range(1, N + 1):
|
||||
n = coherent_distances[j] + incoherent_distances[j]
|
||||
if n == 0:
|
||||
continue
|
||||
t = math.comb(N, j) * (2 ** N)
|
||||
if t == 0:
|
||||
continue
|
||||
a = incoherent_distances[j]
|
||||
for k in range(0, N):
|
||||
p = dist[k][j - 1] * (2 ** N)
|
||||
prob = p_bernoulli(int(n), int(a), t, p)
|
||||
probability[k] *= prob
|
||||
if np.sum(probability) == 0:
|
||||
print('Warning: probability vector collapsed to zero')
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
|
||||
if flip < 0:
|
||||
np.copyto(base, probability)
|
||||
else:
|
||||
np.copyto(current, probability)
|
||||
|
||||
|
||||
# print(k, i, min_true_value, max_true_value)
|
||||
|
||||
# confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative
|
||||
# err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence
|
||||
# denom += 1
|
||||
# print(flip, k, err)
|
||||
# err /= denom
|
||||
# if flip < 0:
|
||||
# base[k] = probability
|
||||
# else:
|
||||
# current[k] = probability
|
||||
|
||||
if flip >= 0:
|
||||
if np.sum(current) == 0:
|
||||
continue
|
||||
np.divide(current, np.sum(current), current)
|
||||
|
||||
# print(current)
|
||||
# temp = np.roll(cumulative_probability, -1)
|
||||
# temp[-1] = 1.0
|
||||
# np.multiply(current, temp, current)
|
||||
# np.divide(current, np.sum(current), current)
|
||||
p_forward = 0
|
||||
p_backward = 0
|
||||
for i in range(1, N):
|
||||
p_forward += base[i] * current[i - 1]
|
||||
for i in range(0, N - 1):
|
||||
p_backward += base[i] * current[i + 1]
|
||||
scores[flip] += p_forward - p_backward
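# Added commentary (not in the original commit): base is the estimated distribution over
# the order k for the current labels, current is the same distribution after XOR-ing every
# label by bit `flip`; p_forward is the probability mass on current sitting one order lower
# than base, p_backward on it sitting one order higher, so scores[flip] accumulates the net
# evidence that flipping this bit moves the sample toward a lower-order function.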
|
||||
|
||||
scale = 0.01
|
||||
if flip in flips:
|
||||
flip_likelihood[flip] += scale * p_backward
|
||||
flip_likelihood[flip] -= scale * p_forward
|
||||
else:
|
||||
flip_likelihood[flip] -= scale * p_backward
|
||||
flip_likelihood[flip] += scale * p_forward
|
||||
delta = p_forward - p_backward
|
||||
# print(flip, current, p_forward, p_backward)
|
||||
base_index = average_index(cumulative_probability)
|
||||
current_index = average_index(current)
|
||||
err = abs(1 - (base_index - current_index))
|
||||
# print(base_index, current_index, err)
|
||||
|
||||
# base_index = average_index(cumulative_probability)
|
||||
# new_index = average_index(current)
|
||||
# if isnan(new_index):
|
||||
# continue
|
||||
# np.divide(current, np.sum(current), current)
|
||||
# np.subtract(1, current, current)
|
||||
# print(flip,p_forward,p_backward,current)
|
||||
if use_flip < 0 or delta > lowest_err:
|
||||
use_flip = flip
|
||||
lowest_err = delta
|
||||
|
||||
# cumulative_deltas[flip] += 0
|
||||
|
||||
# for k in range(0, N - 1):
|
||||
# value = current[k] * cumulative_probability[k + 1]
|
||||
# if use_flip < 0 or value > lowest_err:
|
||||
# use_flip = flip
|
||||
# lowest_err = value
|
||||
# print(flip, highest_value)
|
||||
else:
|
||||
# p_next = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# P = 0.0
|
||||
# for j in range(0, N):
|
||||
# if i == j:
|
||||
# continue
|
||||
# P += base[i] * (1 - base[j])
|
||||
# p_next[i] = P
|
||||
# base = p_next
|
||||
|
||||
# base[0] = 0
|
||||
np.divide(base, np.sum(base), base)
|
||||
max_base_index = np.argmax(base)
|
||||
bases.append((base.copy(), last_flip))
|
||||
# bases.insert(0, base.copy())
|
||||
# cumulative_probability = compute_cumulative_probability(N, bases)
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for i in range(1, N):
|
||||
# p_forward += cumulative_probability[i] * base[i - 1]
|
||||
# for i in range(0, N - 1):
|
||||
# p_backward += cumulative_probability[i] * base[i + 1]
|
||||
print('Base', base)
|
||||
# # np.subtract(1, base, base)
|
||||
# # print(cumulative_probability)
|
||||
# shift_left = np.roll(cumulative_probability, -len(indices))
|
||||
# shift_left[-1] = 0.0
|
||||
# # # # print('Shift Left', p_forward, shift_left)
|
||||
# shift_right = np.roll(cumulative_probability, 1)
|
||||
# shift_right[0] = 0.0
|
||||
# # # # print('Shift Right', p_backward, shift_right)
|
||||
# p_next = np.add(np.multiply(shift_left, 0.5), np.multiply(shift_right, 0.5))
|
||||
# np.divide(p_next, np.sum(p_next), p_next)
|
||||
# # # # # print('Next', p_next)
|
||||
# # # # # # print(cumulative_probability)
|
||||
# # # # # # print(base)
|
||||
# np.multiply(base, p_next, cumulative_probability)
|
||||
# cumulative_probability[0] = 0
|
||||
# # # # # np.multiply(cumulative_probability, shift_right, cumulative_probability)
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
# cumulative_probability = compute_cumulative_probability(N, bases, flip_likelihood)
|
||||
# print('Cumulative', cumulative_probability)
|
||||
# print('Likelihood', flip_likelihood)
|
||||
|
||||
# cumulative_probability[0] = 0
|
||||
# use_flip = -1
|
||||
# if direction < 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] < 0:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# direction = 1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# else:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] > 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# direction = -1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# if direction < 0:
|
||||
# cumulative_probability[0] = 0
|
||||
# else:
|
||||
# cumulative_probability[-1] = 0
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
# print(cumulative_deltas)
|
||||
|
||||
# use_flip = -1
|
||||
# highest_p = 0
|
||||
# for i in range(0, N):
|
||||
# p = flip_likelihood[i]
|
||||
# if i in flips:
|
||||
# p = -p
|
||||
# if use_flip < 0 or p > highest_p:
|
||||
# use_flip = i
|
||||
# highest_p = p
|
||||
# if not use_flip in flips and highest_p < 0 or use_flip in flips and highest_p > 0:
|
||||
# flip_likelihood[use_flip] *= -1.0
|
||||
print(scores)
|
||||
|
||||
indices = sorted(range(len(scores)), key=lambda i: scores[i])[-(max_base_index + 1):]
|
||||
print(indices)
|
||||
|
||||
for flip in indices:
|
||||
scores[flip] *= -1.0
|
||||
if flip in flips:
|
||||
flips.remove(flip)
|
||||
else:
|
||||
flips.add(flip)
|
||||
for p in samples:
|
||||
if p.x[flip]:
|
||||
p.y ^= 1
|
||||
print(flips)
|
||||
|
||||
# if use_flip < 0:
|
||||
# return
|
||||
# last_flip = use_flip
|
||||
# if use_flip in flips:
|
||||
# flips.remove(use_flip)
|
||||
# else:
|
||||
# flips.add(use_flip)
|
||||
# print('Flip', use_flip, lowest_err)
|
||||
# print(flips)
|
||||
# cumulative_deltas[use_flip] = -cumulative_deltas[use_flip]
|
||||
# for p in samples:
|
||||
# if p.x[use_flip]:
|
||||
# p.y ^= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
741
mutations26.py
Normal file
@ -0,0 +1,741 @@
|
||||
from cmath import isnan
|
||||
import numpy as np
|
||||
import random
|
||||
import hashlib
|
||||
import math
|
||||
|
||||
def get_state_id(state):
|
||||
return ','.join([str(x) for x in sorted(state)])
|
||||
|
||||
class Point():
|
||||
def __init__(self, x, y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def id(self):
|
||||
return ','.join([str(int(x)) for x in self.x])
|
||||
|
||||
class Influence():
|
||||
def __init__(self, a, b):
|
||||
self.a = a
|
||||
self.b = b
|
||||
self.original_dof = set()
|
||||
self.dof = set()
|
||||
for i in range(0, len(a.x)):
|
||||
if a.x[i] != b.x[i]:
|
||||
self.original_dof.add(i)
|
||||
self.dof.add(i)
|
||||
|
||||
def coherent(self):
|
||||
return self.a.y == self.b.y
|
||||
|
||||
def id(self):
|
||||
return ','.join(sorted([self.a.id(), self.b.id()]))
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(len(v) / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
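# Added commentary (not in the original commit): decode(x, N) expands the integer x into its
# N low-order bits, least-significant bit first, while encode(v) above packs bits into bytes
# most-significant bit first; the two operate on different representations (int vs bytearray)
# and are not inverses of each other.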
|
||||
|
||||
def sha(v):
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a.x, b.x))
|
||||
|
||||
def random_x(N):
|
||||
x = np.zeros((N))
|
||||
for i in range(0, N):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def xor_n(x, n):
|
||||
return sum(x[:n]) % 2
|
||||
|
||||
def create_dof_map(influences):
|
||||
dof_map = {}
|
||||
for influence in influences:
|
||||
for i in influence.dof:
|
||||
if not i in dof_map:
|
||||
dof_map[i] = []
|
||||
dof_map[i].append(influence)
|
||||
return dof_map
|
||||
|
||||
def flip(influences, i):
|
||||
for influence in influences:
|
||||
if i in influence.dof:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
|
||||
def remove_dof(dof_map, i, flip = False):
|
||||
for influence in dof_map[i]:
|
||||
influence.dof.remove(i)
|
||||
if flip:
|
||||
influence.a.y = int(influence.a.y) ^ 1
|
||||
# if len(influence.dof) == 0 and not influence.coherent():
|
||||
# raise Exception('Invalid')
|
||||
del dof_map[i]
|
||||
|
||||
def solve(dof_map, all_influences, all_samples):
|
||||
eliminated = True
|
||||
while eliminated:
|
||||
eliminated = False
|
||||
for influence in all_influences:
|
||||
if len(influence.dof) == 1:
|
||||
i = next(iter(influence.dof))
|
||||
if influence.coherent():
|
||||
remove_dof(dof_map, i)
|
||||
eliminated = True
|
||||
else:
|
||||
print('Forced', i)
|
||||
remove_dof(dof_map, i, True)
|
||||
eliminated = True
|
||||
|
||||
lowest_dof = None
|
||||
for influence in all_influences:
|
||||
if not influence.coherent() and len(influence.dof) > 1:
|
||||
if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
|
||||
lowest_dof = influence
|
||||
|
||||
flip = None
|
||||
highest_score = -1
|
||||
|
||||
for i in lowest_dof.dof:
|
||||
per_point_scores = {}
|
||||
i_influences = dof_map[i]
|
||||
left = 0
|
||||
right = 0
|
||||
for influence in i_influences:
|
||||
if not influence.a in per_point_scores:
|
||||
per_point_scores[influence.a] = [0, 0]
|
||||
if not influence.b in per_point_scores:
|
||||
per_point_scores[influence.b] = [0, 0]
|
||||
if influence.coherent():
|
||||
per_point_scores[influence.a][0] += 1
|
||||
per_point_scores[influence.b][0] += 1
|
||||
left += 1
|
||||
else:
|
||||
per_point_scores[influence.a][1] += 1
|
||||
per_point_scores[influence.b][1] += 1
|
||||
right += 1
|
||||
print(i, left / (left + right))
|
||||
num = 0
|
||||
denom = 0
|
||||
for _, score in per_point_scores.items():
|
||||
if score[0] == score[1]:
|
||||
continue
|
||||
print(i, score)
|
||||
num += score[1] / (score[0] + score[1])
|
||||
denom += 1
|
||||
score = num / denom if denom > 0 else 0
|
||||
print(score)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# 1st row (n+1 choose k+1) * (1-(k mod 2))
|
||||
# pseudopascal to compute the follow-on rows
|
||||
# assuming solvability, we want to maximize the probability that our current state and our state with
|
||||
# a particular single flip are one order apart in the correct direction
|
||||
|
||||
|
||||
|
||||
# 2, 0
|
||||
# 2, 2, 0
|
||||
# 2, 4, 2, 0
|
||||
# 2, 6, 6, 2, 0
|
||||
# 2, 8,12, 8, 2, 0
|
||||
# 2,10,20,20,10, 2, 0
|
||||
|
||||
# 3,-9,19,-33,51,-73,99
|
||||
# 3,-6,10,-14,18,-22,26
|
||||
# 3,-3, 4, -4, 4, -4, 4
|
||||
# 3, 0, 1, 0, 0, 0, 0
|
||||
# 3, 3, 1, 1, 0, 0, 0
|
||||
# 3, 6, 4, 2, 1, 0, 0
|
||||
# 3, 9,10, 6, 3, 1, 0
|
||||
|
||||
# 4, 0, 4, 0
|
||||
# 4, 4, 4, 4, 0
|
||||
# 4, 8, 8, 8, 4, 0
|
||||
# 4,12,16,16,12, 4, 0
|
||||
|
||||
# 5, 0,10, 0, 1
|
||||
# 5, 5,10,10, 1, 1
|
||||
# 5,
|
||||
# 5,
|
||||
|
||||
|
||||
|
||||
# 3
|
||||
#
|
||||
# @1 [1, 2, 1]
|
||||
# @2 [2, 2, 0]
|
||||
# @3 [3, 0, 1]
|
||||
|
||||
# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
|
||||
#
|
||||
# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
|
||||
# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
|
||||
# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
|
||||
# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -
|
||||
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -
|
||||
|
||||
# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
|
||||
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
|
||||
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
|
||||
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
|
||||
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
|
||||
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]
|
||||
|
||||
# 6
|
||||
#
|
||||
# @1 [1, 5, 10, 10, 5, 1]
|
||||
# @2 [2, 8, 12, 8, 2, 0]
|
||||
# @3 [3, 9, 10, 6, 3, 1]
|
||||
# @4 [4, 8, 8, 8, 4, 0]
|
||||
# @5 [5, 5, 10, 10, 1, 1]
|
||||
# @6 [6, 0, 20, 0, 6, 0]
|
||||
|
||||
# last row, 1 if odd, 0 if even
|
||||
# second to last, subtract 2 on odds, add 2 on evens
|
||||
|
||||
def compute_pseudopascal(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
return dist
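# Illustrative check (not part of the original commit): for small N the rows of
# compute_pseudopascal reproduce the tables in the comments above, e.g.
#
#   >>> compute_pseudopascal(3)
#   array([[1., 2., 1.],
#          [2., 2., 0.],
#          [3., 0., 1.]])
#
# matching the "@1 [1, 2, 1] / @2 [2, 2, 0] / @3 [3, 0, 1]" rows listed for N = 3: row k
# counts, per Hamming distance, how many points disagree with the origin's label when the
# target is an order-(k+1) parity.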
|
||||
|
||||
def compute_distributions(N):
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
denom = math.comb(N, j+1)
|
||||
dist[i][j] /= denom
|
||||
return dist
|
||||
|
||||
def confusion_probabilities(N, samples):
|
||||
sample_sizes = np.zeros(N)
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
if i == j:
|
||||
continue
|
||||
distance = hamming_distance(a, b)
|
||||
sample_sizes[distance - 1] += 1
|
||||
|
||||
confusion = np.zeros((N, N))
|
||||
dist = compute_pseudopascal(N)
|
||||
np.multiply(dist, 2 ** N, dist)
|
||||
# These are the probabilities that we might mix up any two orders given a particular sample size
|
||||
for i in range(0, N):
|
||||
for j in range(0, N):
|
||||
probability = 1.0
|
||||
for k in range(0, N):
|
||||
full_size = math.comb(N, k+1) * (2 ** N)
|
||||
sample_size = sample_sizes[k]
|
||||
num_unknowns = full_size - sample_size
|
||||
i_incoherent = dist[i][k]
|
||||
# Worst case, we sample only the coherent points,
|
||||
i_min = max(i_incoherent - num_unknowns, 0) / full_size
|
||||
i_max = min(sample_size, i_incoherent) / full_size
|
||||
u = (i_min + i_max) / 2
|
||||
s = (i_max - i_min) / 2
|
||||
probability *= raised_cosine(dist[j][k] / full_size, u, s)
|
||||
confusion[i][j] = probability
|
||||
return confusion
|
||||
|
||||
def raised_cosine(x, u, s):
|
||||
if x < (u - s):
|
||||
return 0
|
||||
if x > (u + s):
|
||||
return 0
|
||||
return 1.0 / (2.0 * s) * (1 + math.cos(math.pi * (x - u) / s))
|
||||
|
||||
# Probability of getting k red balls after drawing n from a bag with m total balls and j red balls in it
|
||||
# (n choose k) * p^k * (1-p)^(n-k)
|
||||
|
||||
# p/m chance of getting a red ball
|
||||
# (1 - p/m) chance of not getting a red ball
|
||||
|
||||
# One way (p/m) * ((p-1)/(m-1)) * ((p-2)/(m-2))
|
||||
# (1 - (p/m))
|
||||
|
||||
def p_bernoulli(n, k, m, j):
|
||||
# probabilities = np.zeros((n + 1, n + 1))
|
||||
# probabilities.fill(-1)
|
||||
# # if n == k:
|
||||
# # return 1.0
|
||||
# # if k > p:
|
||||
# # return 0.0
|
||||
# stack = [(0,0)]
|
||||
# while len(stack) > 0:
|
||||
# (a, b) = stack.pop()
|
||||
# if a + b == n:
|
||||
# probabilities[a][b] = 1 if a == k else 0
|
||||
# elif a > j:
|
||||
# probabilities[a][b] = 0
|
||||
# elif b > (m - j):
|
||||
# probabilities[a][b] = 0
|
||||
# else:
|
||||
# p_left = probabilities[a + 1][b]
|
||||
# p_right = probabilities[a][b + 1]
|
||||
# if p_left >= 0 and p_right >= 0:
|
||||
# p = (j - a) / (m - a - b)
|
||||
# probabilities[a][b] = p_left * p + p_right * (1 - p)
|
||||
# else:
|
||||
# stack.append((a, b))
|
||||
# if p_left < 0:
|
||||
# stack.append((a + 1, b))
|
||||
# if p_right < 0:
|
||||
# stack.append((a, b + 1))
|
||||
# return probabilities[0][0]
|
||||
|
||||
p = j / m
|
||||
P = 1.0
|
||||
p_k = 0
|
||||
p_nk = 0
|
||||
for i in range(1, k + 1):
|
||||
P *= (n + 1 - i) / i
|
||||
while P > 1.0 and p_k < k:
|
||||
P *= p
|
||||
p_k += 1
|
||||
while P > 1.0 and p_nk < (n - k):
|
||||
P *= (1 - p)
|
||||
p_nk += 1
|
||||
while p_k < k:
|
||||
P *= p
|
||||
p_k += 1
|
||||
while (p_nk < (n - k)):
|
||||
P *= (1 - p)
|
||||
p_nk += 1
|
||||
return P
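# Illustrative sketch (not part of the original commit): p_bernoulli(n, k, m, j) evaluates
# the binomial pmf described above, C(n, k) * p^k * (1-p)^(n-k) with p = j / m, interleaving
# the multiplications to keep the running product from overflowing or underflowing. A direct
# reference version for comparison:
def p_bernoulli_reference(n, k, m, j):
    p = j / m
    return math.comb(n, k) * (p ** k) * ((1 - p) ** (n - k))
# e.g. p_bernoulli(10, 3, 100, 40) and p_bernoulli_reference(10, 3, 100, 40) both give
# roughly 0.215.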
|
||||
|
||||
def average_index(x):
|
||||
total = 0
|
||||
for k in range(0, len(x)):
|
||||
total += k * x[k]
|
||||
return total / np.sum(x)
|
||||
|
||||
def compute_cumulative_probability(N, bases, p_n):
|
||||
# p_n = np.zeros(N)
|
||||
# p_n.fill(0.5)
|
||||
states = [[]]
|
||||
flips = set()
|
||||
for i in range(1, len(bases)):
|
||||
# (base, _) = bases[i]
|
||||
(_, flip) = bases[i]
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for k in range(0, N - 1):
|
||||
# p_forward += base[k + 1] * next_p[k]
|
||||
# p_backward += base[k] * next_p[k + 1]
|
||||
if flip in flips:
|
||||
# p_n[flip] -= p_forward
|
||||
# p_n[flip] += p_backward
|
||||
flips.remove(flip)
|
||||
else:
|
||||
# p_n[flip] += p_forward
|
||||
# p_n[flip] -= p_backward
|
||||
flips.add(flip)
|
||||
states.append(flips.copy())
|
||||
# np.clip(p_n, 0, 1, p_n)
|
||||
# print('Contribution probabilities', p_n)
|
||||
|
||||
min_p_n = np.min(p_n)
|
||||
max_p_n = np.max(p_n)
|
||||
|
||||
|
||||
p_k = np.zeros(N)
|
||||
for k in range(0, N):
|
||||
stack = [(k, len(bases) - 1)]
|
||||
probabilities = np.zeros((N, len(bases)))
|
||||
probabilities.fill(-1)
|
||||
while len(stack) > 0:
|
||||
(i, base_index) = stack.pop()
|
||||
(base, flip) = bases[base_index]
|
||||
if base_index == 0:
|
||||
probabilities[i, 0] = base[i]
|
||||
else:
|
||||
left = i - 1
|
||||
right = i + 1
|
||||
state = states[base_index - 1]
|
||||
p_flip = max(min(p_n[flip] + 0.5, 1.0), 0)
|
||||
if flip in state:
|
||||
p_flip = 1 - p_flip
|
||||
p_left = probabilities[left, base_index - 1] if left >= 0 else 0
|
||||
p_right = probabilities[right, base_index - 1] if right < N else 0
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
probabilities[i, base_index] = base[i] * p_left * (1 - p_flip) + base[i] * p_right * p_flip
|
||||
else:
|
||||
stack.append((i, base_index))
|
||||
if p_left < 0:
|
||||
stack.append((left, base_index - 1))
|
||||
if p_right < 0:
|
||||
stack.append((right, base_index - 1))
|
||||
p_k[k] = probabilities[k][-1]
|
||||
np.divide(p_k, np.sum(p_k), p_k)
|
||||
return p_k
|
||||
|
||||
# 8, 32, 2^5
|
||||
# 10, 64, 2^6
|
||||
# 12, 128, 2^7
|
||||
# 14, 256, 2^8
|
||||
# 16, 512, 2^9
|
||||
# 18, 1024, 2^10
|
||||
# 20, 2048, 2^11
|
||||
# 22, 4096, 2^12
|
||||
def main():
|
||||
N = 16
|
||||
sample_size = 32
|
||||
e_bits = 2
|
||||
sample_ids = set()
|
||||
samples = []
|
||||
|
||||
dist = compute_pseudopascal(N)
|
||||
print(dist)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
x = random_x(N)
|
||||
y = int(xor_n(x, e_bits))
|
||||
p = Point(x, y)
|
||||
p_id = p.id()
|
||||
if p_id in sample_ids:
|
||||
continue
|
||||
sample_ids.add(p_id)
|
||||
samples.append(p)
|
||||
|
||||
chords = [{} for _ in range(0, len(samples))]
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(i + 1, len(samples)):
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
if distance not in chords[i]:
|
||||
chords[i][distance] = []
|
||||
chords[i][distance].append(j)
|
||||
if distance not in chords[j]:
|
||||
chords[j][distance] = []
|
||||
chords[j][distance].append(i)
|
||||
|
||||
probability = np.zeros((N, N))
|
||||
scalars = np.ones(N)
|
||||
for i in range(0, len(samples)):
|
||||
origin = samples[i]
|
||||
for (distance, points) in chords[i].items():
|
||||
n = len(points)
|
||||
t = math.comb(N, distance)
|
||||
a = sum([0 if origin.y == samples[index].y else 1 for index in points])
|
||||
for k in range(1, N - 1):
|
||||
p = dist[k][distance - 1]
|
||||
prob_at_k = p_bernoulli(n, a, t, p)
|
||||
for flip in range(0, N):
|
||||
a_flip = sum([0 if origin.y == samples[index].y and origin.x[flip] == samples[index].x[flip] or origin.y != samples[index].y and origin.x[flip] != samples[index].x[flip] else 1 for index in points])
|
||||
p_forward = dist[k - 1][distance - 1]
|
||||
p_backward = dist[k + 1][distance - 1]
|
||||
prob_at_k_forward = p_bernoulli(n, a_flip, t, p_forward)
|
||||
prob_at_k_backward = p_bernoulli(n, a_flip, t, p_backward)
|
||||
# prob_at_k_backward = 0
|
||||
probability[k][flip] += (n / t) * prob_at_k * (prob_at_k_forward - prob_at_k_backward)
|
||||
# probability[k][flip] *= prob_at_k * prob_at_k_forward
|
||||
# scalars[k] *= np.max(probability[k])
|
||||
# np.divide(probability[k], np.max(probability[k]), probability[k])
|
||||
|
||||
# print(scalars)
|
||||
print(probability)
|
||||
return
|
||||
|
||||
coherent_distances = np.zeros(N + 1)
|
||||
incoherent_distances = np.zeros(N + 1)
|
||||
total_distances = np.zeros(N + 1)
|
||||
for i in range(0, len(samples)):
|
||||
coherent_distances.fill(0)
|
||||
incoherent_distances.fill(0)
|
||||
total_distances.fill(0)
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
is_coherent = a.y == b.y
|
||||
total_distances[distance] += 1
|
||||
if is_coherent:
|
||||
coherent_distances[distance] += 1
|
||||
else:
|
||||
incoherent_distances[distance] += 1
|
||||
print(total_distances)
|
||||
print(incoherent_distances)
|
||||
print()
|
||||
for d in range(1, N + 1):
|
||||
n = coherent_distances[d] + incoherent_distances[d]
|
||||
if n == 0:
|
||||
continue
|
||||
local_probability = np.ones(N)
|
||||
for k in range(0, N):
|
||||
a = incoherent_distances[d]
|
||||
t = math.comb(N, d)
|
||||
p = dist[k][d - 1]
|
||||
prob = p_bernoulli(int(n), int(a), t, p)
|
||||
local_probability[k] = prob
|
||||
probability[i][k] *= prob
|
||||
print(local_probability)
|
||||
np.divide(probability[i], np.sum(probability[i]), probability[i])
|
||||
print()
|
||||
print(probability)
|
||||
total_probability = np.ones(N)
|
||||
for i in range(0, len(samples)):
|
||||
np.multiply(probability[i], total_probability, total_probability)
|
||||
np.divide(total_probability, np.sum(total_probability), total_probability)
|
||||
print(total_probability)
|
||||
|
||||
return
|
||||
|
||||
|
||||
# confusion = confusion_probabilities(N, samples)
|
||||
# print(confusion)
|
||||
# return
|
||||
|
||||
# for i in range(0, 2**N):
|
||||
# x = decode(i, N)
|
||||
# y = int(xor(x))
|
||||
# samples.append(Point(x,y))
|
||||
|
||||
base = np.zeros(N)
|
||||
current = np.zeros(N)
|
||||
cumulative_probability = np.ones(N)
|
||||
flip_likelihood = np.zeros(N)
|
||||
cumulative_deltas = np.zeros(N)
|
||||
direction = -1
|
||||
flips = set()
|
||||
bases = []
|
||||
last_flip = -1
|
||||
|
||||
for _ in range(0, 2 ** N):
|
||||
lowest_err = -1
|
||||
use_flip = -1
|
||||
for flip in range(-1, N):
|
||||
coherent_distances = np.zeros(N+1)
|
||||
incoherent_distances = np.zeros(N+1)
|
||||
all_coherent = True
|
||||
for i in range(0, len(samples)):
|
||||
a = samples[i]
|
||||
for j in range(0, len(samples)):
|
||||
b = samples[j]
|
||||
distance = hamming_distance(a, b)
|
||||
is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
|
||||
if is_coherent:
|
||||
coherent_distances[distance] += 1
|
||||
else:
|
||||
incoherent_distances[distance] += 1
|
||||
all_coherent = False
|
||||
if all_coherent:
|
||||
print('Flip and halt', flip)
|
||||
return
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
|
||||
# print(coherent_distances, incoherent_distances)
|
||||
# est_incoherence = np.divide(incoherent_distances, np.add(coherent_distances, incoherent_distances))
|
||||
# print(est_incoherence)
|
||||
|
||||
probability = np.ones(N)
|
||||
# np.divide(probability, np.sum(probability), probability)
|
||||
for j in range(1, N + 1):
|
||||
n = coherent_distances[j] + incoherent_distances[j]
|
||||
if n == 0:
|
||||
continue
|
||||
for k in range(0, N):
|
||||
a = incoherent_distances[j]
|
||||
t = math.comb(N, j) * (2 ** N)
|
||||
p = dist[k][j - 1] * (2 ** N)
|
||||
prob = p_bernoulli(int(n), int(a), t, p)
|
||||
probability[k] *= prob
|
||||
np.divide(probability, np.sum(probability), probability)
|
||||
|
||||
if flip < 0:
|
||||
np.copyto(base, probability)
|
||||
else:
|
||||
np.copyto(current, probability)
|
||||
|
||||
|
||||
# print(k, i, min_true_value, max_true_value)
|
||||
|
||||
# confidence = (coherent_distances[i] + incoherent_distances[i]) / math.comb(N, i) # probability that the sample is representative
|
||||
# err += abs(est_incoherence[i] - known_incoherence_at_k[i-1]) * confidence
|
||||
# denom += 1
|
||||
# print(flip, k, err)
|
||||
# err /= denom
|
||||
# if flip < 0:
|
||||
# base[k] = probability
|
||||
# else:
|
||||
# current[k] = probability
|
||||
|
||||
if flip >= 0:
|
||||
if np.sum(current) == 0:
|
||||
continue
|
||||
np.divide(current, np.sum(current), current)
|
||||
# print(current)
|
||||
# temp = np.roll(cumulative_probability, -1)
|
||||
# temp[-1] = 1.0
|
||||
# np.multiply(current, temp, current)
|
||||
# np.divide(current, np.sum(current), current)
|
||||
p_forward = 0
|
||||
p_backward = 0
|
||||
for i in range(1, N):
|
||||
p_forward += base[i] * current[i - 1]
|
||||
for i in range(0, N - 1):
|
||||
p_backward += base[i] * current[i + 1]
|
||||
scale = 0.01
|
||||
if flip in flips:
|
||||
flip_likelihood[flip] += scale * p_backward
|
||||
flip_likelihood[flip] -= scale * p_forward
|
||||
else:
|
||||
flip_likelihood[flip] -= scale * p_backward
|
||||
flip_likelihood[flip] += scale * p_forward
|
||||
delta = p_forward - p_backward
|
||||
print(flip, current, p_forward, p_backward)
|
||||
base_index = average_index(base)
|
||||
current_index = average_index(current)
|
||||
err = abs(1 - (base_index - current_index))
|
||||
print(base_index, current_index, err)
|
||||
|
||||
# base_index = average_index(cumulative_probability)
|
||||
# new_index = average_index(current)
|
||||
# if isnan(new_index):
|
||||
# continue
|
||||
# np.divide(current, np.sum(current), current)
|
||||
# np.subtract(1, current, current)
|
||||
# print(flip,p_forward,p_backward,current)
|
||||
if delta > 0 and (use_flip < 0 or delta > lowest_err):
|
||||
use_flip = flip
|
||||
lowest_err = delta
|
||||
|
||||
# cumulative_deltas[flip] += 0
|
||||
|
||||
# for k in range(0, N - 1):
|
||||
# value = current[k] * cumulative_probability[k + 1]
|
||||
# if use_flip < 0 or value > lowest_err:
|
||||
# use_flip = flip
|
||||
# lowest_err = value
|
||||
# print(flip, highest_value)
|
||||
else:
|
||||
# p_next = np.zeros(N)
|
||||
# for i in range(0, N):
|
||||
# P = 0.0
|
||||
# for j in range(0, N):
|
||||
# if i == j:
|
||||
# continue
|
||||
# P += base[i] * (1 - base[j])
|
||||
# p_next[i] = P
|
||||
# base = p_next
|
||||
|
||||
# base[0] = 0
|
||||
np.divide(base, np.sum(base), base)
|
||||
bases.append((base.copy(), last_flip))
|
||||
# bases.insert(0, base.copy())
|
||||
# cumulative_probability = compute_cumulative_probability(N, bases)
|
||||
# p_forward = 0
|
||||
# p_backward = 0
|
||||
# for i in range(1, N):
|
||||
# p_forward += cumulative_probability[i] * base[i - 1]
|
||||
# for i in range(0, N - 1):
|
||||
# p_backward += cumulative_probability[i] * base[i + 1]
|
||||
print('Base', base)
|
||||
# # # np.subtract(1, base, base)
|
||||
# # # print(cumulative_probability)
|
||||
# shift_left = np.roll(cumulative_probability, -1)
|
||||
# shift_left[-1] = 0.0
|
||||
# # # # print('Shift Left', p_forward, shift_left)
|
||||
# shift_right = np.roll(cumulative_probability, 1)
|
||||
# shift_right[0] = 0.0
|
||||
# # # # print('Shift Right', p_backward, shift_right)
|
||||
# p_next = np.add(np.multiply(shift_left, 0.5), np.multiply(shift_right, 0.5))
|
||||
# p_next[0] = 0
|
||||
# np.divide(p_next, np.sum(p_next), p_next)
|
||||
# # # # print('Next', p_next)
|
||||
# # # # # print(cumulative_probability)
|
||||
# # # # # print(base)
|
||||
# np.multiply(base, p_next, cumulative_probability)
|
||||
# cumulative_probability[0] = 0
|
||||
# # # # # np.multiply(cumulative_probability, shift_right, cumulative_probability)
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
cumulative_probability = compute_cumulative_probability(N, bases, flip_likelihood)
|
||||
print('Cumulative', cumulative_probability)
|
||||
print('Likelihood', flip_likelihood)
|
||||
|
||||
# cumulative_probability[0] = 0
|
||||
# use_flip = -1
|
||||
# if direction < 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] < 0:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# direction = 1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# else:
|
||||
# use_flip = np.argmin(cumulative_deltas)
|
||||
# if cumulative_deltas[use_flip] > 0:
|
||||
# use_flip = np.argmax(cumulative_deltas)
|
||||
# direction = -1
|
||||
# # cumulative_deltas.fill(0)
|
||||
# if direction < 0:
|
||||
# cumulative_probability[0] = 0
|
||||
# else:
|
||||
# cumulative_probability[-1] = 0
|
||||
# np.divide(cumulative_probability, np.sum(cumulative_probability), cumulative_probability)
|
||||
# print(cumulative_deltas)
|
||||
|
||||
# use_flip = -1
|
||||
# highest_p = 0
|
||||
# for i in range(0, N):
|
||||
# p = flip_likelihood[i]
|
||||
# if i in flips:
|
||||
# p = -p
|
||||
# if use_flip < 0 or p > highest_p:
|
||||
# use_flip = i
|
||||
# highest_p = p
|
||||
# if not use_flip in flips and highest_p < 0 or use_flip in flips and highest_p > 0:
|
||||
# flip_likelihood[use_flip] *= -1.0
|
||||
|
||||
if use_flip < 0:
|
||||
return
|
||||
last_flip = use_flip
|
||||
if use_flip in flips:
|
||||
flips.remove(use_flip)
|
||||
else:
|
||||
flips.add(use_flip)
|
||||
print('Flip', use_flip, lowest_err)
|
||||
print(flips)
|
||||
cumulative_deltas[use_flip] = -cumulative_deltas[use_flip]
|
||||
for p in samples:
|
||||
if p.x[use_flip]:
|
||||
p.y ^= 1
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
541
mutations3.py
Normal file
@ -0,0 +1,541 @@
|
||||
import hashlib
|
||||
import math
|
||||
from matplotlib import offsetbox
|
||||
import numpy as np
|
||||
import random
|
||||
from struct import pack, pack_into, unpack_from
|
||||
import secrets
|
||||
|
||||
from numpy import hamming
|
||||
|
||||
N = 32
|
||||
M = 2
|
||||
|
||||
def bit_at_index(buffer, index):
|
||||
offset = (index >> 3) % len(buffer)
|
||||
return buffer[offset] & (1 << (index & 0b111)) != 0
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def encode_f(f, buffer, offset=0):
|
||||
(inverted, flips, child) = f
|
||||
pack_into('I', buffer, offset, inverted)
|
||||
offset += 4
|
||||
for index in flips:
|
||||
pack_into('I', buffer, offset, 0)
|
||||
offset += 4
|
||||
pack_into('I', buffer, offset, index)
|
||||
offset += 4
|
||||
if child is None:
|
||||
pack_into('I', buffer, offset, 1)
|
||||
offset += 4
|
||||
return offset
|
||||
(inverted, left, right) = child
|
||||
pack_into('I', buffer, offset, 2 if not inverted else 3)
|
||||
offset += 4
|
||||
offset = encode_f(left, buffer, offset)
|
||||
offset = encode_f(right, buffer, offset)
|
||||
return offset
|
||||
|
||||
def generate_random_branch(p_mutation):
|
||||
global N
|
||||
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
inverted = random.randint(0, 1)
|
||||
indices = set()
|
||||
children = []
|
||||
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in indices]
|
||||
if len(available_indices) == 1:
|
||||
indices.add(available_indices[0])
|
||||
continue
|
||||
indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_add_children)
|
||||
right = generate_random_branch(p_add_children)
|
||||
children.append((child_inverted, left, right))
|
||||
return (inverted, indices, children)
|
||||
|
||||
def mutate_f(f, p_mutation):
|
||||
global N
|
||||
(inverted, indices, children) = f
|
||||
mutated_indices = set(indices)
|
||||
mutated_children = children[:]
|
||||
|
||||
p_invert = p_mutation * random.random()
|
||||
p_drop_indices = p_mutation * random.random()
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_drop_children = p_mutation * random.random()
|
||||
p_mutate_child = p_mutation * random.random()
|
||||
p_clone_child = p_mutation * random.random()
|
||||
p_invert_child = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
# randomly invert
|
||||
if random.random() < p_invert:
|
||||
inverted ^= 1
|
||||
# randomly drop indices
|
||||
while random.random() < p_drop_indices and len(mutated_indices) > 0:
|
||||
mutated_indices.pop()
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(mutated_indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in mutated_indices]
|
||||
if len(available_indices) == 1:
|
||||
mutated_indices.add(available_indices[0])
|
||||
continue
|
||||
mutated_indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly drop children
|
||||
while random.random() < p_drop_children and len(mutated_children) > 0:
|
||||
if len(mutated_children) == 1:
|
||||
del mutated_children[0]
|
||||
break
|
||||
del mutated_children[random.randint(0, len(mutated_children) - 1)]
|
||||
# randomly clone children
|
||||
while random.random() < p_clone_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
clone = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
mutated_children.append(clone)
|
||||
# randomly mutate children
|
||||
while random.random() < p_mutate_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
mutated_children[index] = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_mutation)
|
||||
right = generate_random_branch(p_mutation)
|
||||
mutated_children.append((child_inverted, left, right))
|
||||
return (inverted, mutated_indices, mutated_children)
|
||||
|
||||
def generate_program(model, output_var='output'):
|
||||
global N, M
|
||||
(constant, indices, child) = model
|
||||
|
||||
statement = 'multiply(' + np.array2string(indices, separator=',') + ', x, temp)\n\t'
|
||||
statement += output_var + '=' + str(constant) + '+sum(temp)\n\t'
|
||||
|
||||
if not child is None:
|
||||
left_output = output_var + '0'
|
||||
right_output = output_var + '1'
|
||||
(left, right) = child
|
||||
statement += generate_program(left, left_output)
|
||||
statement += generate_program(right, right_output)
|
||||
statement += output_var + '+=' + left_output + '*' + right_output + '\n\t'
|
||||
statement += output_var + '%=' + str(M) + '\n\t'
|
||||
return statement
|
||||
|
||||
def compile(model):
|
||||
program = 'def f(x, temp):\n\t' + generate_program(model) + 'return output'
|
||||
scope = {'multiply': np.multiply, 'sum': np.sum}
|
||||
exec(program, scope)
|
||||
return scope['f']
|
||||
|
||||
def evaluate(model, x, value = 0):
|
||||
(inverted, indices, children) = model
|
||||
for i in indices:
|
||||
if bit_at_index(x, i) != 0:
|
||||
value ^= 1
|
||||
for child in children:
|
||||
(child_inverted, left, right) = child
|
||||
left = evaluate(left, x)
|
||||
right = evaluate(right, x)
|
||||
if left & right != child_inverted:
|
||||
value ^= 1
|
||||
if inverted:
|
||||
value ^= 1
|
||||
return value
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for n in x:
|
||||
num_one_bits += count_one_bits(n)
|
||||
return num_one_bits % 2
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n))
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
return inputs
|
||||
|
||||
def update_sample(sample, index):
|
||||
global N
|
||||
for j in range(0, N):
|
||||
sample[index][j] = random.randint(0, 1)
|
||||
|
||||
def coherence(inputs, outputs, scratch):
|
||||
coherences = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
y_b = outputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
weight = 1.0 / (2 ** distance)
|
||||
denominator += weight
|
||||
if y_a == y_b:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
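# Illustrative sketch (not part of the original commit): coherence() scores how strongly
# nearby inputs (small Hamming distance) agree on their outputs, weighting each pair by
# 2 ** -distance; identical outputs everywhere score exactly 1.0, e.g.
#
#   >>> xs = random_sample(4, N)
#   >>> coherence(xs, np.zeros(4), np.zeros(N))
#   1.0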
|
||||
|
||||
def build_coherence_models(inputs, scratch):
|
||||
coherence_models = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
distances = [hamming_distance(x_a, inputs[j], scratch) for j in range(0, len(inputs))]
|
||||
indices = sorted(range(len(distances)), key=lambda i: distances[i])
|
||||
lowest = -1
|
||||
denominator = 0
|
||||
components = []
|
||||
for index in range(0, len(indices)):
|
||||
j = indices[index]
|
||||
if distances[j] == 0:
|
||||
continue
|
||||
if lowest < 0:
|
||||
lowest = distances[j]
|
||||
distance = distances[j] - lowest
|
||||
if distance >= 8:
|
||||
break
|
||||
weight = 2 ** -distance
|
||||
denominator += weight
|
||||
components.append((weight, j))
|
||||
coherence_models.append((denominator, components))
|
||||
return coherence_models
|
||||
|
||||
def fast_coherence(coherence_models, outputs):
|
||||
coherences = []
|
||||
for i in range(0, len(coherence_models)):
|
||||
(denominator, components) = coherence_models[i]
|
||||
numerator = 0
|
||||
for component in components:
|
||||
(weight, j) = component
|
||||
if outputs[i] == outputs[j]:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
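# Added commentary (not in the original commit): fast_coherence(build_coherence_models(inputs,
# scratch), outputs) approximates coherence(inputs, outputs, scratch). build_coherence_models
# keeps, for each point, only the neighbours within Hamming distance lowest + 8 of its nearest
# neighbour and precomputes their relative 2 ** -distance weights, so re-scoring a candidate's
# outputs in the main loop avoids recomputing any Hamming distances.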
|
||||
|
||||
def score(f, sample, distances):
|
||||
return coherence([(x, f(x) ^ y) for (x, y) in sample], distances)
|
||||
|
||||
def compute_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
a = inputs[i]
|
||||
for j in range(i, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def update_distances(inputs, distances, i, scratch):
|
||||
a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def clone_model(model, p_mutation):
|
||||
global N, M
|
||||
|
||||
clone = model[:]
|
||||
p_insert_node = p_mutation
|
||||
|
||||
i = 0
|
||||
while i < len(clone):
|
||||
(bias, op, indices, (p_modify, p_bias, p_index, p_insert)) = clone[i]
|
||||
|
||||
# if random.random() < p_modify:
|
||||
# p_modify += 0.01
|
||||
p_add_index = p_index
|
||||
indices = indices.copy()
|
||||
if random.random() < p_bias:
|
||||
p_bias += 0.001
|
||||
bias += random.randint(0, M - 1)
|
||||
bias %= M
|
||||
else:
|
||||
p_bias -= 0.001
|
||||
for absolute_index in range(0, N + i):
|
||||
relative_index = N - absolute_index - 1
|
||||
if random.random() < p_add_index:
|
||||
p_index += 0.001
|
||||
if relative_index in indices:
|
||||
indices.remove(relative_index)
|
||||
else:
|
||||
indices.add(relative_index)
|
||||
else:
|
||||
p_index -= 0.001
|
||||
# else:
|
||||
# p_modify -= 0.01
|
||||
|
||||
if random.random() < p_insert:
|
||||
p_insert += 0.001
|
||||
clone.insert(i, random_node(i, p_mutation))
|
||||
for j in range(i + 1, len(clone)):
|
||||
(bias, op, indices, p) = clone[j]
|
||||
modified_indices = set()
|
||||
for index in indices:
|
||||
if index >= 0:
|
||||
modified_indices.add(index)
|
||||
continue
|
||||
absolute_index = j + index
|
||||
if absolute_index == i:
|
||||
if random.random() > 0.5:
|
||||
modified_indices.add(index)
|
||||
else:
|
||||
modified_indices.add(index - 1)
|
||||
continue
|
||||
if absolute_index < i:
|
||||
modified_indices.add(index - 1)
|
||||
else:
|
||||
modified_indices.add(index)
|
||||
clone[j] = (bias, op, modified_indices, p)
|
||||
i += 1
|
||||
else:
|
||||
p_insert -= 0.001
|
||||
|
||||
p_modify = min(max(0.001, p_modify), 0.999)
|
||||
p_bias = min(max(0.001, p_bias), 0.999)
|
||||
p_index = min(max(0.001, p_index), 0.999)
|
||||
p_insert = min(max(0.001, p_insert), 0.999)
|
||||
clone[i] = (bias, op, indices, (p_modify, p_bias, p_index, p_insert))
|
||||
i += 1
|
||||
|
||||
if random.random() < p_insert_node:
|
||||
i = len(clone)
|
||||
clone.insert(i, random_node(i, p_mutation))
|
||||
for j in range(i + 1, len(clone)):
|
||||
(bias, op, indices, p) = clone[j]
|
||||
modified_indices = set()
|
||||
for index in indices:
|
||||
if index < N:
|
||||
modified_indices.add(index)
|
||||
continue
|
||||
shifted_index = index - N
|
||||
if shifted_index == i:
|
||||
if random.randint(0, 1) == 0:
|
||||
modified_indices.add(index)
|
||||
else:
|
||||
modified_indices.add(index + 1)
|
||||
if shifted_index > i:
|
||||
modified_indices.add(index + 1)
|
||||
else:
|
||||
modified_indices.add(index)
|
||||
clone[j] = (bias, op, modified_indices, p)
|
||||
return clone
|
||||
|
||||
def random_node(i, p_mutation):
|
||||
global N, M
|
||||
bias = random.randint(0, M - 1)
|
||||
op = random.randint(0, 1)
|
||||
p_modify = 0.5
|
||||
p_bias = 0.01
|
||||
p_index = 0.5
|
||||
p_insert = 0.01
|
||||
max_index = N + i - 1
|
||||
indices = set()
|
||||
indices.add(N - 1 - random.randint(0, max_index))
|
||||
|
||||
for index in range(0, max_index + 1):
|
||||
if random.random() < p_index:
|
||||
indices.add(N - 1 - index)
|
||||
return (bias, op, indices, (p_modify, p_bias, p_index, p_insert))
|
||||
|
||||
def null_candidate():
|
||||
global N
|
||||
return []
|
||||
|
||||
def eval_model(model, buffer, x):
|
||||
global N, M
|
||||
for i in range(0, len(model)):
|
||||
(bias, op, indices, _) = model[i]
|
||||
value = op
|
||||
for index in indices:
|
||||
if op == 1:
|
||||
value *= x[index] if index >= 0 else buffer[i + index]
|
||||
value %= M
|
||||
else:
|
||||
value += x[index] if index >= 0 else buffer[i + index]
|
||||
value %= M
|
||||
value += bias
|
||||
value %= M
|
||||
if i == len(model) - 1:
|
||||
return value
|
||||
else:
|
||||
buffer[i] = value
|
||||
return 0
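# Illustrative sketch (not part of the original commit): each node is (bias, op, indices, p),
# where op 0 sums and op 1 multiplies, non-negative indices read input bits, negative indices
# read earlier node outputs from buffer, and everything is reduced mod M. With M == 2 a single
# additive node over bits 0 and 1 computes their XOR, e.g.
#
#   >>> eval_model([(0, 0, {0, 1}, (0.5, 0.01, 0.5, 0.01))], np.zeros(1), [1, 0])
#   1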
|
||||
|
||||
def size(model):
|
||||
return len(model)
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
epochs = 10000
|
||||
num_survivors = 10
|
||||
num_offspring = 10
|
||||
num_candidates = num_survivors + num_survivors * num_offspring
|
||||
sample_size = 64
|
||||
eval_size = 100
|
||||
max_nodes = 65536
|
||||
p_mutation = 0.5
|
||||
g = sha
|
||||
current_generation = [null_candidate() for _ in range(0, num_candidates)]
|
||||
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
output_equality = np.zeros((sample_size, sample_size))
|
||||
inputs = random_sample(sample_size, N)
|
||||
scratch = np.zeros(N,)
|
||||
# compute_distances(inputs, distances, scratch)
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((sample_size,))
|
||||
output_xor = np.zeros((sample_size,))
|
||||
ones = np.ones((sample_size,))
|
||||
numerators = np.zeros((sample_size,))
|
||||
denominators = np.zeros((sample_size,))
|
||||
coherences = np.zeros((sample_size,))
|
||||
np.matmul(ones, distances, denominators)
|
||||
scores = np.zeros((num_candidates,))
|
||||
eval_buffer = np.zeros((max_nodes,))
|
||||
max_score = 0
|
||||
last_score = 0
|
||||
streak = 0
|
||||
|
||||
coherence_models = build_coherence_models(inputs, scratch)
|
||||
|
||||
for epoch in range(0, epochs):
|
||||
for i in range(0, num_candidates):
|
||||
candidate = current_generation[i]
|
||||
for j in range(0, sample_size):
|
||||
outputs[j] = eval_model(candidate, eval_buffer, inputs[j])
|
||||
np.subtract(outputs, expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
# for p in range(0, sample_size):
|
||||
# for q in range(0, sample_size):
|
||||
# m = int(output_xor[p])
|
||||
# n = int(output_xor[q])
|
||||
# distance = abs(m - n)
|
||||
# if distance > M / 2:
|
||||
# distance = M - distance
|
||||
# distance /= (M / 2)
|
||||
# distance **= 2
|
||||
# output_equality[p][q] = distance
|
||||
# # output_equality[p][q] = 1 if m == n else 0
|
||||
# np.multiply(output_equality, distances, output_equality)
|
||||
# np.matmul(ones, output_equality, numerators)
|
||||
# np.divide(numerators, denominators, coherences)
|
||||
# score = np.average(coherences)
|
||||
score = fast_coherence(coherence_models, output_xor)
|
||||
# if random.random() < 0.1:
|
||||
# check = coherence(inputs, output_xor, scratch)
|
||||
# if check - score > 1e-3:
|
||||
# print('not equal')
|
||||
scores[i] = score
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: (scores[i], -size(current_generation[i])))[-num_survivors:]
|
||||
survivors = [current_generation[index] for index in top_n]
|
||||
|
||||
# f = lambda x: evaluate(current_generation[0], x)
|
||||
# correct = 0
|
||||
# for i in range(0, eval_size):
|
||||
# x = random_input()
|
||||
# if f(x) == g(x):
|
||||
# correct += 1
|
||||
|
||||
top_score = scores[top_n[-1]]
|
||||
print(epoch, top_score, size(survivors[-1]))
|
||||
if top_score <= max_score:
|
||||
p_mutation += 0.001
|
||||
else:
|
||||
p_mutation = 0.5
|
||||
max_score = top_score
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
current_generation[i] = survivors[i]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
candidate = survivors[i]
|
||||
for j in range(0, num_offspring):
|
||||
index = num_survivors + j * num_survivors + i
|
||||
current_generation[index] = clone_model(candidate, random.random())
|
||||
|
||||
# inputs = random_sample(sample_size, N)
|
||||
# coherence_models = build_coherence_models(inputs, scratch)
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
|
||||
|
||||
# # while random.random() < 0.5:
|
||||
# if last_score == top_score:
|
||||
# streak += 1
|
||||
# else:
|
||||
# streak = 0
|
||||
# if streak >= 4:
|
||||
# inputs = random_sample(sample_size, N)
|
||||
# coherence_models = build_coherence_models(inputs, scratch)
|
||||
# # compute_distances(inputs, distances, scratch)
|
||||
# # np.matmul(ones, distances, denominators)
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# streak = 0
|
||||
# expected_outputs = np.zeros((sample_size,))
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# index = random.randint(0, sample_size - 1)
|
||||
# update_sample(inputs, index)
|
||||
# expected_outputs[index] = g(inputs[index])
|
||||
# update_distances(inputs, distances, index, scratch)
|
||||
# np.matmul(ones, distances, denominators)
|
||||
last_score = top_score
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
591
mutations4.py
Normal file
@ -0,0 +1,591 @@
|
||||
import hashlib
|
||||
import math
|
||||
from matplotlib import offsetbox
|
||||
import numpy as np
|
||||
import random
|
||||
from struct import pack, pack_into, unpack_from
|
||||
import secrets
|
||||
|
||||
from numpy import hamming
|
||||
|
||||
N = 32
|
||||
M = 2
|
||||
|
||||
def bit_at_index(buffer, index):
|
||||
offset = (index >> 3) % len(buffer)
|
||||
return buffer[offset] & (1 << (index & 0b111)) != 0
|
||||
|
||||
def count_one_bits(n):
|
||||
return bin(n).count("1")
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def encode_f(f, buffer, offset=0):
|
||||
(inverted, flips, child) = f
|
||||
pack_into('I', buffer, offset, inverted)
|
||||
offset += 4
|
||||
for index in flips:
|
||||
pack_into('I', buffer, offset, 0)
|
||||
offset += 4
|
||||
pack_into('I', buffer, offset, index)
|
||||
offset += 4
|
||||
if child is None:
|
||||
pack_into('I', buffer, offset, 1)
|
||||
offset += 4
|
||||
return offset
|
||||
(inverted, left, right) = child
|
||||
pack_into('I', buffer, offset, 2 if not inverted else 3)
|
||||
offset += 4
|
||||
offset = encode_f(left, buffer, offset)
|
||||
offset = encode_f(right, buffer, offset)
|
||||
return offset
|
||||
|
||||
def generate_random_branch(p_mutation):
|
||||
global N
|
||||
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
inverted = random.randint(0, 1)
|
||||
indices = set()
|
||||
children = []
|
||||
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in indices]
|
||||
if len(available_indices) == 1:
|
||||
indices.add(available_indices[0])
|
||||
continue
|
||||
indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_add_children)
|
||||
right = generate_random_branch(p_add_children)
|
||||
children.append((child_inverted, left, right))
|
||||
return (inverted, indices, children)
|
||||
|
||||
def mutate_f(f, p_mutation):
|
||||
global N
|
||||
(inverted, indices, children) = f
|
||||
mutated_indices = set(indices)
|
||||
mutated_children = children[:]
|
||||
|
||||
p_invert = p_mutation * random.random()
|
||||
p_drop_indices = p_mutation * random.random()
|
||||
p_add_indices = p_mutation * random.random()
|
||||
p_drop_children = p_mutation * random.random()
|
||||
p_mutate_child = p_mutation * random.random()
|
||||
p_clone_child = p_mutation * random.random()
|
||||
p_invert_child = p_mutation * random.random()
|
||||
p_add_children = p_mutation * random.random()
|
||||
|
||||
# randomly invert
|
||||
if random.random() < p_invert:
|
||||
inverted ^= 1
|
||||
# randomly drop indices
|
||||
while random.random() < p_drop_indices and len(mutated_indices) > 0:
|
||||
mutated_indices.pop()
|
||||
# randomly add indices
|
||||
while random.random() < p_add_indices and len(mutated_indices) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in mutated_indices]
|
||||
if len(available_indices) == 1:
|
||||
mutated_indices.add(available_indices[0])
|
||||
continue
|
||||
mutated_indices.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
# randomly drop children
|
||||
while random.random() < p_drop_children and len(mutated_children) > 0:
|
||||
if len(mutated_children) == 1:
|
||||
del mutated_children[0]
|
||||
break
|
||||
del mutated_children[random.randint(0, len(mutated_children) - 1)]
|
||||
# randomly clone children
|
||||
while random.random() < p_clone_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
clone = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
mutated_children.append(clone)
|
||||
# randomly mutate children
|
||||
while random.random() < p_mutate_child and len(mutated_children) > 0:
|
||||
index = 0 if len(mutated_children) == 1 else random.randint(0, len(mutated_children) - 1)
|
||||
(child_inverted, left, right) = mutated_children[index]
|
||||
if random.random() < p_invert_child:
|
||||
child_inverted ^= 1
|
||||
mutated_children[index] = (child_inverted, mutate_f(left, p_mutation), mutate_f(right, p_mutation))
|
||||
# randomly add children
|
||||
while random.random() < p_add_children:
|
||||
child_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(p_mutation)
|
||||
right = generate_random_branch(p_mutation)
|
||||
mutated_children.append((child_inverted, left, right))
|
||||
return (inverted, mutated_indices, mutated_children)
|
||||
|
||||
def decode_f(buffer, mutate = False, offset = 0, skip_invert = False):
|
||||
global N
|
||||
inverted = 0
|
||||
if not skip_invert:
|
||||
[inverted] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
# random invert
|
||||
if mutate and random.random() < 0.01:
|
||||
inverted ^= 1
|
||||
inverted &= 0b1
|
||||
flips = set()
|
||||
# random add flip
|
||||
while mutate and random.random() < 0.5 and len(flips) < N:
|
||||
available_indices = [i for i in range(0, N) if i not in flips]
|
||||
if len(available_indices) == 1:
|
||||
flips.add(available_indices[0])
|
||||
continue
|
||||
flips.add(available_indices[random.randint(0, len(available_indices) - 1)])
|
||||
while offset < len(buffer):
|
||||
# random create branch
|
||||
if mutate and random.random() < 0.01:
|
||||
gate_inverted = random.randint(0, 1)
|
||||
left = generate_random_branch(0.01)  # generate_random_branch requires a mutation probability; 0.01 matches the local rates and is an assumed value
|
||||
(offset, right) = decode_f(buffer, mutate, offset, True)
|
||||
return (offset, (inverted, flips, (gate_inverted, left, right)))
|
||||
[opcode] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
opcode &= 0b11
|
||||
if opcode == 0:
|
||||
[index] = unpack_from('I', buffer, offset)
|
||||
offset += 4
|
||||
# random skip flip
|
||||
if mutate and random.random() < 0.01:
|
||||
continue
|
||||
if index in flips:
|
||||
flips.remove(index)
|
||||
else:
|
||||
flips.add(index)
|
||||
elif opcode == 1:
|
||||
return (offset, (inverted, flips, None))
|
||||
else:
|
||||
(offset, left) = decode_f(buffer, mutate, offset)
|
||||
(offset, right) = decode_f(buffer, mutate, offset)
|
||||
gate_inverted = 0 if opcode == 2 else 1
|
||||
# random invert
|
||||
if mutate and random.random() < 0.01:
|
||||
gate_inverted ^= 1
|
||||
# random skip branch
|
||||
if mutate and random.random() < 0.01:
|
||||
return (offset, (inverted, flips, None))
|
||||
return (offset, (inverted, flips, (gate_inverted, left, right)))
|
||||
return (offset, (inverted, [], None))
|
||||
|
||||
def generate_program(model, output_var='output'):
|
||||
global N, M
|
||||
(constant, indices, child) = model
|
||||
|
||||
statement = 'multiply(' + np.array2string(indices, separator=',') + ', x, temp)\n\t'
|
||||
statement += output_var + '=' + str(constant) + '+sum(temp)\n\t'
|
||||
|
||||
if not child is None:
|
||||
left_output = output_var + '0'
|
||||
right_output = output_var + '1'
|
||||
(left, right) = child
|
||||
statement += generate_program(left, left_output)
|
||||
statement += generate_program(right, right_output)
|
||||
statement += output_var + '+=' + left_output + '*' + right_output + '\n\t'
|
||||
statement += output_var + '%=' + str(M) + '\n\t'
|
||||
return statement
|
||||
|
||||
def compile(model):
|
||||
program = 'def f(x, temp):\n\t' + generate_program(model) + 'return output'
|
||||
scope = {'multiply': np.multiply, 'sum': np.sum}
|
||||
exec(program, scope)
|
||||
return scope['f']
|
||||
|
||||
def evaluate(model, x, value = 0):
|
||||
(inverted, indices, children) = model
|
||||
for i in indices:
|
||||
if bit_at_index(x, i) != 0:
|
||||
value ^= 1
|
||||
for child in children:
|
||||
(child_inverted, left, right) = child
|
||||
left = evaluate(left, x)
|
||||
right = evaluate(right, x)
|
||||
if left & right != child_inverted:
|
||||
value ^= 1
|
||||
if inverted:
|
||||
value ^= 1
|
||||
return value
|
||||
|
||||
def encode(v):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for n in x:
|
||||
num_one_bits += count_one_bits(n)
|
||||
return num_one_bits % 2
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n))
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
return inputs
|
||||
|
||||
def update_sample(sample, index):
|
||||
global N
|
||||
for j in range(0, N):
|
||||
sample[index][j] = random.randint(0, 1)
|
||||
|
||||
def coherence(inputs, outputs, scratch):
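# Coherence score: for each sample, the 2^-Hamming-distance-weighted fraction of
# the other samples that produce the same output; returns the mean over all samples.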
|
||||
coherences = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
y_b = outputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
weight = 1.0 / (2 ** distance)
|
||||
denominator += weight
|
||||
if y_a == y_b:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def build_coherence_models(inputs, scratch):
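# Precomputes, per input, the neighbor weights (2^-(distance gap to the nearest
# other input), truncated once the gap reaches 8) so fast_coherence can rescore
# candidates without recomputing Hamming distances every epoch.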
|
||||
coherence_models = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
distances = [hamming_distance(x_a, inputs[j], scratch) for j in range(0, len(inputs))]
|
||||
indices = sorted(range(len(distances)), key=lambda i: distances[i])
|
||||
lowest = -1
|
||||
denominator = 0
|
||||
components = []
|
||||
for index in range(0, len(indices)):
|
||||
j = indices[index]
|
||||
if distances[j] == 0:
|
||||
continue
|
||||
if lowest < 0:
|
||||
lowest = distances[j]
|
||||
distance = distances[j] - lowest
|
||||
if distance >= 8:
|
||||
break
|
||||
weight = 2 ** -distance
|
||||
denominator += weight
|
||||
components.append((weight, j))
|
||||
coherence_models.append((denominator, components))
|
||||
return coherence_models
|
||||
|
||||
def fast_coherence(coherence_models, outputs):
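# Same agreement score as coherence(), computed from the precomputed neighbor
# weights; only the outputs vary between calls.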
|
||||
coherences = []
|
||||
for i in range(0, len(coherence_models)):
|
||||
(denominator, components) = coherence_models[i]
|
||||
numerator = 0
|
||||
for component in components:
|
||||
(weight, j) = component
|
||||
if outputs[i] == outputs[j]:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def score(f, sample, scratch):
    inputs = [x for (x, y) in sample]
    residuals = [f(x) ^ y for (x, y) in sample]
    return coherence(inputs, residuals, scratch)
|
||||
|
||||
def compute_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
a = inputs[i]
|
||||
for j in range(i, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def update_distances(inputs, distances, i, scratch):
|
||||
a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
distances[i][j] = 0
|
||||
continue
|
||||
b = inputs[j]
|
||||
distance = 2 ** -hamming_distance(a, b, scratch)
|
||||
distances[i][j] = distance
|
||||
distances[j][i] = distance
|
||||
|
||||
def clone_model(model, p_mutation):
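# Mutation operator: copies the node list, perturbing each node's bias and index
# set with per-node probabilities that are themselves nudged up or down, and
# occasionally inserts a new random node, shifting downstream index references
# past the insertion point.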
|
||||
global N, M
|
||||
|
||||
clone = model[:]
|
||||
p_insert_node = p_mutation * random.random()
|
||||
|
||||
i = 0
|
||||
while i < len(clone):
|
||||
(bias, op, indices, (p_modify, p_bias, p_index)) = clone[i]
|
||||
p_modify_node = p_modify
|
||||
|
||||
if random.random() < p_modify_node:
|
||||
p_modify += 0.01
|
||||
p_add_index = p_index
|
||||
p_modify_bias = p_bias
|
||||
indices = indices.copy()
|
||||
if random.random() < p_modify_bias:
|
||||
p_bias += 0.01
|
||||
bias += random.randint(0, M - 1)
|
||||
bias %= M
|
||||
else:
|
||||
p_bias -= 0.01
|
||||
for index in range(0, N + i):
|
||||
if random.random() < p_add_index:
|
||||
p_index += 0.01
|
||||
if index in indices:
|
||||
indices.remove(index)
|
||||
else:
|
||||
indices.add(index)
|
||||
else:
|
||||
p_index -= 0.01
|
||||
else:
|
||||
p_modify -= 0.01
|
||||
|
||||
p_modify = min(max(0.01, p_modify), 0.99)
|
||||
p_bias = min(max(0.01, p_bias), 0.99)
|
||||
p_index = min(max(0.01, p_index), 0.99)
|
||||
clone[i] = (bias, op, indices, (p_modify, p_bias, p_index))
|
||||
i += 1
|
||||
|
||||
if random.random() < p_insert_node:
|
||||
i = random.randint(0, len(clone))
|
||||
clone.insert(i, random_node(N + i - 1, p_mutation))
|
||||
for j in range(i + 1, len(clone)):
|
||||
(bias, op, indices, p) = clone[j]
|
||||
modified_indices = set()
|
||||
for index in indices:
|
||||
if index < N:
|
||||
modified_indices.add(index)
|
||||
continue
|
||||
shifted_index = index - N
|
||||
if shifted_index == i:
|
||||
if random.randint(0, 1) == 0:
|
||||
modified_indices.add(index)
|
||||
else:
|
||||
modified_indices.add(index + 1)
|
||||
if shifted_index > i:
|
||||
modified_indices.add(index + 1)
|
||||
else:
|
||||
modified_indices.add(index)
|
||||
clone[j] = (bias, op, modified_indices, p)
|
||||
return clone
|
||||
|
||||
def random_node(max_index, p_mutation):
|
||||
global N
|
||||
bias = random.randint(0, M - 1)
|
||||
op = random.randint(0, 1)
|
||||
p_modify = random.random()
|
||||
p_bias = random.random()
|
||||
p_index = random.random()
|
||||
indices = set()
|
||||
indices.add(random.randint(0, max_index))
|
||||
|
||||
p_add_index = p_mutation * random.random()
|
||||
for index in range(0, max_index):
|
||||
if random.random() < p_add_index:
|
||||
indices.add(index)
|
||||
return (bias, op, indices, (p_modify, p_bias, p_index))
|
||||
|
||||
def null_candidate():
|
||||
global N
|
||||
return []
|
||||
|
||||
def encode_tree(tree_model):
|
||||
stack = [tree_model]
|
||||
node_indices = {}
|
||||
index = 0
|
||||
while len(stack) > 0:
|
||||
node = stack.pop()
|
||||
node_indices[node] = index
|
||||
index += 1
|
||||
(p, bias, value) = node
|
||||
if isinstance(value, int):
|
||||
continue
|
||||
(left, right) = value
|
||||
stack.append(left)
|
||||
stack.append(right)
|
||||
length = index
|
||||
|
||||
stack = [tree_model]
|
||||
serialized_model = []
|
||||
    # NOTE: this loop was left unfinished in the original commit. A minimal
    # completion (an assumption, not the author's code): serialize each node as
    # (p, bias, payload), where payload is the leaf value or the pair of child
    # indices taken from node_indices.
    while len(stack) > 0:
        node = stack.pop()
        (p, bias, value) = node
        if isinstance(value, int):
            serialized_model.insert(0, (p, bias, value))
            continue
        (left, right) = value
        serialized_model.insert(0, (p, bias, (node_indices[left], node_indices[right])))
        stack.append(left)
        stack.append(right)
    return serialized_model
|
||||
|
||||
def eval_model(model, buffer, x):
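# Evaluates the flat node-list model on input x (mod M): node i sums (op=0) or
# multiplies (op=1) its referenced values, where indices below N read input bits
# and larger indices read earlier node outputs from `buffer`; the bias is then
# added and the last node's value is the model output.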
|
||||
global N, M
|
||||
for i in range(0, len(model)):
|
||||
(bias, op, indices, _) = model[i]
|
||||
value = op
|
||||
for index in indices:
|
||||
if index >= N + i:
|
||||
print('This should not happen')
|
||||
if op == 1:
|
||||
value *= x[index] if index < N else buffer[index - N]
|
||||
value %= M
|
||||
else:
|
||||
value += x[index] if index < N else buffer[index - N]
|
||||
value %= M
|
||||
value += bias
|
||||
value %= M
|
||||
if i == len(model) - 1:
|
||||
return value
|
||||
else:
|
||||
buffer[i] = value
|
||||
return 0
|
||||
|
||||
def size(model):
|
||||
return len(model)
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
epochs = 10000
|
||||
num_survivors = 100
|
||||
num_offspring = 10
|
||||
num_candidates = num_survivors + num_survivors * num_offspring
|
||||
sample_size = 64
|
||||
eval_size = 100
|
||||
max_nodes = 65536
|
||||
p_mutation = 0.5
|
||||
g = sha
|
||||
current_generation = [null_candidate() for _ in range(0, num_candidates)]
|
||||
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
output_equality = np.zeros((sample_size, sample_size))
|
||||
inputs = random_sample(sample_size, N)
|
||||
scratch = np.zeros(N,)
|
||||
# compute_distances(inputs, distances, scratch)
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((sample_size,))
|
||||
output_xor = np.zeros((sample_size,))
|
||||
ones = np.ones((sample_size,))
|
||||
numerators = np.zeros((sample_size,))
|
||||
denominators = np.zeros((sample_size,))
|
||||
coherences = np.zeros((sample_size,))
|
||||
np.matmul(ones, distances, denominators)
|
||||
scores = np.zeros((num_candidates,))
|
||||
eval_buffer = np.zeros((max_nodes,))
|
||||
max_score = 0
|
||||
last_score = 0
|
||||
streak = 0
|
||||
|
||||
coherence_models = build_coherence_models(inputs, scratch)
|
||||
|
||||
for epoch in range(0, epochs):
|
||||
for i in range(0, num_candidates):
|
||||
candidate = current_generation[i]
|
||||
for j in range(0, sample_size):
|
||||
outputs[j] = eval_model(candidate, eval_buffer, inputs[j])
|
||||
np.subtract(outputs, expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
# for p in range(0, sample_size):
|
||||
# for q in range(0, sample_size):
|
||||
# m = int(output_xor[p])
|
||||
# n = int(output_xor[q])
|
||||
# distance = abs(m - n)
|
||||
# if distance > M / 2:
|
||||
# distance = M - distance
|
||||
# distance /= (M / 2)
|
||||
# distance **= 2
|
||||
# output_equality[p][q] = distance
|
||||
# # output_equality[p][q] = 1 if m == n else 0
|
||||
# np.multiply(output_equality, distances, output_equality)
|
||||
# np.matmul(ones, output_equality, numerators)
|
||||
# np.divide(numerators, denominators, coherences)
|
||||
# score = np.average(coherences)
|
||||
score = fast_coherence(coherence_models, output_xor)
|
||||
# if random.random() < 0.1:
|
||||
# check = coherence(inputs, output_xor, scratch)
|
||||
# if check - score > 1e-3:
|
||||
# print('not equal')
|
||||
scores[i] = score
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
survivors = [current_generation[index] for index in top_n]
|
||||
|
||||
# f = lambda x: evaluate(current_generation[0], x)
|
||||
# correct = 0
|
||||
# for i in range(0, eval_size):
|
||||
# x = random_input()
|
||||
# if f(x) == g(x):
|
||||
# correct += 1
|
||||
|
||||
top_score = scores[top_n[-1]]
|
||||
print(epoch, top_score, size(survivors[-1]))
|
||||
if top_score <= max_score:
|
||||
p_mutation += 0.01
|
||||
else:
|
||||
p_mutation = 0.5
|
||||
max_score = top_score
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
current_generation[i] = survivors[i]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
candidate = survivors[i]
|
||||
for j in range(0, num_offspring):
|
||||
index = num_survivors + j * num_survivors + i
|
||||
current_generation[index] = clone_model(candidate, random.random() * 0.1)
|
||||
|
||||
inputs = random_sample(sample_size, N)
|
||||
coherence_models = build_coherence_models(inputs, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
|
||||
# while random.random() < 0.5:
|
||||
# if last_score == top_score:
|
||||
# streak += 1
|
||||
# else:
|
||||
# streak = 0
|
||||
# if streak >= 4:
|
||||
# inputs = random_sample(sample_size, N)
|
||||
# coherence_models = build_coherence_models(inputs, scratch)
|
||||
# # compute_distances(inputs, distances, scratch)
|
||||
# # np.matmul(ones, distances, denominators)
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# streak = 0
|
||||
# expected_outputs = np.zeros((sample_size,))
|
||||
# for i in range(0, sample_size):
|
||||
# expected_outputs[i] = g(inputs[i])
|
||||
# index = random.randint(0, sample_size - 1)
|
||||
# update_sample(inputs, index)
|
||||
# expected_outputs[index] = g(inputs[index])
|
||||
# update_distances(inputs, distances, index, scratch)
|
||||
# np.matmul(ones, distances, denominators)
|
||||
last_score = top_score
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
417
mutations5.py
Normal file
417
mutations5.py
Normal file
@ -0,0 +1,417 @@
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
|
||||
class Candidate:
|
||||
def __init__(self):
|
||||
global N
|
||||
self.bias = 0
|
||||
self.offsets = np.zeros((N,)).astype(np.int32)
|
||||
self.has_child = 0
|
||||
self.left = None
|
||||
self.right = None
|
||||
|
||||
def addOffset(self, x):
|
||||
self.offsets[x] = 1
|
||||
return self
|
||||
|
||||
def setChild(self, left, right):
|
||||
self.has_child = 1
|
||||
self.left = left
|
||||
self.right = right
|
||||
return self
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self):
|
||||
global N, M
|
||||
self.p_bias = np.zeros(2,)
|
||||
self.p_bias.fill(0.5)
|
||||
self.p_offsets = np.zeros((2,N))
|
||||
self.p_offsets.fill(0.5)
|
||||
self.p_has_child = 0
|
||||
|
||||
self.bias_coherences = np.zeros((2, M,))
|
||||
self.bias_coherences.fill(0.5)
|
||||
self.offset_coherences = np.zeros((2, M, N))
|
||||
self.offset_coherences.fill(0.5)
|
||||
self.has_child_coherences = np.zeros((2,))
|
||||
self.has_child_coherences.fill(0.5)
|
||||
|
||||
self.uncertainty = np.zeros((2,))
|
||||
self.totals = np.zeros((2,))
|
||||
|
||||
self.left = None
|
||||
self.right = None
|
||||
self.parent = None
|
||||
self.depth = 1
|
||||
|
||||
def reset_uncertainty(self):
|
||||
if self.totals[0] == 0 and self.totals[1] == 0:
|
||||
return
|
||||
self.uncertainty.fill(0)
|
||||
self.totals.fill(0)
|
||||
if not self.left is None:
|
||||
self.left.reset_uncertainty()
|
||||
if not self.right is None:
|
||||
self.right.reset_uncertainty()
|
||||
|
||||
def min_p_has_child(self):
|
||||
without_child = self.uncertainty[0] / self.totals[0] if self.totals[0] > 0 else 0
|
||||
with_child = self.uncertainty[1] / self.totals[1] if self.totals[1] > 0 else 0
|
||||
|
||||
if without_child == 0 and with_child == 0:
|
||||
return 0.5
|
||||
return without_child / (without_child + with_child)
|
||||
|
||||
def confidence(self):
|
||||
global N
|
||||
total = (2 * self.p_bias[0] - 1) ** 2
|
||||
for i in range(0, N):
|
||||
total += (2 * self.p_offsets[0][i] - 1) ** 2
|
||||
return total / (N + 1)
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
global N
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
test_candidate = Candidate().addOffset(0).addOffset(1).setChild(
|
||||
Candidate().addOffset(2), Candidate().addOffset(3).setChild(
|
||||
Candidate().addOffset(4), Candidate().addOffset(5)
|
||||
))
|
||||
|
||||
def eval_test_candidate(x):
|
||||
global test_candidate
|
||||
return evaluate_candidate(test_candidate, x)
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(inputs, outputs, scratch):
|
||||
coherences = []
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
y_b = outputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
weight = 1.0 / (2 ** distance)
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n))
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
return inputs
|
||||
|
||||
def evaluate_candidate(candidate, x):
|
||||
global N, M
|
||||
value = candidate.bias
|
||||
for i in range(0, N):
|
||||
value += x[i] * candidate.offsets[i]
|
||||
value %= M
|
||||
if candidate.has_child == 0:
|
||||
return value
|
||||
left = evaluate_candidate(candidate.left, x)
|
||||
right = evaluate_candidate(candidate.right, x)
|
||||
value += left * right
|
||||
value %= M
|
||||
return value
|
||||
|
||||
def evaluate(probabilities, candidate, x, z, update_uncertainty = True):
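# Evaluates `candidate` on x against the target z, tallying in `probabilities`
# how often the node's output misses the target, bucketed by whether the node has
# a child term (index 1) or not (index 0); these counts drive min_p_has_child().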
|
||||
global N, M
|
||||
value = candidate.bias
|
||||
for i in range(0, N):
|
||||
value += x[i] * candidate.offsets[i]
|
||||
value %= M
|
||||
if candidate.has_child == 0:
|
||||
if update_uncertainty:
|
||||
if value != z:
|
||||
probabilities.uncertainty[0] += 1
|
||||
probabilities.totals[0] += 1
|
||||
return value
|
||||
e = (value - z) % M
|
||||
left = evaluate(probabilities.left, candidate.left, x, e, False)
|
||||
right = evaluate(probabilities.right, candidate.right, x, e, False)
|
||||
if update_uncertainty:
|
||||
if e == 0:
|
||||
if left == 1 and right == 1:
|
||||
evaluate(probabilities.left, candidate.left, x, e)
|
||||
evaluate(probabilities.right, candidate.right, x, e)
|
||||
if left == 0:
|
||||
evaluate(probabilities.left, candidate.left, x, e)
|
||||
if right == 0:
|
||||
evaluate(probabilities.right, candidate.right, x, e)
|
||||
elif e == 1:
|
||||
if left == 1 and right == 1:
|
||||
evaluate(probabilities.left, candidate.left, x, e)
|
||||
evaluate(probabilities.right, candidate.right, x, e)
|
||||
if left == 0:
|
||||
evaluate(probabilities.left, candidate.left, x, e)
|
||||
if right == 0:
|
||||
evaluate(probabilities.right, candidate.right, x, e)
|
||||
value += left * right
|
||||
value %= M
|
||||
if update_uncertainty:
|
||||
if value != z:
|
||||
probabilities.uncertainty[1] += 1
|
||||
probabilities.totals[1] += 1
|
||||
return value
|
||||
|
||||
def update_probabilities(probabilities, candidates, scores, depth = 1):
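# Exponentially averages the best coherence seen for each parameter setting (bias
# value, each offset bit, has_child), then nudges the sampling probabilities toward
# the better-scoring setting. Once a node's distribution is confident enough it is
# frozen (rounded) and p_has_child is set to 1 so a child level can grow; the
# update then recurses into the left/right child probability nodes.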
|
||||
global N, M
|
||||
num_candidates = len(candidates)
|
||||
min_p_has_child = probabilities.min_p_has_child()
|
||||
|
||||
for z in range(0, 2):
|
||||
for i in range(0, M):
|
||||
bias_i_max = 0
|
||||
for k in range(0, num_candidates):
|
||||
candidate = candidates[k]
|
||||
if candidate is None:
|
||||
continue
|
||||
if candidate.bias != i:
|
||||
continue
|
||||
if candidate.has_child != z:
|
||||
continue
|
||||
bias_i_max = max(bias_i_max, scores[k])
|
||||
if bias_i_max == 0:
|
||||
continue
|
||||
probabilities.bias_coherences[z][i] = 0.9 * probabilities.bias_coherences[z][i] + 0.1 * bias_i_max
|
||||
|
||||
for z in range(0, 2):
|
||||
for i in range(0, M):
|
||||
for j in range(0, N):
|
||||
offset_ij_max = 0
|
||||
for k in range(0, num_candidates):
|
||||
candidate = candidates[k]
|
||||
if candidate is None:
|
||||
continue
|
||||
if candidate.offsets[j] != i:
|
||||
continue
|
||||
if candidate.has_child != z:
|
||||
continue
|
||||
offset_ij_max = max(offset_ij_max, scores[k])
|
||||
if offset_ij_max == 0:
|
||||
continue
|
||||
probabilities.offset_coherences[z][i][j] = 0.9 * probabilities.offset_coherences[z][i][j] + 0.1 * offset_ij_max
|
||||
|
||||
for i in range(0, 2):
|
||||
has_child_i_max = 0
|
||||
for k in range(0, num_candidates):
|
||||
candidate = candidates[k]
|
||||
if candidate is None:
|
||||
continue
|
||||
if candidate.has_child != i:
|
||||
continue
|
||||
has_child_i_max = max(has_child_i_max, scores[k])
|
||||
if has_child_i_max == 0:
|
||||
continue
|
||||
probabilities.has_child_coherences[i] = 0.9 * probabilities.has_child_coherences[i] + 0.1 * has_child_i_max
|
||||
|
||||
|
||||
for z in range(0, 2):
|
||||
# direction = 1 if z == 0 and probabilities.has_child_coherences[0] > probabilities.has_child_coherences[1] or z == 1 and probabilities.has_child_coherences[1] > probabilities.has_child_coherences[0] else -1
|
||||
direction = 1
|
||||
p_bias_next = clamp(probabilities.p_bias[z] + direction * (probabilities.bias_coherences[z][1] - probabilities.bias_coherences[z][0]), 0, 1)
|
||||
# if z == 0 and probabilities.has_child_coherences[0] < probabilities.has_child_coherences[1] or z == 1 and probabilities.has_child_coherences[0] > probabilities.has_child_coherences[1]:
|
||||
# p_bias_next = 0.5
|
||||
probabilities.p_bias[z] = 0.9 * probabilities.p_bias[z] + 0.1 * p_bias_next
|
||||
for j in range(0, N):
|
||||
p_offset_next = clamp(probabilities.p_offsets[z][j] + direction * (probabilities.offset_coherences[z][1][j] - probabilities.offset_coherences[z][0][j]), 0, 1)
|
||||
# if z == 0 and probabilities.has_child_coherences[0] < probabilities.has_child_coherences[1] or z == 1 and probabilities.has_child_coherences[0] > probabilities.has_child_coherences[1]:
|
||||
# p_offset_next = 0.5
|
||||
probabilities.p_offsets[z][j] = 0.9 * probabilities.p_offsets[z][j] + 0.1 * p_offset_next
|
||||
|
||||
# direction = 1 if probabilities.parent is None or probabilities.parent.has_child_coherences[1] > probabilities.parent.has_child_coherences[0] else -1
|
||||
direction = 1
|
||||
# p_has_child_next = clamp(probabilities.p_has_child + direction * (probabilities.has_child_coherences[1] - probabilities.has_child_coherences[0]), probabilities.min_p_has_child(), 1)
|
||||
# probabilities.p_has_child = 0.9 * probabilities.p_has_child + 0.1 *
|
||||
if probabilities.confidence() > 0.9 and probabilities.p_has_child == 0:
|
||||
probabilities.p_bias[0] = round(probabilities.p_bias[0])
|
||||
for i in range(0, N):
|
||||
probabilities.p_offsets[0][i] = round(probabilities.p_offsets[0][i])
|
||||
probabilities.p_has_child = 1
|
||||
|
||||
# if probabilities.has_child_coherences[0] > probabilities.has_child_coherences[1]:
|
||||
# return
|
||||
|
||||
p_left = probabilities.left
|
||||
p_right = probabilities.right
|
||||
if not p_left is None:
|
||||
left = [candidate.left if not candidate is None and candidate.has_child else None for candidate in candidates]
|
||||
if any(x is not None for x in left):
|
||||
update_probabilities(p_left, left, scores, depth + 1)
|
||||
if not p_right is None:
|
||||
right = [candidate.right if not candidate is None and candidate.has_child else None for candidate in candidates]
|
||||
if any(x is not None for x in right):
|
||||
update_probabilities(p_right, right, scores, depth + 1)
|
||||
|
||||
|
||||
def create_candidate(probabilities, candidate):
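# Samples a concrete candidate from the probability tree (bias, offsets, and
# whether to attach a child, capped at depth 4), lazily growing the matching child
# Probabilities nodes and recursing into them; returns a count of newly added children.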
|
||||
global N
|
||||
new_children = 0
|
||||
z = 1 if random.random() < probabilities.p_has_child and probabilities.depth <= 4 else 0
|
||||
candidate.bias = 1 if random.random() < probabilities.p_bias[0] else 0
|
||||
for i in range(0, N):
|
||||
candidate.offsets[i] = 1 if random.random() < probabilities.p_offsets[0][i] else 0
|
||||
if not z:
|
||||
candidate.has_child = 0
|
||||
return new_children
|
||||
if probabilities.p_has_child < 1:
|
||||
new_children += 1
|
||||
candidate.has_child = 1
|
||||
if candidate.left is None:
|
||||
candidate.left = Candidate()
|
||||
if candidate.right is None:
|
||||
candidate.right = Candidate()
|
||||
depth = probabilities.depth + 1
|
||||
if probabilities.left is None:
|
||||
probabilities.left = Probabilities()
|
||||
probabilities.left.parent = probabilities
|
||||
probabilities.left.depth = depth
|
||||
# probabilities.left.p_has_child = 2 ** -depth
|
||||
if probabilities.right is None:
|
||||
probabilities.right = Probabilities()
|
||||
probabilities.right.parent = probabilities
|
||||
probabilities.right.depth = depth
|
||||
# probabilities.right.p_has_child = 2 ** -depth
|
||||
new_children += create_candidate(probabilities.left, candidate.left)
|
||||
new_children += create_candidate(probabilities.right, candidate.right)
|
||||
return new_children
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
global N
|
||||
dest.bias = src.bias
|
||||
for i in range(0, N):
|
||||
dest.offsets[i] = src.offsets[i]
|
||||
has_child = src.has_child
|
||||
dest.has_child = has_child
|
||||
if not has_child:
|
||||
return
|
||||
if dest.left is None:
|
||||
dest.left = Candidate()
|
||||
if dest.right is None:
|
||||
dest.right = Candidate()
|
||||
copy_candidate(src.left, dest.left)
|
||||
copy_candidate(src.right, dest.right)
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities, depth=0):
|
||||
global M
|
||||
if depth == 0:
|
||||
print('=====================')
|
||||
left = probabilities.left
|
||||
right = probabilities.right
|
||||
if left is None:
|
||||
print('None')
|
||||
else:
|
||||
print_probabilities(left, depth + 1)
|
||||
if right is None:
|
||||
print('None')
|
||||
else:
|
||||
print_probabilities(right, depth + 1)
|
||||
for z in range(0, 2):
|
||||
# for i in range(0, M):
|
||||
# print(z, i, p(probabilities.bias_coherences[z][i]), p_a(probabilities.offset_coherences[z][i]), p(probabilities.has_child_coherences[i]))
|
||||
print(depth, z, p(probabilities.p_bias[z]), p_a(probabilities.p_offsets[z]), p(probabilities.p_has_child), p(probabilities.confidence()))
|
||||
if depth == 0:
|
||||
print('=====================')
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 10
|
||||
epochs = 1000
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros(N,)
|
||||
g = eval_test_candidate
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
inputs = random_sample(sample_size, N)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((sample_size,))
|
||||
probabilities = Probabilities()
|
||||
candidates = [Candidate() for _ in range(0, num_candidates + num_survivors)]
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
|
||||
while True:
|
||||
max_new_children = 0
|
||||
min_new_children = 1e6
|
||||
probabilities.reset_uncertainty()
|
||||
for i in range(0, len(candidates)):
|
||||
candidate = candidates[i]
|
||||
if i < num_candidates:
|
||||
create_candidate(probabilities, candidate)
|
||||
for j in range(0, sample_size):
|
||||
outputs[j] = evaluate(probabilities, candidate, inputs[j], expected_outputs[j])
|
||||
np.subtract(outputs, expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
scores[i] = coherence(inputs, output_xor, scratch)
|
||||
update_probabilities(probabilities, candidates, scores)
|
||||
print_probabilities(probabilities)
|
||||
print(np.max(scores))
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
copy_candidate(candidates[src_index], candidates[dest_index])
|
||||
|
||||
inputs = random_sample(sample_size, N)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
488
mutations6.py
Normal file
488
mutations6.py
Normal file
@ -0,0 +1,488 @@
|
||||
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
|
||||
def timeit(f):
|
||||
def timed(*args, **kw):
|
||||
ts = time.time()
|
||||
result = f(*args, **kw)
|
||||
te = time.time()
|
||||
|
||||
print('func:%r took: %2.4f sec' % (f.__name__, te-ts))
|
||||
return result
|
||||
return timed
|
||||
|
||||
def vec_to_int(bias, x):
|
||||
global N
|
||||
z = bias
|
||||
for i in range(0, N):
|
||||
z <<= 1
|
||||
z |= x[i]
|
||||
return z
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = 2 ** layer
|
||||
self.bias = np.zeros((self.node_count,)).astype(np.int32)
|
||||
self.offsets = np.zeros((self.node_count, N)).astype(np.int32)
|
||||
|
||||
def normalize(self):
|
||||
global N
|
||||
if self.node_count < 2:
|
||||
return
|
||||
# pairs of two must be in order
|
||||
for i in range(0, self.node_count, 2):
|
||||
left_id = vec_to_int(self.bias[i], self.offsets[i])
|
||||
right_id = vec_to_int(self.bias[i + 1], self.offsets[i + 1])
|
||||
if left_id > right_id:
|
||||
temp = self.bias[i]
|
||||
self.bias[i] = self.bias[i + 1]
|
||||
self.bias[i + 1] = temp
|
||||
for j in range(0, N):
|
||||
temp = self.offsets[i][j]
|
||||
self.offsets[i][j] = self.offsets[i + 1][j]
|
||||
self.offsets[i + 1][j] = temp
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = 2 ** layer
|
||||
self.p_bias = np.zeros((self.node_count,))
|
||||
self.p_bias.fill(0.5)
|
||||
self.p_offsets = np.zeros((self.node_count, N))
|
||||
self.p_offsets.fill(0.5)
|
||||
|
||||
self.bias_coherences = np.zeros((2, self.node_count,))
|
||||
self.bias_coherences.fill(0.5)
|
||||
self.offset_coherences = np.zeros((2, self.node_count, N))
|
||||
self.offset_coherences.fill(0.5)
|
||||
|
||||
def inertia(self):
|
||||
global N
|
||||
total = 0
|
||||
for i in range(0, self.node_count):
|
||||
if self.p_bias[i] > 1e-2 and self.p_bias[i] < (1 - 1e-2):
|
||||
total += abs(self.bias_coherences[1][i] - self.bias_coherences[0][i])
|
||||
for j in range(0, N):
|
||||
if self.p_offsets[i][j] > 1e-2 and self.p_offsets[i][j] < (1 - 1e-2):
|
||||
total += abs(self.offset_coherences[1][i][j] - self.offset_coherences[0][i][j])
|
||||
return total
|
||||
|
||||
def has_converged(self):
|
||||
global N
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, N):
|
||||
if self.p_offsets[i][j] > 1e-2 and self.p_offsets[i][j] < 1 - 1e-2:
|
||||
return False
|
||||
return True
|
||||
|
||||
def confidence(self):
|
||||
global N
|
||||
total = 0
|
||||
for i in range(0, self.node_count):
|
||||
total += (2 * self.p_bias[i] - 1) ** 2
|
||||
for j in range(0, N):
|
||||
total += (2 * self.p_offsets[i][j] - 1) ** 2
|
||||
return total / ((N + 1) * self.node_count)
|
||||
|
||||
def flatten(self):
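# Rounds the learned probabilities into a concrete Candidate; a node is forced to
# all-zero when its sibling is (near) zero everywhere, since the pair's product
# then contributes nothing.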
|
||||
candidate = Candidate(self.layer)
|
||||
for i in range(0, self.node_count):
|
||||
force_zero = True
|
||||
if self.node_count > 1:
|
||||
k = i ^ 0b1
|
||||
if self.p_bias[k] > 1e-2:
|
||||
force_zero = False
|
||||
if force_zero:
|
||||
for j in range(0, N):
|
||||
if self.p_offsets[k][j] > 1e-2:
|
||||
force_zero = False
|
||||
break
|
||||
else:
|
||||
force_zero = False
|
||||
|
||||
candidate.bias[i] = 1 if not force_zero and self.p_bias[i] >= (1 - 1e-2) else 0
|
||||
for j in range(0, N):
|
||||
candidate.offsets[i][j] = 1 if not force_zero and self.p_offsets[i][j] >= (1 - 1e-2) else 0
|
||||
return candidate
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
global N
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
# 00100111 x4
|
||||
# 00000110 x1
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
def test_fn(x):
|
||||
# 0 1
|
||||
# 2 | 3
|
||||
# 4 | 5 | 6 | 7
|
||||
# | | 0 | 7 | | | |
|
||||
return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))
|
||||
|
||||
def candidate_fn(x):
|
||||
return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2])
|
||||
|
||||
def true_fn(x):
|
||||
return x[0] ^ x[1] ^ (x[3] * x[2])
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(outputs, distances):
|
||||
coherences = []
|
||||
for i in range(0, len(outputs)):
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(outputs)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = outputs[j]
|
||||
weight = distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n)).astype(np.int32)
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
return inputs
|
||||
|
||||
def populate_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def populate_layers_scratch(layers, x, layers_scratch, compute_scratch):
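# Base pass: evaluates the already-frozen layers on x with the new bottom layer's
# contribution treated as all zeros, caching each layer's node values so
# evaluate_cached can patch in a candidate's effect incrementally.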
|
||||
layers_scratch[0].fill(0)
|
||||
for i in range(1, len(layers_scratch)):
|
||||
scratch = layers_scratch[i]
|
||||
layer = layers[i - 1]
|
||||
for j in range(0, layer.node_count):
|
||||
value = layer.bias[j]
|
||||
np.multiply(layer.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
value ^= left * right
|
||||
scratch[j] = value
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
def evaluate_cached(candidate, x, layers_scratch, layers_scratch_base, compute_scratch):
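# Incremental evaluation: computes layer 0 for this candidate only, then walks up
# the cached base values (from populate_layers_scratch), XOR-toggling just the
# nodes whose child product differs from the base instead of re-evaluating every layer.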
|
||||
global N
|
||||
maybe_evaluate = set()
|
||||
for j in range(0, candidate.node_count, 2):
|
||||
value = candidate.bias[j]
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
layers_scratch[0][j] = value
|
||||
if candidate.node_count > 1:
|
||||
value = candidate.bias[j + 1]
|
||||
np.multiply(candidate.offsets[j + 1], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
layers_scratch[0][j + 1] = value
|
||||
if layers_scratch[0][j] == 1 and layers_scratch[0][j + 1] == 1:
|
||||
maybe_evaluate.add(int(j / 2))
|
||||
|
||||
for i in range(1, len(layers_scratch)):
|
||||
np.copyto(layers_scratch[i], layers_scratch_base[i])
|
||||
maybe_evaluate_next = set()
|
||||
for j in maybe_evaluate:
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
child_value = left * right
|
||||
left_base = layers_scratch_base[i - 1][j * 2]
|
||||
right_base = layers_scratch_base[i - 1][j * 2 + 1]
|
||||
child_base_value = left_base * right_base
|
||||
if child_value != child_base_value:
|
||||
layers_scratch[i][j] ^= 1
|
||||
maybe_evaluate_next.add(int(j / 2))
|
||||
maybe_evaluate = maybe_evaluate_next
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
def evaluate(layers, candidate, x, layers_scratch, compute_scratch):
|
||||
global N
|
||||
for i in range(0, len(layers_scratch)):
|
||||
scratch = layers_scratch[i]
|
||||
if i == 0:
|
||||
for j in range(0, candidate.node_count):
|
||||
value = candidate.bias[j]
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
scratch[j] = value
|
||||
else:
|
||||
layer = layers[i - 1]
|
||||
for j in range(0, layer.node_count):
|
||||
value = layer.bias[j]
|
||||
np.multiply(layer.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
value ^= left * right
|
||||
scratch[j] = value
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
@timeit
|
||||
def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch):
|
||||
global M, N
|
||||
scores.fill(0)
|
||||
unique_candidates = {}
|
||||
for j in range(0, num_candidates):
|
||||
create_candidate(probabilities, candidates[j])
|
||||
unique_candidates[candidate_str(candidates[j])] = j
|
||||
|
||||
for i in range(0, sample_size):
|
||||
populate_layers_scratch(layers, inputs[i], layers_scratch_base, int_scratch)
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
outputs[j][i] = evaluate_cached(candidate, inputs[i], layers_scratch, layers_scratch_base, int_scratch)
|
||||
# if outputs[j][i] != evaluate(layers, candidate, inputs[i], layers_scratch, int_scratch):
|
||||
# print('Uh-oh')
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
scores[j] = coherence(output_xor, distances)
|
||||
|
||||
@timeit
|
||||
def update_probabilities(probabilities, candidates, scores):
|
||||
global N
|
||||
num_candidates = len(candidates)
|
||||
|
||||
for i in range(0, 2):
|
||||
for j in range(0, probabilities.node_count):
|
||||
bias_max = 0
|
||||
bias_sum = 0
|
||||
bias_count = 0
|
||||
for p in range(0, num_candidates):
|
||||
candidate = candidates[p]
|
||||
if candidate.bias[j] != i:
|
||||
continue
|
||||
if scores[p] == 0:
|
||||
continue
|
||||
bias_max = max(bias_max, scores[p])
|
||||
bias_sum += scores[p]
|
||||
bias_count += 1
|
||||
if bias_max == 0:
|
||||
continue
|
||||
# weight = bias_count / num_candidates
|
||||
weight = 0.1
|
||||
bias_avg = bias_sum / bias_count
|
||||
probabilities.bias_coherences[i][j] = (1.0 - weight) * probabilities.bias_coherences[i][j] + weight * bias_max
|
||||
# probabilities.bias_coherences[i][j] = bias_max
|
||||
|
||||
for i in range(0, 2):
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N):
|
||||
offset_max = 0
|
||||
offset_sum = 0
|
||||
offset_count = 0
|
||||
for p in range(0, num_candidates):
|
||||
candidate = candidates[p]
|
||||
if candidate.offsets[j][k] != i:
|
||||
continue
|
||||
if scores[p] == 0:
|
||||
continue
|
||||
offset_max = max(offset_max, scores[p])
|
||||
offset_sum += scores[p]
|
||||
offset_count += 1
|
||||
if offset_max == 0:
|
||||
continue
|
||||
# weight = offset_count / num_candidates
|
||||
weight = 0.1
|
||||
offset_avg = offset_sum / offset_count
|
||||
probabilities.offset_coherences[i][j][k] = (1.0 - weight) * probabilities.offset_coherences[i][j][k] + weight * offset_max
|
||||
# probabilities.offset_coherences[i][j][k] = offset_max
|
||||
|
||||
for j in range(0, probabilities.node_count):
|
||||
base_delta = probabilities.bias_coherences[1][j] - probabilities.bias_coherences[0][j]
|
||||
delta = base_delta
|
||||
q = j ^ 0b1
|
||||
if probabilities.node_count > 1:
|
||||
q_delta = probabilities.bias_coherences[1][q] - probabilities.bias_coherences[0][q]
|
||||
if base_delta > 0 and q_delta > 0:
|
||||
delta -= 0.5 * q_delta
|
||||
|
||||
p_bias_next = clamp(probabilities.p_bias[j] + delta, 0, 1)
|
||||
probabilities.p_bias[j] = 0.9 * probabilities.p_bias[j] + 0.1 * p_bias_next
|
||||
for k in range(0, N):
|
||||
base_delta = probabilities.offset_coherences[1][j][k] - probabilities.offset_coherences[0][j][k]
|
||||
delta = base_delta
|
||||
if probabilities.node_count > 1:
|
||||
q_delta = probabilities.offset_coherences[1][q][k] - probabilities.offset_coherences[0][q][k]
|
||||
if base_delta > 0 and q_delta > 0:
|
||||
delta -= 0.5 * q_delta
|
||||
|
||||
p_offset_next = clamp(probabilities.p_offsets[j][k] + delta, 0, 1)
|
||||
probabilities.p_offsets[j][k] = 0.9 * probabilities.p_offsets[j][k] + 0.1 * p_offset_next
|
||||
|
||||
def create_candidate(probabilities, candidate):
|
||||
global N
|
||||
for i in range(0, probabilities.node_count):
|
||||
candidate.bias[i] = 1 if random.random() < probabilities.p_bias[i] else 0
|
||||
# candidate.bias[i] = 0
|
||||
for j in range(0, N):
|
||||
candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0
|
||||
# candidate.normalize()
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
global N
|
||||
for i in range(0, src.node_count):
|
||||
dest.bias[i] = src.bias[i]
|
||||
for i in range(0, src.node_count):
|
||||
for j in range(0, N):
|
||||
dest.offsets[i][j] = src.offsets[i][j]
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities):
|
||||
print('=====================')
|
||||
for i in range(0, probabilities.node_count):
|
||||
print(i, p(probabilities.p_bias[i]), p_a(probabilities.p_offsets[i]))
|
||||
print('=====================')
|
||||
|
||||
def candidate_str(candidate):
|
||||
global N
|
||||
build_str = ''
|
||||
for i in range(0, candidate.node_count):
|
||||
build_str += str(candidate.bias[i])
|
||||
for j in range(0, N):
|
||||
build_str += str(candidate.offsets[i][j])
|
||||
return build_str
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 8
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros((N,))
|
||||
int_scratch = np.zeros((N,)).astype(np.int32)
|
||||
g = sha
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
inputs = random_sample(sample_size, N)
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((num_candidates + num_survivors, sample_size,))
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
|
||||
layers = []
|
||||
layers_scratch = [np.zeros(1, ).astype(np.int32)]
|
||||
layers_scratch_base = [np.zeros(1, ).astype(np.int32)]
|
||||
layer = 0
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# outputs[0][i] = candidate_fn(inputs[i])
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
# print(score)
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# outputs[0][i] = true_fn(inputs[i])
|
||||
|
||||
# np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
# np.mod(output_xor, M, output_xor)
|
||||
# score = coherence(output_xor, distances)
|
||||
# print(score)
|
||||
# return
|
||||
|
||||
while score < 1:
|
||||
probabilities = Probabilities(layer)
|
||||
candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)]
|
||||
inertia = 1
|
||||
while inertia > 1e-2:
|
||||
compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch)
|
||||
update_probabilities(probabilities, candidates, scores)
|
||||
inertia = 0.9 * inertia + 0.1 * probabilities.inertia()
|
||||
|
||||
print_probabilities(probabilities)
|
||||
for candidate in layers:
|
||||
print(candidate.bias, candidate.offsets)
|
||||
print(np.max(scores), probabilities.inertia(), inertia)
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
src = candidates[src_index]
|
||||
dest = candidates[dest_index]
|
||||
candidates[dest_index] = src
|
||||
candidates[src_index] = dest
|
||||
|
||||
inputs = random_sample(sample_size, N)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
|
||||
candidate = probabilities.flatten()
|
||||
for j in range(0, sample_size):
|
||||
outputs[0][j] = evaluate(layers, candidate, inputs[j], layers_scratch, int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
layers.insert(0, candidate)
|
||||
layer += 1
|
||||
layers_scratch.insert(0, np.zeros(2 ** layer,).astype(np.int32))
|
||||
layers_scratch_base.insert(0, np.zeros(2 ** layer,).astype(np.int32))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
455
mutations7.py
Normal file
455
mutations7.py
Normal file
@ -0,0 +1,455 @@
|
||||
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
|
||||
def vec_to_int(x):
|
||||
global N
|
||||
z = 0
|
||||
for i in range(0, N + 1):
|
||||
z <<= 1
|
||||
z |= x[i]
|
||||
return z
|
||||
|
||||
def timeit(f):
|
||||
def timed(*args, **kw):
|
||||
ts = time.time()
|
||||
result = f(*args, **kw)
|
||||
te = time.time()
|
||||
|
||||
print('func:%r took: %2.4f sec' % (f.__name__, te-ts))
|
||||
return result
|
||||
return timed
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = 2 ** layer
|
||||
self.offsets = np.zeros((self.node_count, N + 1)).astype(np.int32)
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = 2 ** layer
|
||||
self.p_offsets = np.zeros((self.node_count, N + 1))
|
||||
self.p_offsets.fill(0.5)
|
||||
self.offset_coherences = np.zeros((2, self.node_count, N + 1, 2, self.node_count, N + 1))
|
||||
self.offset_coherences.fill(-1)
|
||||
|
||||
def inertia(self):
|
||||
global N
|
||||
total = 0
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, N + 1):
|
||||
if self.p_offsets[i][j] > 1e-2 and self.p_offsets[i][j] < (1 - 1e-2):
|
||||
total += abs(self.offset_coherences[1][i][j][1][i][j] - self.offset_coherences[0][i][j][0][i][j])
|
||||
return total
|
||||
|
||||
def flatten(self):
|
||||
candidate = Candidate(self.layer)
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, N + 1):
|
||||
candidate.offsets[i][j] = 1 if self.p_offsets[i][j] >= 0.5 else 0
|
||||
if self.node_count > 1:
|
||||
for i in range(0, self.node_count):
|
||||
if not candidate.offsets[i].any():
|
||||
q = i ^ 0b1
|
||||
candidate.offsets[q].fill(0)
|
||||
return candidate
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
global N
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
# 00100111 x4
|
||||
# 00000110 x1
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
def test_fn(x):
|
||||
# 0 1
|
||||
# 2 | 3
|
||||
# 4 | 5 | 6 | 7
|
||||
# | | 0 | 7 | | | |
|
||||
return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))
|
||||
|
||||
def candidate_fn(x):
|
||||
return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2])
|
||||
|
||||
def true_fn(x):
|
||||
return x[0] ^ x[1] ^ (x[3] * x[2])
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(outputs, distances):
|
||||
coherences = []
|
||||
for i in range(0, len(outputs)):
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(outputs)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = outputs[j]
|
||||
weight = distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n + 1)).astype(np.int32)
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
inputs[i][n] = 1
|
||||
return inputs
|
||||
|
||||
def populate_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def populate_layers_scratch(layers, x, layers_scratch, compute_scratch):
|
||||
layers_scratch[0].fill(0)
|
||||
for i in range(1, len(layers_scratch)):
|
||||
scratch = layers_scratch[i]
|
||||
layer = layers[i - 1]
|
||||
for j in range(0, layer.node_count):
|
||||
value = 0
|
||||
np.multiply(layer.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
value ^= left * right
|
||||
scratch[j] = value
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
def evaluate_cached(candidate, x, layers_scratch, layers_scratch_base, compute_scratch):
|
||||
global N
|
||||
maybe_evaluate = set()
|
||||
for j in range(0, candidate.node_count, 2):
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
layers_scratch[0][j] = value
|
||||
if candidate.node_count > 1:
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j + 1], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
layers_scratch[0][j + 1] = value
|
||||
if layers_scratch[0][j] == 1 and layers_scratch[0][j + 1] == 1:
|
||||
maybe_evaluate.add(int(j / 2))
|
||||
|
||||
for i in range(1, len(layers_scratch)):
|
||||
np.copyto(layers_scratch[i], layers_scratch_base[i])
|
||||
maybe_evaluate_next = set()
|
||||
for j in maybe_evaluate:
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
child_value = left * right
|
||||
left_base = layers_scratch_base[i - 1][j * 2]
|
||||
right_base = layers_scratch_base[i - 1][j * 2 + 1]
|
||||
child_base_value = left_base * right_base
|
||||
if child_value != child_base_value:
|
||||
layers_scratch[i][j] ^= 1
|
||||
maybe_evaluate_next.add(int(j / 2))
|
||||
maybe_evaluate = maybe_evaluate_next
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
def evaluate(layers, candidate, x, layers_scratch, compute_scratch):
|
||||
global N
|
||||
for i in range(0, len(layers_scratch)):
|
||||
scratch = layers_scratch[i]
|
||||
if i == 0:
|
||||
for j in range(0, candidate.node_count):
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
scratch[j] = value
|
||||
else:
|
||||
layer = layers[i - 1]
|
||||
for j in range(0, layer.node_count):
|
||||
value = 0
|
||||
np.multiply(layer.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
value ^= left * right
|
||||
scratch[j] = value
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
@timeit
|
||||
def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch):
|
||||
global M, N
|
||||
scores.fill(0)
|
||||
unique_candidates = {}
|
||||
for j in range(0, num_candidates):
|
||||
create_candidate(probabilities, candidates[j])
|
||||
unique_candidates[candidate_str(candidates[j])] = j
|
||||
|
||||
for i in range(0, sample_size):
|
||||
populate_layers_scratch(layers, inputs[i], layers_scratch_base, int_scratch)
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
outputs[j][i] = evaluate_cached(candidate, inputs[i], layers_scratch, layers_scratch_base, int_scratch)
|
||||
# if outputs[j][i] != evaluate(layers, candidate, inputs[i], layers_scratch, int_scratch):
|
||||
# print('Uh-oh')
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
scores[j] = coherence(output_xor, distances)
|
||||
|
||||
@timeit
|
||||
def update_probabilities(probabilities, candidates, inputs, scores):
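# Pairwise credit assignment: records, for every joint setting of two offset bits,
# the best score observed, then moves each bit's probability by the advantage of
# setting it to 1 versus 0, weighted by the current probability of the other bit's
# setting; returns the total probability movement as the 'inertia' signal.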
|
||||
global N
|
||||
num_candidates = len(candidates)
|
||||
|
||||
variance = np.zeros((N + 1,))
|
||||
for x in inputs:
|
||||
variance += x
|
||||
|
||||
probabilities.offset_coherences.fill(-1)
|
||||
for p in range(0, num_candidates):
|
||||
candidate = candidates[p]
|
||||
score = scores[p]
|
||||
if score == 0:
|
||||
continue
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
i = candidate.offsets[j][k]
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
l = candidate.offsets[m][n]
|
||||
probabilities.offset_coherences[i][j][k][l][m][n] = max(score, probabilities.offset_coherences[i][j][k][l][m][n])
|
||||
|
||||
# for i in range(0, 2):
|
||||
# for j in range(0, probabilities.node_count):
|
||||
# for k in range(0, N + 1):
|
||||
# for l in range(0, 2):
|
||||
# for m in range(0, probabilities.node_count):
|
||||
# for n in range(0, N + 1):
|
||||
# offset_max = 0
|
||||
# offset_sum = 0
|
||||
# offset_count = 0
|
||||
# for p in range(0, num_candidates):
|
||||
# candidate = candidates[p]
|
||||
# if candidate.offsets[j][k] != i:
|
||||
# continue
|
||||
# if candidate.offsets[m][n] != l:
|
||||
# continue
|
||||
# if scores[p] == 0:
|
||||
# continue
|
||||
# offset_max = max(offset_max, scores[p])
|
||||
# offset_sum += scores[p]
|
||||
# offset_count += 1
|
||||
# if offset_max == 0:
|
||||
# continue
|
||||
# probabilities.offset_coherences[i][j][k][l][m][n] = offset_max
|
||||
|
||||
p_offsets_next = np.zeros((probabilities.node_count, N + 1))
|
||||
inertia = 0
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
delta = 0
|
||||
count = 0
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
if j == m and k == n:
|
||||
continue
|
||||
# confidence = variance[k] * variance[n] / (len(inputs) ** 2)
|
||||
confidence = 1.0
|
||||
p_j1_if_m0 = probabilities.offset_coherences[1][j][k][0][m][n]
|
||||
p_j0_if_m0 = probabilities.offset_coherences[0][j][k][0][m][n]
|
||||
p_j1_if_m1 = probabilities.offset_coherences[1][j][k][1][m][n]
|
||||
p_j0_if_m1 = probabilities.offset_coherences[0][j][k][1][m][n]
|
||||
if p_j1_if_m0 >= 0 and p_j0_if_m0 >= 0:
|
||||
delta_if_m0 = p_j1_if_m0 - p_j0_if_m0
|
||||
delta += delta_if_m0 * (1.0 - probabilities.p_offsets[m][n]) * confidence
|
||||
count += 1
|
||||
if p_j1_if_m1 >= 0 and p_j0_if_m1 >= 0:
|
||||
delta_if_m1 = p_j1_if_m1 - p_j0_if_m1
|
||||
delta += delta_if_m1 * probabilities.p_offsets[m][n] * confidence
|
||||
count += 1
|
||||
if count > 0:
|
||||
delta /= count
|
||||
p_offsets_next[j][k] = clamp(probabilities.p_offsets[j][k] + delta, 0, 1)
|
||||
inertia += abs(p_offsets_next[j][k] - probabilities.p_offsets[j][k])
|
||||
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
p_offset_next = p_offsets_next[j][k]
|
||||
probabilities.p_offsets[j][k] = 0.9 * probabilities.p_offsets[j][k] + 0.1 * p_offset_next
|
||||
|
||||
# if probabilities.node_count > 1:
|
||||
# for j in range(0, probabilities.node_count):
|
||||
# q = j ^ 0b1
|
||||
# for k in range(0, N + 1):
|
||||
# if probabilities.p_offsets[j][k] > 0.5:
|
||||
# probabilities.p_offsets[q][k] = min(probabilities.p_offsets[q][k], 1 - probabilities.p_offsets[j][k])
|
||||
|
||||
return inertia
|
||||
|
||||
def create_candidate(probabilities, candidate):
|
||||
global N
|
||||
for i in range(0, probabilities.node_count):
|
||||
for j in range(0, N + 1):
|
||||
candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
global N
|
||||
for i in range(0, src.node_count):
|
||||
for j in range(0, N + 1):
|
||||
dest.offsets[i][j] = src.offsets[i][j]
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities):
|
||||
print('=====================')
|
||||
for i in range(0, probabilities.node_count):
|
||||
print(i, p_a(probabilities.p_offsets[i]))
|
||||
print('=====================')
|
||||
|
||||
def candidate_str(candidate):
|
||||
global N
|
||||
build_str = ''
|
||||
for i in range(0, candidate.node_count):
|
||||
for j in range(0, N + 1):
|
||||
build_str += str(candidate.offsets[i][j])
|
||||
return build_str
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 8
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros((N + 1,))
|
||||
int_scratch = np.zeros((N + 1,)).astype(np.int32)
|
||||
g = test_fn
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
inputs = random_sample(sample_size, N)
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((num_candidates + num_survivors, sample_size,))
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
|
||||
layers = []
|
||||
layers_scratch = [np.zeros(1, ).astype(np.int32)]
|
||||
layers_scratch_base = [np.zeros(1, ).astype(np.int32)]
|
||||
layer = 0
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# outputs[0][i] = candidate_fn(inputs[i])
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
# print(score)
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# outputs[0][i] = true_fn(inputs[i])
|
||||
|
||||
# np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
# np.mod(output_xor, M, output_xor)
|
||||
# score = coherence(output_xor, distances)
|
||||
# print(score)
|
||||
# return
|
||||
|
||||
while score < 1:
|
||||
probabilities = Probabilities(layer)
|
||||
candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)]
|
||||
inertia = 1
|
||||
while inertia > 0.01:
|
||||
compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch)
|
||||
round_inertia = update_probabilities(probabilities, candidates, inputs, scores)
|
||||
inertia = 0.9 * inertia + 0.1 * round_inertia
|
||||
|
||||
print_probabilities(probabilities)
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
print(np.max(scores), round_inertia, inertia)
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
src = candidates[src_index]
|
||||
dest = candidates[dest_index]
|
||||
candidates[dest_index] = src
|
||||
candidates[src_index] = dest
|
||||
|
||||
inputs = random_sample(sample_size, N)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
|
||||
candidate = probabilities.flatten()
|
||||
for j in range(0, sample_size):
|
||||
outputs[0][j] = evaluate(layers, candidate, inputs[j], layers_scratch, int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
layers.insert(0, candidate)
|
||||
layer += 1
|
||||
layers_scratch.insert(0, np.zeros(2 ** layer,).astype(np.int32))
|
||||
layers_scratch_base.insert(0, np.zeros(2 ** layer,).astype(np.int32))
|
||||
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
451
mutations8.py
Normal file

@ -0,0 +1,451 @@
|
||||
from enum import unique
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
|
||||
def vec_to_int(x):
|
||||
global N
|
||||
z = 0
|
||||
for i in range(0, N + 1):
|
||||
z <<= 1
|
||||
z |= x[i]
|
||||
return z
|
||||
|
||||
def timeit(f):
|
||||
def timed(*args, **kw):
|
||||
ts = time.time()
|
||||
result = f(*args, **kw)
|
||||
te = time.time()
|
||||
|
||||
print('func:%r took: %2.4f sec' % (f.__name__, te-ts))
|
||||
return result
|
||||
return timed
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = 2 ** layer
|
||||
self.offsets = np.zeros((self.node_count, N + 1)).astype(np.int32)
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = 2 ** layer
|
||||
self.p_offsets = np.zeros((self.node_count, N + 1))
|
||||
self.p_offsets.fill(0.5)
|
||||
self.offset_coherences = np.zeros((2, self.node_count, N + 1, 2, self.node_count, N + 1))
|
||||
self.offset_coherences.fill(-1)
|
||||
self.deltas = np.zeros((self.node_count, N + 1, 2, self.node_count, N + 1))
|
||||
|
||||
def inertia(self):
|
||||
global N
|
||||
total = 0
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, N + 1):
|
||||
if self.p_offsets[i][j] > 1e-2 and self.p_offsets[i][j] < (1 - 1e-2):
|
||||
total += abs(self.offset_coherences[1][i][j][1][i][j] - self.offset_coherences[0][i][j][0][i][j])
|
||||
return total
|
||||
|
||||
def flatten(self):
|
||||
candidate = Candidate(self.layer)
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, N + 1):
|
||||
candidate.offsets[i][j] = 1 if self.p_offsets[i][j] >= 0.5 else 0
|
||||
if self.node_count > 1:
|
||||
for i in range(0, self.node_count):
|
||||
if not candidate.offsets[i].any():
|
||||
q = i ^ 0b1
|
||||
candidate.offsets[q].fill(0)
|
||||
return candidate
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
global N
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
# 00100111 x4
|
||||
# 00000110 x1
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
def test_fn(x):
|
||||
# 0 1
|
||||
# 2 | 3
|
||||
# 4 | 5 | 6 | 7
|
||||
# | | 0 | 7 | | | |
|
||||
return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))
|
||||
|
||||
def candidate_fn(x):
|
||||
return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2])
|
||||
|
||||
def true_fn(x):
|
||||
return x[0] ^ x[1] ^ (x[3] * x[2])
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(outputs, distances):
|
||||
coherences = []
|
||||
for i in range(0, len(outputs)):
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(outputs)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = outputs[j]
|
||||
weight = distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
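# In words: for each sample i the loop above takes a distance-weighted vote over all
# other samples j, using the 2^-hamming(x_i, x_j) weights prepared by populate_distances
# below, and measures how much of that weight agrees with output i. The returned
# coherence is the mean of those per-sample ratios and reaches 1.0 only when every
# output in the sample agrees.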
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n + 1)).astype(np.int32)
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
inputs[i][n] = 1
|
||||
return inputs
|
||||
|
||||
def populate_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def populate_layers_scratch(layers, x, layers_scratch, compute_scratch):
|
||||
layers_scratch[0].fill(0)
|
||||
for i in range(1, len(layers_scratch)):
|
||||
scratch = layers_scratch[i]
|
||||
layer = layers[i - 1]
|
||||
for j in range(0, layer.node_count):
|
||||
value = 0
|
||||
np.multiply(layer.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
value ^= left * right
|
||||
scratch[j] = value
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
def evaluate_cached(candidate, x, layers_scratch, layers_scratch_base, compute_scratch):
|
||||
global N
|
||||
maybe_evaluate = set()
|
||||
for j in range(0, candidate.node_count, 2):
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
layers_scratch[0][j] = value
|
||||
if candidate.node_count > 1:
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j + 1], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
layers_scratch[0][j + 1] = value
|
||||
if layers_scratch[0][j] == 1 and layers_scratch[0][j + 1] == 1:
|
||||
maybe_evaluate.add(int(j / 2))
|
||||
|
||||
for i in range(1, len(layers_scratch)):
|
||||
np.copyto(layers_scratch[i], layers_scratch_base[i])
|
||||
maybe_evaluate_next = set()
|
||||
for j in maybe_evaluate:
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
child_value = left * right
|
||||
left_base = layers_scratch_base[i - 1][j * 2]
|
||||
right_base = layers_scratch_base[i - 1][j * 2 + 1]
|
||||
child_base_value = left_base * right_base
|
||||
if child_value != child_base_value:
|
||||
layers_scratch[i][j] ^= 1
|
||||
maybe_evaluate_next.add(int(j / 2))
|
||||
maybe_evaluate = maybe_evaluate_next
|
||||
return layers_scratch[-1][0]
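# Caching note (descriptive only): layers_scratch_base holds the node values of the
# frozen layers evaluated with the candidate layer zeroed out (see
# populate_layers_scratch above). Only parents whose AND-of-children differs from that
# base value are flipped and propagated upward; every other node is copied from the
# base, which is what makes evaluate_cached cheaper than a full evaluate().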
|
||||
|
||||
def evaluate(layers, candidate, x, layers_scratch, compute_scratch):
|
||||
global N
|
||||
for i in range(0, len(layers_scratch)):
|
||||
scratch = layers_scratch[i]
|
||||
if i == 0:
|
||||
for j in range(0, candidate.node_count):
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
scratch[j] = value
|
||||
else:
|
||||
layer = layers[i - 1]
|
||||
for j in range(0, layer.node_count):
|
||||
value = 0
|
||||
np.multiply(layer.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
left = layers_scratch[i - 1][j * 2]
|
||||
right = layers_scratch[i - 1][j * 2 + 1]
|
||||
value ^= left * right
|
||||
scratch[j] = value
|
||||
return layers_scratch[-1][0]
|
||||
|
||||
@timeit
|
||||
def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch):
|
||||
global M, N
|
||||
scores.fill(0)
|
||||
unique_candidates = {}
|
||||
for j in range(0, num_candidates):
|
||||
create_candidate(probabilities, candidates[j])
|
||||
unique_candidates[candidate_str(candidates[j])] = j
|
||||
|
||||
for i in range(0, sample_size):
|
||||
populate_layers_scratch(layers, inputs[i], layers_scratch_base, int_scratch)
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
outputs[j][i] = evaluate_cached(candidate, inputs[i], layers_scratch, layers_scratch_base, int_scratch)
|
||||
# if outputs[j][i] != evaluate(layers, candidate, inputs[i], layers_scratch, int_scratch):
|
||||
# print('Uh-oh')
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
scores[j] = coherence(output_xor, distances)
|
||||
|
||||
@timeit
|
||||
def update_probabilities(probabilities, candidates, inputs, scores, scale):
|
||||
global N
|
||||
num_candidates = len(candidates)
|
||||
|
||||
probabilities.offset_coherences.fill(-1)
|
||||
for p in range(0, num_candidates):
|
||||
candidate = candidates[p]
|
||||
score = scores[p]
|
||||
if score == 0:
|
||||
continue
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
i = candidate.offsets[j][k]
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
l = candidate.offsets[m][n]
|
||||
probabilities.offset_coherences[i][j][k][l][m][n] = max(score, probabilities.offset_coherences[i][j][k][l][m][n])
|
||||
|
||||
# for i in range(0, 2):
|
||||
# for j in range(0, probabilities.node_count):
|
||||
# for k in range(0, N + 1):
|
||||
# for l in range(0, 2):
|
||||
# for m in range(0, probabilities.node_count):
|
||||
# for n in range(0, N + 1):
|
||||
# offset_max = 0
|
||||
# offset_sum = 0
|
||||
# offset_count = 0
|
||||
# for p in range(0, num_candidates):
|
||||
# candidate = candidates[p]
|
||||
# if candidate.offsets[j][k] != i:
|
||||
# continue
|
||||
# if candidate.offsets[m][n] != l:
|
||||
# continue
|
||||
# if scores[p] == 0:
|
||||
# continue
|
||||
# offset_max = max(offset_max, scores[p])
|
||||
# offset_sum += scores[p]
|
||||
# offset_count += 1
|
||||
# if offset_max == 0:
|
||||
# continue
|
||||
# probabilities.offset_coherences[i][j][k][l][m][n] = offset_max
|
||||
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
# if j == m and k == n:
|
||||
# continue
|
||||
p_j1_if_m0 = probabilities.offset_coherences[1][j][k][0][m][n]
|
||||
p_j0_if_m0 = probabilities.offset_coherences[0][j][k][0][m][n]
|
||||
p_j1_if_m1 = probabilities.offset_coherences[1][j][k][1][m][n]
|
||||
p_j0_if_m1 = probabilities.offset_coherences[0][j][k][1][m][n]
|
||||
if p_j1_if_m0 >= 0 and p_j0_if_m0 >= 0:
|
||||
delta_if_m0 = p_j1_if_m0 - p_j0_if_m0
|
||||
probabilities.deltas[j][k][0][m][n] = delta_if_m0
|
||||
if p_j1_if_m1 >= 0 and p_j0_if_m1 >= 0:
|
||||
delta_if_m1 = p_j1_if_m1 - p_j0_if_m1
|
||||
probabilities.deltas[j][k][1][m][n] = delta_if_m1
|
||||
|
||||
p_offsets_next = np.zeros((probabilities.node_count, N + 1))
|
||||
p_offsets_next.fill(0.5)
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
# if j == m and k == n:
|
||||
# continue
|
||||
delta = probabilities.deltas[j][k][1][m][n] * probabilities.p_offsets[m][n] + probabilities.deltas[j][k][0][m][n] * (1 - probabilities.p_offsets[m][n])
|
||||
p_offsets_next[j][k] += delta * scale
|
||||
# if delta > 0 and probabilities.node_count > 1:
|
||||
# q = j ^ 0b1
|
||||
# p_offsets_next[q][k] -= delta * scale
|
||||
|
||||
inertia = 0
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
value = clamp(p_offsets_next[j][k], 0, 1)
|
||||
inertia += abs(probabilities.p_offsets[j][k] - value)
|
||||
probabilities.p_offsets[j][k] = value
|
||||
|
||||
return inertia
|
||||
|
||||
def create_candidate(probabilities, candidate):
|
||||
global N
|
||||
for i in range(0, probabilities.node_count):
|
||||
for j in range(0, N + 1):
|
||||
candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
global N
|
||||
for i in range(0, src.node_count):
|
||||
for j in range(0, N + 1):
|
||||
dest.offsets[i][j] = src.offsets[i][j]
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities):
|
||||
print('=====================')
|
||||
for i in range(0, probabilities.node_count):
|
||||
print(i, p_a(probabilities.p_offsets[i]))
|
||||
print('=====================')
|
||||
|
||||
def candidate_str(candidate):
|
||||
global N
|
||||
build_str = ''
|
||||
for i in range(0, candidate.node_count):
|
||||
for j in range(0, N + 1):
|
||||
build_str += str(candidate.offsets[i][j])
|
||||
return build_str
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 8
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros((N + 1,))
|
||||
int_scratch = np.zeros((N + 1,)).astype(np.int32)
|
||||
g = test_fn
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
inputs = random_sample(sample_size, N)
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((num_candidates + num_survivors, sample_size,))
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
|
||||
layers = []
|
||||
layers_scratch = [np.zeros(1, ).astype(np.int32)]
|
||||
layers_scratch_base = [np.zeros(1, ).astype(np.int32)]
|
||||
layer = 0
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# outputs[0][i] = candidate_fn(inputs[i])
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
# print(score)
|
||||
|
||||
# for i in range(0, sample_size):
|
||||
# outputs[0][i] = true_fn(inputs[i])
|
||||
|
||||
# np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
# np.mod(output_xor, M, output_xor)
|
||||
# score = coherence(output_xor, distances)
|
||||
# print(score)
|
||||
# return
|
||||
|
||||
while score < 1:
|
||||
probabilities = Probabilities(layer)
|
||||
candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)]
|
||||
inertia = 1
|
||||
epoch = 1
|
||||
while inertia > 0.001:
|
||||
compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, layers_scratch, layers_scratch_base, int_scratch, scratch)
|
||||
round_inertia = update_probabilities(probabilities, candidates, inputs, scores, epoch / 1000.0)
|
||||
inertia = 0.9 * inertia + 0.1 * round_inertia
|
||||
|
||||
print_probabilities(probabilities)
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
print(np.max(scores), round_inertia, inertia)
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
src = candidates[src_index]
|
||||
dest = candidates[dest_index]
|
||||
candidates[dest_index] = src
|
||||
candidates[src_index] = dest
|
||||
|
||||
inputs = random_sample(sample_size, N)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
epoch += 1
|
||||
|
||||
candidate = probabilities.flatten()
|
||||
for j in range(0, sample_size):
|
||||
outputs[0][j] = evaluate(layers, candidate, inputs[j], layers_scratch, int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
layers.insert(0, candidate)
|
||||
layer += 1
|
||||
layers_scratch.insert(0, np.zeros(2 ** layer,).astype(np.int32))
|
||||
layers_scratch_base.insert(0, np.zeros(2 ** layer,).astype(np.int32))
|
||||
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
414
mutations9.py
Normal file
@ -0,0 +1,414 @@
|
||||
from enum import unique
|
||||
import hashlib
|
||||
import math
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
|
||||
def vec_to_int(x):
|
||||
global N
|
||||
z = 0
|
||||
for i in range(0, N + 1):
|
||||
z <<= 1
|
||||
z |= x[i]
|
||||
return z
|
||||
|
||||
def timeit(f):
|
||||
def timed(*args, **kw):
|
||||
ts = time.time()
|
||||
result = f(*args, **kw)
|
||||
te = time.time()
|
||||
|
||||
print('func:%r took: %2.4f sec' % (f.__name__, te-ts))
|
||||
return result
|
||||
return timed
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = layer
|
||||
self.offsets = np.zeros((self.node_count, N + 1)).astype(np.int32)
|
||||
|
||||
class Probabilities:
|
||||
def __init__(self, layer):
|
||||
global N
|
||||
self.layer = layer
|
||||
self.node_count = layer
|
||||
self.p_offsets = np.zeros((self.node_count, N + 1))
|
||||
self.p_offsets.fill(0.5)
|
||||
self.offset_coherences = np.zeros((2, self.node_count, N + 1, 2, self.node_count, N + 1))
|
||||
self.offset_coherences.fill(-1)
|
||||
self.deltas = np.zeros((self.node_count, N + 1, 2, self.node_count, N + 1))
|
||||
|
||||
def has_converged(self):
|
||||
global N
|
||||
for i in range(0,self.node_count):
|
||||
for j in range(0, N + 1):
|
||||
if self.p_offsets[i][j] > 0.05 and self.p_offsets[i][j] < 0.95:
|
||||
return False
|
||||
return True
|
||||
|
||||
def flatten(self):
|
||||
global N
|
||||
candidate = Candidate(self.layer)
|
||||
for i in range(0, self.node_count):
|
||||
for j in range(0, N + 1):
|
||||
candidate.offsets[i][j] = 1 if self.p_offsets[i][j] >= 0.95 else 0
|
||||
return candidate
|
||||
|
||||
def clamp(x, min_value = 0.01, max_value = 1):
|
||||
return min(max(x, min_value), max_value)
|
||||
|
||||
def encode(v):
|
||||
global N
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if index >= len(v):
|
||||
continue
|
||||
x <<= 1
|
||||
x |= int(v[index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)
|
||||
|
||||
# 00100111 x4
|
||||
# 00000110 x1
|
||||
def sha(v):
|
||||
global M
|
||||
x = encode(v)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def xor(x):
|
||||
num_one_bits = 0
|
||||
for i in range(0, len(x)):
|
||||
if i == 0:
|
||||
continue
|
||||
num_one_bits += x[i]
|
||||
return num_one_bits % 2
|
||||
|
||||
|
||||
# 0 ^ 1 ^ (2 ^ (4 * (5 ^ 0 * 7))) * (3 ^ 6 * 7)
|
||||
# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * (5 ^ 0 * 7) ^ 4 * 6 * 7 * (5 ^ 0 * 7)
|
||||
# 0 ^ 1 ^ 2 * 3 ^ 2 * 6 * 7 ^ 3 * 4 * 5 ^ 0 * 3 * 4 * 7 ^ 4 * 5 * 6 * 7 ^ 0 * 4 * 6 * 7
|
||||
|
||||
# 0 ^ 1 ^ 2*3 ^ 2*6*7 ^ 3*4*5 ^ 0*3*4*7 ^ 4*5*6*7 ^ 0*4*6*7
|
||||
def test_fn(x):
|
||||
# 0 1
|
||||
# 2 | 3
|
||||
# 4 | 5 | 6 | 7
|
||||
# | | 0 | 7 | | | |
|
||||
return x[0] ^ x[1] ^ ((x[2] ^ (x[4] * (x[5] ^ (x[0] * x[7])))) * (x[3] ^ (x[6] * x[7])))
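
# Optional sanity check (illustrative; the helper name is made up): brute-force all
# 2^8 inputs to confirm that the flattened XOR-of-ANDs form in the comments above
# matches the nested expression returned by test_fn.
def check_test_fn_expansion():
    for v in range(0, 2 ** 8):
        x = [(v >> i) & 1 for i in range(0, 8)]
        expanded = (x[0] ^ x[1]
                    ^ x[2] * x[3] ^ x[2] * x[6] * x[7]
                    ^ x[3] * x[4] * x[5] ^ x[0] * x[3] * x[4] * x[7]
                    ^ x[4] * x[5] * x[6] * x[7] ^ x[0] * x[4] * x[6] * x[7])
        assert expanded == test_fn(x)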
|
||||
|
||||
def candidate_fn(x):
|
||||
return x[0] ^ x[1] ^ (~(x[2] ^ x[3]) * x[2])
|
||||
|
||||
def true_fn(x):
|
||||
return x[0] ^ x[1] ^ (x[3] * x[2])
|
||||
|
||||
def hamming_distance(a, b, scratch):
|
||||
np.logical_xor(a, b, scratch)
|
||||
return sum(scratch)
|
||||
|
||||
def coherence(outputs, distances):
|
||||
coherences = []
|
||||
for i in range(0, len(outputs)):
|
||||
y_a = outputs[i]
|
||||
numerator = 0
|
||||
denominator = 0
|
||||
for j in range(0, len(outputs)):
|
||||
if i == j:
|
||||
continue
|
||||
y_b = outputs[j]
|
||||
weight = distances[i][j]
|
||||
denominator += weight
|
||||
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
|
||||
numerator += weight
|
||||
coherence = numerator / denominator if denominator > 0 else 0
|
||||
coherences.append(coherence)
|
||||
return sum(coherences) / len(coherences)
|
||||
|
||||
def random_sample(m, n):
|
||||
inputs = np.zeros((m, n + 1)).astype(np.int32)
|
||||
for i in range(0, m):
|
||||
for j in range(0, n):
|
||||
inputs[i][j] = random.randint(0, 1)
|
||||
inputs[i][n] = 1
|
||||
return inputs
|
||||
|
||||
def populate_distances(inputs, distances, scratch):
|
||||
for i in range(0, len(inputs)):
|
||||
x_a = inputs[i]
|
||||
for j in range(0, len(inputs)):
|
||||
if i == j:
|
||||
continue
|
||||
x_b = inputs[j]
|
||||
distance = hamming_distance(x_a, x_b, scratch)
|
||||
distances[i][j] = 1.0 / (2 ** distance)
|
||||
|
||||
def evaluate(layers, candidate, x, compute_scratch):
|
||||
global N
|
||||
z = evaluate_layers(layers, x, compute_scratch)
|
||||
z ^= evaluate_candidate(candidate, x, compute_scratch)
|
||||
return z
|
||||
|
||||
def evaluate_layers(layers, x, compute_scratch):
|
||||
global N
|
||||
z = 0
|
||||
for layer in layers:
|
||||
z ^= evaluate_candidate(layer, x, compute_scratch)
|
||||
return z
|
||||
|
||||
def evaluate_candidate(candidate, x, compute_scratch):
|
||||
y = 1
|
||||
for j in range(0, candidate.node_count):
|
||||
value = 0
|
||||
np.multiply(candidate.offsets[j], x, compute_scratch)
|
||||
value ^= np.sum(compute_scratch) % 2
|
||||
y &= value
|
||||
return y
|
||||
|
||||
@timeit
|
||||
def compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch):
|
||||
global M, N
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch)
|
||||
for j in range(1, num_candidates):
|
||||
np.copyto(outputs[j], outputs[0])
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
base_score = coherence(output_xor, distances)
|
||||
|
||||
scores.fill(0)
|
||||
unique_candidates = {}
|
||||
for j in range(0, num_candidates):
|
||||
create_candidate(probabilities, candidates[j])
|
||||
unique_candidates[candidate_str(candidates[j])] = j
|
||||
|
||||
for i in range(0, sample_size):
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
outputs[j][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
for _, j in unique_candidates.items():
|
||||
candidate = candidates[j]
|
||||
np.subtract(outputs[j], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
scores[j] = score
|
||||
return base_score
|
||||
|
||||
|
||||
def compute_uplift(candidate, layers, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch):
|
||||
global M, N
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] = evaluate_layers(layers, inputs[i], int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
base_score = coherence(output_xor, distances)
|
||||
|
||||
for i in range(0, sample_size):
|
||||
outputs[0][i] ^= evaluate_candidate(candidate, inputs[i], int_scratch)
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
return (base_score, score)
|
||||
|
||||
@timeit
|
||||
def update_probabilities(probabilities, candidates, inputs, base_score, scores, scale):
|
||||
global N
|
||||
num_candidates = len(candidates)
|
||||
|
||||
probabilities.offset_coherences.fill(-1)
|
||||
for p in range(0, num_candidates):
|
||||
candidate = candidates[p]
|
||||
if scores[p] == 0:
|
||||
continue
|
||||
# score = max(scores[p], base_score)
|
||||
score = scores[p]
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
i = candidate.offsets[j][k]
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
l = candidate.offsets[m][n]
|
||||
probabilities.offset_coherences[i][j][k][l][m][n] = max(score, probabilities.offset_coherences[i][j][k][l][m][n])
|
||||
|
||||
p_offsets_next = np.zeros((probabilities.node_count, N + 1))
|
||||
inertia = 0
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
delta = 0
|
||||
count = 0
|
||||
for m in range(0, probabilities.node_count):
|
||||
for n in range(0, N + 1):
|
||||
# if j == m and k == n:
|
||||
# continue
|
||||
p_j1_if_m0 = probabilities.offset_coherences[1][j][k][0][m][n]
|
||||
p_j0_if_m0 = probabilities.offset_coherences[0][j][k][0][m][n]
|
||||
p_j1_if_m1 = probabilities.offset_coherences[1][j][k][1][m][n]
|
||||
p_j0_if_m1 = probabilities.offset_coherences[0][j][k][1][m][n]
|
||||
if p_j1_if_m0 >= 0 and p_j0_if_m0 >= 0:
|
||||
# delta_if_m0 = (p_j1_if_m0 - base_score) - (p_j0_if_m0 - base_score)
|
||||
delta_if_m0 = p_j1_if_m0 - p_j0_if_m0
|
||||
delta += delta_if_m0 * (1.0 - probabilities.p_offsets[m][n]) * scale
|
||||
count += 1
|
||||
if p_j1_if_m1 >= 0 and p_j0_if_m1 >= 0:
|
||||
# delta_if_m1 = (p_j1_if_m1 - base_score) - (p_j0_if_m1 - base_score)
|
||||
delta_if_m1 = p_j1_if_m1 - p_j0_if_m1
|
||||
delta += delta_if_m1 * probabilities.p_offsets[m][n] * scale
|
||||
count += 1
|
||||
if count > 0:
|
||||
delta /= count
|
||||
p_offsets_next[j][k] = clamp(probabilities.p_offsets[j][k] + delta, 0, 1)
|
||||
inertia += abs(p_offsets_next[j][k] - probabilities.p_offsets[j][k])
|
||||
|
||||
for j in range(0, probabilities.node_count):
|
||||
for k in range(0, N + 1):
|
||||
p_offset_next = 0.9 * probabilities.p_offsets[j][k] + 0.1 * p_offsets_next[j][k]
|
||||
# if p_offset_next <= 0.05:
|
||||
# p_offset_next = 0.0
|
||||
# elif p_offset_next >= 0.95:
|
||||
# p_offset_next = 1.0
|
||||
probabilities.p_offsets[j][k] = p_offset_next
|
||||
|
||||
return inertia
|
||||
|
||||
def create_candidate(probabilities, candidate):
|
||||
global N
|
||||
for i in range(0, probabilities.node_count):
|
||||
for j in range(0, N + 1):
|
||||
candidate.offsets[i][j] = 1 if random.random() < probabilities.p_offsets[i][j] else 0
|
||||
|
||||
def copy_candidate(src, dest):
|
||||
global N
|
||||
for i in range(0, src.node_count):
|
||||
for j in range(0, N + 1):
|
||||
dest.offsets[i][j] = src.offsets[i][j]
|
||||
|
||||
def p(x):
|
||||
return math.ceil(x * 100) / 100
|
||||
|
||||
def p_a(x):
|
||||
return [p(z) for z in x]
|
||||
|
||||
def print_probabilities(probabilities):
|
||||
print('=====================')
|
||||
for i in range(0, probabilities.node_count):
|
||||
print(i, p_a(probabilities.p_offsets[i]))
|
||||
print('=====================')
|
||||
|
||||
def candidate_str(candidate):
|
||||
global N
|
||||
build_str = ''
|
||||
for i in range(0, candidate.node_count):
|
||||
for j in range(0, N + 1):
|
||||
build_str += str(candidate.offsets[i][j])
|
||||
return build_str
|
||||
|
||||
def main():
|
||||
global N, M
|
||||
sample_size = 64
|
||||
num_candidates = 100
|
||||
num_survivors = 1
|
||||
uplift_sample_size = 100
|
||||
output_xor = np.zeros(sample_size,)
|
||||
scratch = np.zeros((N + 1,))
|
||||
int_scratch = np.zeros((N + 1,)).astype(np.int32)
|
||||
g = test_fn
|
||||
expected_outputs = np.zeros((sample_size,))
|
||||
inputs = random_sample(sample_size, N)
|
||||
distances = np.zeros((sample_size, sample_size))
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
outputs = np.zeros((num_candidates + num_survivors, sample_size,)).astype(np.int32)
|
||||
scores = np.zeros((num_candidates + num_survivors,))
|
||||
|
||||
layers = []
|
||||
layer = 1
|
||||
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
while score < 1:
|
||||
probabilities = Probabilities(layer)
|
||||
candidates = [Candidate(layer) for _ in range(0, num_candidates + num_survivors)]
|
||||
inertia = 1
|
||||
epoch = 1
|
||||
while inertia > 0.001 and epoch < 1000 and not probabilities.has_converged():
|
||||
base_score = compute_scores(probabilities, candidates, num_candidates, layers, scores, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch)
|
||||
round_inertia = update_probabilities(probabilities, candidates, inputs, base_score, scores, 1 + 0.01 * epoch)
|
||||
inertia = 0.9 * inertia + 0.1 * round_inertia
|
||||
|
||||
print_probabilities(probabilities)
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
max_score = np.max(scores)
|
||||
print(base_score, max_score, round_inertia, inertia)
|
||||
|
||||
top_n = sorted(range(len(scores)), key=lambda i: scores[i])[-num_survivors:]
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
src_index = top_n[i]
|
||||
dest_index = num_candidates + i
|
||||
if src_index == dest_index:
|
||||
continue
|
||||
src = candidates[src_index]
|
||||
dest = candidates[dest_index]
|
||||
candidates[dest_index] = src
|
||||
candidates[src_index] = dest
|
||||
|
||||
inputs = random_sample(sample_size, N)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
epoch += 1
|
||||
|
||||
candidate = probabilities.flatten()
|
||||
print(candidate.offsets)
|
||||
for j in range(0, sample_size):
|
||||
outputs[0][j] = evaluate(layers, candidate, inputs[j], int_scratch)
|
||||
np.subtract(outputs[0], expected_outputs, output_xor)
|
||||
np.mod(output_xor, M, output_xor)
|
||||
score = coherence(output_xor, distances)
|
||||
|
||||
average_base_score = 0
|
||||
average_score = 0
|
||||
for i in range(0, uplift_sample_size):
|
||||
inputs = random_sample(sample_size, N)
|
||||
populate_distances(inputs, distances, scratch)
|
||||
for i in range(0, sample_size):
|
||||
expected_outputs[i] = g(inputs[i])
|
||||
(base_score, score) = compute_uplift(candidate, layers, distances, inputs, outputs, output_xor, expected_outputs, sample_size, int_scratch)
|
||||
average_base_score += base_score
|
||||
average_score += score
|
||||
average_base_score /= uplift_sample_size
|
||||
average_score /= uplift_sample_size
|
||||
uplift = average_score - average_base_score
|
||||
print(uplift)
|
||||
|
||||
if uplift <= 0:
|
||||
layer += 1
|
||||
continue
|
||||
|
||||
layers.insert(0, candidate)
|
||||
if layer == 1:
|
||||
layer += 1
|
||||
|
||||
for candidate in layers:
|
||||
print(candidate.offsets)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
269
mutations_cuda.py
Normal file
@ -0,0 +1,269 @@
|
||||
# Sample source code from the Tutorial Introduction in the documentation.
|
||||
|
||||
import hashlib
|
||||
import numpy as np
|
||||
import math
|
||||
import pycuda.driver as cuda
|
||||
from pycuda.driver import Stream
|
||||
import pycuda.autoinit
|
||||
from pycuda.compiler import SourceModule
|
||||
import pycuda.gpuarray as gpuarray
|
||||
import random
|
||||
|
||||
'''
|
||||
a = numpy.random.randn(4,4)
|
||||
|
||||
a = a.astype(numpy.float32)
|
||||
|
||||
a_gpu = cuda.mem_alloc(a.size * a.dtype.itemsize)
|
||||
|
||||
cuda.memcpy_htod(a_gpu, a)
|
||||
|
||||
mod = SourceModule("""
|
||||
__global__ void doublify(float *a)
|
||||
{
|
||||
int idx = threadIdx.x + threadIdx.y*4;
|
||||
a[idx] *= 2;
|
||||
}
|
||||
""")
|
||||
|
||||
func = mod.get_function("doublify")
|
||||
func(a_gpu, block=(4,4,1))
|
||||
|
||||
a_doubled = numpy.empty_like(a)
|
||||
cuda.memcpy_dtoh(a_doubled, a_gpu)
|
||||
print("original array:")
|
||||
print(a)
|
||||
print("doubled with kernel:")
|
||||
print(a_doubled)
|
||||
|
||||
# alternate kernel invocation -------------------------------------------------
|
||||
|
||||
func(cuda.InOut(a), block=(4, 4, 1))
|
||||
print("doubled with InOut:")
|
||||
print(a)
|
||||
|
||||
# part 2 ----------------------------------------------------------------------
|
||||
|
||||
a_gpu = gpuarray.to_gpu(numpy.random.randn(4,4).astype(numpy.float32))
|
||||
a_doubled = (2*a_gpu).get()
|
||||
|
||||
print("original array:")
|
||||
print(a_gpu)
|
||||
print("doubled with gpuarray:")
|
||||
print(a_doubled)
|
||||
'''
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
sample_size = 64
|
||||
|
||||
def encode(v, offset):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
if offset + index >= len(v):
|
||||
break
|
||||
x <<= 1
|
||||
x |= int(v[offset + index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)  # hash the accumulated bytes; bytearray(x) would just allocate a zero buffer
|
||||
|
||||
def sha(v, offset):
|
||||
global M
|
||||
x = encode(v, offset)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def create_program_r(model, output_var):
|
||||
global N, M
|
||||
(constant, scalars, child) = model
|
||||
program = 'int ' + output_var + ' = ' + str(constant) + ';\n'
|
||||
scalars_part = ' + '.join([str(scalars[i]) + ' * x[gid * ' + str(N) + ' + ' + str(i) + ']' for i in range(0, len(scalars)) if scalars[i] > 0])
|
||||
if len(scalars_part) > 0:
|
||||
program += output_var + ' += ' + scalars_part + ';\n'
|
||||
if not child is None:
|
||||
left_output = output_var + '0'
|
||||
right_output = output_var + '1'
|
||||
(left, right) = child
|
||||
program += create_program_r(left, left_output)
|
||||
program += create_program_r(right, right_output)
|
||||
program += output_var + ' += ' + left_output + ' * ' + right_output + ';\n'
|
||||
program += output_var + ' %= ' + str(M) + ';\n'
|
||||
return program
|
||||
|
||||
def create_program(model, name, offset):
|
||||
output_var = 'output'
|
||||
program = '__global__ void ' + name + '(const int *x, int *out) {\n'
|
||||
program += 'int gid = threadIdx.x + blockIdx.x * blockDim.x;\n'
|
||||
program += create_program_r(model, output_var)
|
||||
program += 'out[' + str(offset) + ' + gid] = ' + output_var + ';\n'
|
||||
program += '}\n'
|
||||
return program
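# For illustration only: with N = 8 and M = 2, a hypothetical model
# (1, [1, 0, 0, 0, 0, 0, 0, 0], None) passed through create_program(model, 'k0', 0)
# produces source along these lines:
#
#   __global__ void k0(const int *x, int *out) {
#   int gid = threadIdx.x + blockIdx.x * blockDim.x;
#   int output = 1;
#   output += 1 * x[gid * 8 + 0];
#   output %= 2;
#   out[0 + gid] = output;
#   }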
|
||||
|
||||
def distances_program():
|
||||
global N, sample_size
|
||||
program = "__global__ void p(const int *x, double *distances) {\n"
|
||||
program += " int gid = threadIdx.x + blockIdx.x * blockDim.x;\n"
|
||||
program += " int i = gid / " + str(sample_size) + ";\n"
|
||||
program += " int j = gid % " + str(sample_size) + ";\n"
|
||||
program += " if (i == j) {\n"
|
||||
program += " distances[gid] = 0;\n"
|
||||
program += " return;\n"
|
||||
program += " }\n"
|
||||
program += " int distance = 0;\n"
|
||||
program += " for (int k = 0; k < " + str(N) + "; k++) {\n"
|
||||
program += " distance += x[i * " + str(N) + " + k] ^ x[j * " + str(N) + " + k];\n"
|
||||
program += " }\n"
|
||||
program += " distances[gid] = pow((double)2.0, (double)-distance);\n"
|
||||
program += "}\n"
|
||||
return program
|
||||
|
||||
def coherence_program():
|
||||
global sample_size
|
||||
program = "__global__ void p(const int *y, const int *z, const double *distances, double *coherences) {\n"
|
||||
program += " int gid = threadIdx.x + blockIdx.x * blockDim.x;\n"
|
||||
program += " double numerator = 0;\n"
|
||||
program += " double denominator = 0;\n"
|
||||
program += " for (int i = 0; i < " + str(sample_size) + "; i++) {\n"
|
||||
program += " int p = z[i] ^ y[gid * " + str(sample_size) + " + i];\n"
|
||||
program += " for (int j = 0; j < " + str(sample_size) + "; j++) {\n"
|
||||
program += " int q = z[j] ^ y[gid * " + str(sample_size) + " + j];\n"
|
||||
program += " double distance = distances[i * " + str(sample_size) + " + j];\n"
|
||||
program += " denominator += distance;\n"
|
||||
program += " if (p == q) {\n"
|
||||
program += " numerator += distance;\n"
|
||||
program += " }\n"
|
||||
program += " }\n"
|
||||
program += " }\n"
|
||||
program += " coherences[gid] = numerator / denominator;\n"
|
||||
program += "}\n"
|
||||
return program
|
||||
|
||||
def random_sample():
|
||||
global N, sample_size
|
||||
x = np.zeros((N * sample_size,)).astype(np.int32)
|
||||
for i in range(0, len(x)):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def clone_model(model, p_mutation):
|
||||
global N, M
|
||||
|
||||
p_constant = p_mutation * random.random()
|
||||
p_flip = p_mutation * random.random()
|
||||
p_add_child = p_mutation * random.random()
|
||||
p_drop_child = p_mutation * random.random()
|
||||
|
||||
(constant, xors, child) = model
|
||||
if random.random() < p_constant:
|
||||
constant += random.randint(0, M - 1)
|
||||
constant %= M
|
||||
clone_xors = np.zeros((N,))
|
||||
np.copyto(clone_xors, xors)
|
||||
for i in range(0, N):
|
||||
if random.random() < p_flip:
|
||||
offset = 1 if M == 2 else random.randint(1, M - 1)
|
||||
clone_xors[i] += offset
|
||||
clone_xors[i] %= M
|
||||
if child is None:
|
||||
if random.random() < p_add_child:
|
||||
left = random_child(p_mutation)
|
||||
right = random_child(p_mutation)
|
||||
return (constant, clone_xors, (left, right))
|
||||
return (constant, clone_xors, None)
|
||||
if random.random() < p_drop_child:
|
||||
return (constant, clone_xors, None)
|
||||
(left, right) = child
|
||||
clone_left = clone_model(left, p_mutation)
|
||||
clone_right = clone_model(right, p_mutation)
|
||||
return (constant, clone_xors, (clone_left, clone_right))
|
||||
|
||||
def random_child(p_mutation):
|
||||
global N, M
|
||||
constant = random.randint(0, M - 1)
|
||||
xors = np.zeros((N,))
|
||||
|
||||
p_flip = p_mutation * random.random()
|
||||
p_child = p_mutation * random.random()
|
||||
|
||||
index = random.randint(0, N - 1)
|
||||
xors[index] = 1 if M == 2 else random.randint(1, M - 1)
|
||||
for i in range(0, N):
|
||||
if i != index and random.random() < p_flip:
|
||||
xors[i] = 1 if M == 2 else random.randint(1, M - 1)
|
||||
if random.random() < p_child:
|
||||
left = random_child(p_mutation * random.random())
|
||||
right = random_child(p_mutation * random.random())
|
||||
return (constant, xors, (left, right))
|
||||
return (constant, xors, None)
|
||||
|
||||
def null_candidate():
|
||||
global N
|
||||
return (0, np.zeros((N,)), None)
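# Model layout note: a model is a recursive tuple (constant, xors, child). The constant
# and the xors vector define an affine function of the N input bits mod M, and child,
# when present, is a (left, right) pair of sub-models whose product is added in before
# the final reduction mod M. For example, a hypothetical model
# (1, [0, 1, 0, 0, 0, 0, 0, 0], None) evaluates to 1 ^ x[1] when M == 2.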
|
||||
|
||||
def main():
|
||||
global N, M, sample_size
|
||||
epochs = 1000
|
||||
num_survivors = 100
|
||||
num_offspring = 10
|
||||
num_candidates = num_survivors + num_survivors * num_offspring
|
||||
block_size = 1
|
||||
|
||||
x = random_sample()
|
||||
z = np.zeros((sample_size,)).astype(np.int32)
|
||||
coherences = np.zeros((num_candidates,)).astype(np.float64)
|
||||
candidates = [null_candidate() for _ in range(0, num_candidates)]
|
||||
|
||||
for i in range(0, sample_size):
|
||||
z[i] = sha(x, N * i)
|
||||
# print(z)
|
||||
|
||||
x_gpu = cuda.mem_alloc(4 * N * sample_size)
|
||||
cuda.memcpy_htod(x_gpu, x)
|
||||
z_gpu = cuda.mem_alloc(4 * sample_size)
|
||||
cuda.memcpy_htod(z_gpu, z)
|
||||
distances_gpu = cuda.mem_alloc(8 * sample_size * sample_size)
|
||||
coherences_gpu = cuda.mem_alloc(8 * num_candidates)
|
||||
outputs_gpu = cuda.mem_alloc(4 * sample_size * num_candidates)
|
||||
|
||||
distances_kernel = SourceModule(distances_program()).get_function('p')
|
||||
coherence_kernel = SourceModule(coherence_program()).get_function('p')
|
||||
|
||||
distances_kernel(x_gpu, distances_gpu, block=(block_size, 1, 1), grid=(int(sample_size * sample_size / block_size), 1, 1))
|
||||
# distances = np.zeros((sample_size,sample_size)).astype(np.double)
|
||||
# cuda.memcpy_dtoh(distances, distances_gpu)
|
||||
# print(distances)
|
||||
|
||||
for epoch in range(0, epochs):
|
||||
mod = SourceModule('\n'.join([create_program(candidates[i], 'k' + str(i), i * sample_size) for i in range(0, num_candidates)]))
|
||||
stream = Stream()
|
||||
for i in range(0, num_candidates):
|
||||
f = mod.get_function('k' + str(i))
|
||||
f(x_gpu, outputs_gpu, stream=stream, block=(block_size, 1, 1), grid=(int(sample_size / block_size), 1, 1))
|
||||
stream.synchronize()
|
||||
|
||||
# outputs = np.zeros((sample_size * num_candidates,)).astype(np.int32)
|
||||
# cuda.memcpy_dtoh(outputs, outputs_gpu)
|
||||
# print(outputs)
|
||||
|
||||
coherence_kernel(outputs_gpu, z_gpu, distances_gpu, coherences_gpu, block=(block_size, 1, 1), grid=(int(num_candidates / block_size), 1, 1))
|
||||
cuda.memcpy_dtoh(coherences, coherences_gpu)
|
||||
|
||||
top_n = sorted(range(len(coherences)), key=lambda i: coherences[i])[-num_survivors:]
|
||||
survivors = [candidates[index] for index in top_n]
|
||||
print(epoch, coherences[top_n[-1]])
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
candidate = survivors[i]
|
||||
candidates[i] = candidate
|
||||
for j in range(0, num_offspring):
|
||||
index = num_survivors + j * num_survivors + i
|
||||
candidates[index] = clone_model(candidate, random.random())
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
207
mutations_gpu.py
Normal file
@ -0,0 +1,207 @@
|
||||
import hashlib
|
||||
import numpy as np
|
||||
import math
|
||||
import pyopencl as cl
|
||||
import random
|
||||
|
||||
N = 8
|
||||
M = 2
|
||||
sample_size = 64
|
||||
|
||||
def encode(v, offset):
|
||||
byte_values = []
|
||||
for i in range(0, math.ceil(N / 8)):
|
||||
x = 0
|
||||
for j in range(0, 8):
|
||||
index = i * 8 + j
|
||||
x <<= 1
|
||||
x |= int(v[offset + index])
|
||||
byte_values.append(x)
|
||||
return bytearray(byte_values)  # hash the accumulated bytes; bytearray(x) would just allocate a zero buffer
|
||||
|
||||
def sha(v, offset):
|
||||
global M
|
||||
x = encode(v, offset)
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] % M
|
||||
|
||||
def create_program_r(model, output_var):
|
||||
global N, M
|
||||
(constant, scalars, child) = model
|
||||
program = 'int ' + output_var + ' = ' + str(constant) + ';\n'
|
||||
scalars_part = ' + '.join([str(scalars[i]) + ' * x[gid * ' + str(N) + ' + ' + str(i) + ']' for i in range(0, len(scalars)) if scalars[i] > 0])
|
||||
if len(scalars_part) > 0:
|
||||
program += output_var + ' += ' + scalars_part + ';\n'
|
||||
if not child is None:
|
||||
left_output = output_var + '0'
|
||||
right_output = output_var + '1'
|
||||
(left, right) = child
|
||||
program += create_program_r(left, left_output)
|
||||
program += create_program_r(right, right_output)
|
||||
program += output_var + ' += ' + left_output + ' * ' + right_output + ';\n'
|
||||
program += output_var + ' %= ' + str(M) + ';\n'
|
||||
return program
|
||||
|
||||
def create_program(model, name, offset):
|
||||
output_var = 'output'
|
||||
program = '__kernel void ' + name + '(__global const int *x, __global int *out) {\n'
|
||||
program += 'int gid = get_global_id(0);\n'
|
||||
program += create_program_r(model, output_var)
|
||||
program += 'out[' + str(offset) + ' + gid] = ' + output_var + ';\n'
|
||||
program += '}\n'
|
||||
return program
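# Same code-generation scheme as the CUDA variant in mutations_cuda.py, emitted as
# OpenCL C: the differences are the __kernel/__global qualifiers and get_global_id(0)
# in place of the CUDA thread-index arithmetic. A hypothetical model
# (0, [1, 1, 0, 0, 0, 0, 0, 0], None) would produce a kernel body along the lines of
#   int output = 0;
#   output += 1 * x[gid * 8 + 0] + 1 * x[gid * 8 + 1];
#   output %= 2;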
|
||||
|
||||
def distances_program():
|
||||
global N, sample_size
|
||||
program = "__kernel void p(__global const int *x, __global float *distances) {\n"
|
||||
program += " int gid = get_global_id(0);\n"
|
||||
program += " int i = gid / " + str(sample_size) + ";\n"
|
||||
program += " int j = gid % " + str(sample_size) + ";\n"
|
||||
program += " float distance = 0;\n"
|
||||
program += " if (i == j) {\n"
|
||||
program += " distances[gid] = distance;\n"
|
||||
program += " return;\n"
|
||||
program += " }\n"
|
||||
program += " for (int k = 0; k < " + str(N) + "; k++) {\n"
|
||||
program += " distance += x[i * " + str(N) + " + k] ^ x[j * " + str(N) + " + k];\n"
|
||||
program += " }\n"
|
||||
program += " distances[gid] = pow(2, -distance);\n"
|
||||
program += "}\n"
|
||||
return program
|
||||
|
||||
def coherence_program():
|
||||
global sample_size
|
||||
program = "__kernel void p(__global const int *y, __global const int *z, __global const float *distances, __global float *coherences) {\n"
|
||||
program += " int gid = get_global_id(0);\n"
|
||||
program += " float numerator = 0;\n"
|
||||
program += " float denominator = 0;\n"
|
||||
program += " for (int i = 0; i < " + str(sample_size) + "; i++) {\n"
|
||||
program += " int p = z[i] ^ y[gid * " + str(sample_size) + " + i];\n"
|
||||
program += " for (int j = 0; j < " + str(sample_size) + "; j++) {\n"
|
||||
program += " int q = z[j] ^ y[gid * " + str(sample_size) + " + j];\n"
|
||||
program += " float distance = distances[i * " + str(sample_size) + " + j];\n"
|
||||
program += " denominator += distance;\n"
|
||||
program += " if (p == q) {\n"
|
||||
program += " numerator += distance;\n"
|
||||
program += " }\n"
|
||||
program += " }\n"
|
||||
program += " }\n"
|
||||
program += " coherences[gid] = numerator / denominator;\n"
|
||||
program += "}\n"
|
||||
return program
|
||||
|
||||
def random_sample():
|
||||
global N, sample_size
|
||||
x = np.zeros((N * sample_size,)).astype(np.int32)
|
||||
for i in range(0, len(x)):
|
||||
x[i] = random.randint(0, 1)
|
||||
return x
|
||||
|
||||
def clone_model(model, p_mutation):
|
||||
global N, M
|
||||
|
||||
p_constant = p_mutation * random.random()
|
||||
p_flip = p_mutation * random.random()
|
||||
p_add_child = p_mutation * random.random()
|
||||
p_drop_child = p_mutation * random.random()
|
||||
|
||||
(constant, xors, child) = model
|
||||
if random.random() < p_constant:
|
||||
constant += random.randint(0, M - 1)
|
||||
constant %= M
|
||||
clone_xors = np.zeros((N,))
|
||||
np.copyto(clone_xors, xors)
|
||||
for i in range(0, N):
|
||||
if random.random() < p_flip:
|
||||
offset = 1 if M == 2 else random.randint(1, M - 1)
|
||||
clone_xors[i] += offset
|
||||
clone_xors[i] %= M
|
||||
if child is None:
|
||||
if random.random() < p_add_child:
|
||||
left = random_child(p_mutation)
|
||||
right = random_child(p_mutation)
|
||||
return (constant, clone_xors, (left, right))
|
||||
return (constant, clone_xors, None)
|
||||
if random.random() < p_drop_child:
|
||||
return (constant, clone_xors, None)
|
||||
(left, right) = child
|
||||
clone_left = clone_model(left, p_mutation)
|
||||
clone_right = clone_model(right, p_mutation)
|
||||
return (constant, clone_xors, (clone_left, clone_right))
|
||||
|
||||
def random_child(p_mutation):
|
||||
global N, M
|
||||
constant = random.randint(0, M - 1)
|
||||
xors = np.zeros((N,))
|
||||
|
||||
p_flip = p_mutation * random.random()
|
||||
p_child = p_mutation * random.random()
|
||||
|
||||
index = random.randint(0, N - 1)
|
||||
xors[index] = 1 if M == 2 else random.randint(1, M - 1)
|
||||
for i in range(0, N):
|
||||
if i != index and random.random() < p_flip:
|
||||
xors[i] = 1 if M == 2 else random.randint(1, M - 1)
|
||||
if random.random() < p_child:
|
||||
left = random_child(p_mutation * random.random())
|
||||
right = random_child(p_mutation * random.random())
|
||||
return (constant, xors, (left, right))
|
||||
return (constant, xors, None)
|
||||
|
||||
def null_candidate():
|
||||
global N
|
||||
return (0, np.zeros((N,)), None)
|
||||
|
||||
def main():
|
||||
global N, M, sample_size
|
||||
epochs = 1000
|
||||
num_survivors = 100
|
||||
num_offspring = 10
|
||||
num_candidates = num_survivors + num_survivors * num_offspring
|
||||
local_work_size = None  # let the OpenCL runtime pick the work-group size; the global sizes used below are not multiples of 512
|
||||
|
||||
x = random_sample()
|
||||
z = np.zeros((sample_size,)).astype(np.int32)
|
||||
coherences = np.zeros((num_candidates,)).astype(np.float32)
|
||||
ctx = cl.create_some_context()
|
||||
queue = cl.CommandQueue(ctx)
|
||||
mf = cl.mem_flags
|
||||
candidates = [null_candidate() for _ in range(0, num_candidates)]
|
||||
|
||||
for i in range(0, sample_size):
|
||||
z[i] = sha(x, N * i)
|
||||
|
||||
x_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=x)
|
||||
z_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=z)
|
||||
distances_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, 4 * sample_size * sample_size)
|
||||
coherences_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, 4 * num_candidates)
|
||||
outputs_gpu = cl.Buffer(ctx, mf.WRITE_ONLY, 4 * sample_size * num_candidates)
|
||||
|
||||
distances_kernel = cl.Program(ctx, distances_program()).build().p
|
||||
coherence_kernel = cl.Program(ctx, coherence_program()).build().p
|
||||
|
||||
distances_kernel(queue, (sample_size * sample_size,), local_work_size, x_gpu, distances_gpu)
|
||||
|
||||
for epoch in range(0, epochs):
|
||||
program = cl.Program(ctx, '\n'.join([create_program(candidates[i], 'k' + '{:0>9}'.format(i), i * sample_size) for i in range(0, num_candidates)])).build()
|
||||
for knl in program.all_kernels():
|
||||
knl(queue, (sample_size,), local_work_size, x_gpu, outputs_gpu)
|
||||
|
||||
coherence_kernel(queue, (num_candidates,), local_work_size, outputs_gpu, z_gpu, distances_gpu, coherences_gpu)
|
||||
cl.enqueue_copy(queue, coherences, coherences_gpu)
|
||||
|
||||
top_n = sorted(range(len(coherences)), key=lambda i: coherences[i])[-num_survivors:]
|
||||
survivors = [candidates[index] for index in top_n]
|
||||
print(epoch, coherences[top_n[-1]])
|
||||
|
||||
for i in range(0, num_survivors):
|
||||
candidate = survivors[i]
|
||||
candidates[i] = candidate
|
||||
for j in range(0, num_offspring):
|
||||
index = num_survivors + j * num_survivors + i
|
||||
candidates[index] = clone_model(candidate, random.random())
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
5
mutations_opencl.py
Normal file
@ -0,0 +1,5 @@
def main():
    print('test')

if __name__ == "__main__":
    main()
29
shifts.py
Normal file
@ -0,0 +1,29 @@
|
||||
def remove_bit(i, n):
|
||||
return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)
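# Worked example: remove_bit(0b1011, 1) keeps bit 0, drops bit 1, and shifts the
# higher bits down by one, giving 0b101 == 5.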
|
||||
|
||||
def main():
|
||||
N = 65
|
||||
mappings = {}
|
||||
for i in range(0, N):
|
||||
n = 0
|
||||
g = remove_bit(i, n)
|
||||
paths_set = set()
|
||||
while g < i:
|
||||
paths_set.add(g)
|
||||
n += 1
|
||||
g = remove_bit(i, n)
|
||||
paths = sorted(list(paths_set))
|
||||
mappings[i] = paths
|
||||
|
||||
visited_set = set()
|
||||
stack = [paths[:]]
|
||||
while len(stack) > 0:
|
||||
for h in stack.pop():
|
||||
if not h in visited_set:
|
||||
visited_set.add(h)
|
||||
stack.append(mappings[h])
|
||||
visited = sorted(list(visited_set))
|
||||
print(i, len(visited))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
142
space_analysis.py
Normal file
@ -0,0 +1,142 @@
|
||||
import numpy as np
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a, b))
|
||||
|
||||
def xor(x, bits):
|
||||
return np.sum(x[:bits]) % 2
|
||||
|
||||
# 2
|
||||
# 4, 4,
|
||||
# 6, 8, 6
|
||||
# 8, 12, 12, 8
|
||||
# 10, 16, 18, 16, 10
|
||||
# 12, 20, 24, 24, 20, 12
|
||||
# 14, 24, 30, 32, 30, 24, 14
|
||||
# 16, 28, 36, 40, 40, 36, 28, 16
|
||||
|
||||
# 1
|
||||
# 2, 2
|
||||
# 3, 4, 3
|
||||
# 4, 6, 6, 4
|
||||
# 5, 8, 9, 8, 5
|
||||
# 6, 10, 12, 12, 10, 6
|
||||
# 7, 12, 15, 16, 15, 12, 7
|
||||
|
||||
# 6, 0, 6
|
||||
# 24, 12, 12, 24
|
||||
# 60, 48, 36, 48, 60
|
||||
# 120, 120, 96, 96, 120, 120
|
||||
# 210, 240, 210, 192, 210, 240, 210
|
||||
# 336, 420, 396, 360, 360, 396, 420, 336
|
||||
# 504, 672, 672, 624, 600, 624, 672, 672, 504
|
||||
|
||||
|
||||
# 1, 0, 1
|
||||
# 4, 2, 2, 4
|
||||
# 10, 8, 6, 8, 10
|
||||
# 20, 20, 16, 16, 20, 20
|
||||
# 35, 40, 35, 32, 35, 40, 35
|
||||
# 56, 70, 66, 60, 60, 66, 70, 56
|
||||
# 84, 112, 112, 104, 100, 104, 112, 112, 84
|
||||
|
||||
#
|
||||
# 20, 0, 20, 0, 20,
|
||||
# 120, 40, 80, 80, 40, 120
|
||||
# 420, 240, 260, 320, 260, 240, 420
|
||||
# 1120, 840, 760, 880, 880, 760, 840, 1120
|
||||
|
||||
# 1, 0, 1, 0, 1
|
||||
# 6, 2, 4, 4, 2, 6
|
||||
# 21, 12, 13, 16, 13, 12, 21
|
||||
# 56, 42, 38, 44, 44, 38, 42, 56
|
||||
|
||||
# 70, 0, 70, 0, 70, 0, 70
|
||||
# 560, 140, 420, 280, 280, 420, 140, 560
|
||||
|
||||
# 252, 0, 252, 0, 252, 0, 252, 0, 252
|
||||
# 2520, 504, 2016, 1008, 1512, 1512, 1008, 2016, 504, 2520
|
||||
|
||||
# 1, 2, 3, 4,
|
||||
# 1, 3, 6, 10
|
||||
# 1, 4, 10, 20
|
||||
# 1, 5, 15, 35
|
||||
# 1, 6,
|
||||
|
||||
# 1, 2, 1
|
||||
# 1, 3, 3, 1
|
||||
# 1, 4, 6, 4, 1
|
||||
# 1, 5, 10, 10, 5, 1
|
||||
# 1, 6, 15, 20, 15, 6, 1
|
||||
|
||||
# 2, 6, 12, 20, 30, 42, 56
|
||||
# 6, 30, 90, 210, 420
|
||||
# 20, 140, 560,
|
||||
# 70
|
||||
|
||||
# 1, 3, 6, 10, 15, 21, 28
|
||||
# 1, 5, 15, 35
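# For reference (not used by the code): on the N-bit hypercube the number of points at
# Hamming distance d from a fixed point is C(N, d); for N = 8 that gives
# 1, 8, 28, 56, 70, 56, 28, 8, 1 for d = 0..8. Several of the sequences above look
# like sums or products of such binomial coefficients.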
|
||||
|
||||
def main():
|
||||
N = 8
|
||||
points = []
|
||||
for i in range(0, 2 ** N):
|
||||
points.append(decode(i, N))
|
||||
|
||||
bands = [[[] for _ in range(0, N + 1)] for _ in range(0, len(points))]
|
||||
for i in range(0, len(points)):
|
||||
a = points[i]
|
||||
for j in range(0, len(points)):
|
||||
if i == j:
|
||||
continue
|
||||
b = points[j]
|
||||
distance = hamming_distance(a, b)
|
||||
bands[i][distance].append(b)
|
||||
|
||||
incoherent_distances = np.zeros((N + 1, N + 1))
|
||||
for k in range(0, N + 1):
|
||||
print(k, '================================')
|
||||
for t in range(0, 1):
|
||||
x_a = points[t]
|
||||
y_a = xor(x_a, k)
|
||||
incoherent_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
total_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
for distance in range(0, N + 1):
|
||||
band = bands[t][distance]  # band list for the current origin t (t is 0 in this loop)
|
||||
for x_b in band:
|
||||
y_b = xor(x_b, k)
|
||||
if y_a != y_b:
|
||||
incoherent_distances[k][distance] += 1
|
||||
|
||||
if len(band) < 2:
|
||||
continue
|
||||
for band_origin in range(0, len(band)):
|
||||
x_p = band[band_origin]
|
||||
y_p = xor(x_p, k)
|
||||
for i in range(0, len(band)):
|
||||
if i == band_origin:
|
||||
continue
|
||||
x_q = band[i]
|
||||
y_q = xor(x_q, k)
|
||||
band_distance = hamming_distance(x_p, x_q)
|
||||
total_bands[distance][band_distance] += 1
|
||||
if y_p != y_q:
|
||||
incoherent_bands[distance][band_distance] += 1
|
||||
print(incoherent_bands)
|
||||
print(total_bands)
|
||||
# print(distance, hamming_distance(x_p, x_q), y_p, y_q)
|
||||
|
||||
print(incoherent_distances)
|
||||
# print(bands)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
255
space_analysis2.py
Normal file
@ -0,0 +1,255 @@
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a, b))
|
||||
|
||||
def xor(x, bits):
|
||||
return np.sum(x[:bits]) % 2
|
||||
|
||||
def compute_pyramids(N):
|
||||
num_orders = max(int(N / 2), 1)
|
||||
pyramids = np.zeros((num_orders, N, N)).astype(np.int32)
|
||||
for i in range(2, N):
|
||||
for j in range(1, i):
|
||||
pyramids[0][i][j] = j
|
||||
for order in range(1, num_orders):
|
||||
# build out the first column
|
||||
acc = 0
|
||||
for i in range(order * 2 + 2, N):
|
||||
acc += pyramids[order - 1][i - 2][1]
|
||||
pyramids[order][i][1] = acc
|
||||
# accumulate the first column and place it on the diagonal(s)
|
||||
for k in range(0, int(order / 2) + 1):
|
||||
acc = 0
|
||||
for i in range(order * 2 + 2, N):
|
||||
acc += pyramids[order][i][1]
|
||||
pyramids[order][i][i - 1 - 2 * k] = acc
|
||||
# for odd, copy the first column to the first diagonal
|
||||
if order % 2 == 1:
|
||||
k += 1
|
||||
for i in range(order * 2 + 2, N):
|
||||
pyramids[order][i][i - 1 - 2 * k] = pyramids[order][i][1]
|
||||
# integrate under the diagonal
|
||||
inset = 1
|
||||
for j in reversed(range(2, N - 2 * k - 2)):
|
||||
acc = pyramids[order][N - inset - 1][j]
|
||||
for i in range(N - inset, N):
|
||||
acc += pyramids[order - 1][i - 2][j]
|
||||
pyramids[order][i][j] = acc
|
||||
if order * 2 + 2 < N - inset:
|
||||
inset += 1
|
||||
return pyramids
|
||||
|
||||
def compute_pyramids_full(N):
|
||||
num_orders = max(int(N / 2), 1)
|
||||
pyramids = np.zeros((num_orders, N, N)).astype(np.int32)
|
||||
# 1st order can be filled in as multiplication and forms the base case
|
||||
for i in range(0, N):
|
||||
for j in range(0, i + 1):
|
||||
pyramids[0][i][j] = (i - j + 1) * (j + 1)
|
||||
for order in range(1, num_orders):
|
||||
offset = order * 2
|
||||
|
||||
# fill in the LHS and diagonal
|
||||
for i in range(0, N - offset):
|
||||
value = math.comb(2 * (order + 1) + i - 1, i)
|
||||
pyramids[order][i + offset][0] = value
|
||||
# mirror
|
||||
pyramids[order][i + offset][i + offset] = value
|
||||
|
||||
# accumulate along the diagonals
|
||||
for i in range(1, N):
|
||||
value = pyramids[order][i][0]
|
||||
acc = value
|
||||
for j in range(1, N - i):
|
||||
value += acc
|
||||
pyramids[order][i + j][j] = value
|
||||
acc += pyramids[order - 1][i + j - 1][j - 1]
|
||||
|
||||
return pyramids
|
||||
|
||||
def get_total_band_count_2(distance, band_distance, N):
|
||||
if band_distance % 2 == 1:
|
||||
return 0
|
||||
order = int(band_distance / 2) - 1
|
||||
if order < 0:
|
||||
return 0
|
||||
if distance < order + 1:
|
||||
return 0
|
||||
if distance > N - order - 1:
|
||||
return 0
|
||||
order_root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2
|
||||
scale = math.comb(N - (order + 1) * 2, distance - order - 1)
|
||||
value = math.comb(2 * (order + 1) + N - 2 * (order + 1), N - 2 * (order + 1))
|
||||
return order_root * scale * value
|
||||
|
||||
def get_incoherent_band_count_2(pyramids, distance, band_distance, k, N):
|
||||
if k == 0 or k == N or band_distance % 2 == 1:
|
||||
return 0
|
||||
order = int(band_distance / 2) - 1
|
||||
if order < 0:
|
||||
return 0
|
||||
if distance < order + 1:
|
||||
return 0
|
||||
if distance > N - order - 1:
|
||||
return 0
|
||||
order_root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2
|
||||
scale = math.comb(N - (order + 1) * 2, distance - order - 1)
|
||||
value = pyramids[order][N - 2][k - 1]
|
||||
return order_root * scale * value
|
||||
|
||||
# pyramid = pyramids[order]
|
||||
# offset = (N - 1 - order) - distance
|
||||
# multiplier = pyramid[2 * order + offset][2 * order + 1 + offset]
|
||||
# row = N - offset
|
||||
# column = k
|
||||
# value = pyramid[row][column]
|
||||
# return multiplier * value
|
||||
|
||||
def get_incoherent_band_count(pyramids, distance, band_distance, k, N):
|
||||
if k == 0 or k == N or band_distance % 2 == 1:
|
||||
return 0
|
||||
order = int(band_distance / 2) - 1
|
||||
if order < 0:
|
||||
return 0
|
||||
if distance < order + 1:
|
||||
return 0
|
||||
if distance > N - order - 1:
|
||||
return 0
|
||||
if distance < k:
|
||||
distance = N - distance
|
||||
k = N - k
|
||||
pyramid = pyramids[order]
|
||||
offset = (N - 1 - order) - distance
|
||||
multiplier = pyramid[2 * order + 2 + offset][2 * order + 1 + offset]
|
||||
row = N - offset
|
||||
column = k
|
||||
value = pyramid[row][column]
|
||||
return multiplier * value
|
||||
|
||||
def get_total_band_count(pyramids, distance, band_distance, N):
|
||||
if band_distance % 2 == 1:
|
||||
return 0
|
||||
order = int(band_distance / 2) - 1
|
||||
if order < 0:
|
||||
return 0
|
||||
if distance < order + 1:
|
||||
return 0
|
||||
if distance > N - order - 1:
|
||||
return 0
|
||||
pyramid = pyramids[order]
|
||||
offset = (N - 1 - order) - distance
|
||||
length = N + 1 - 2 * (order + 1)
|
||||
a = pyramid[2 * order + 2 + offset][2 * order + 1 + offset]
|
||||
b = pyramid[2 * order + 2 + (length - offset - 1)][2 * order + 1 + (length - offset - 1)]
|
||||
return a * b
|
||||
|
||||
# def compute_band_distances(pyramids, N):
|
||||
# num_orders = max(int(N / 2), 1)
|
||||
# incoherent_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32)
|
||||
# for order in range(0, num_orders):
|
||||
# band_distance = (order + 1) * 2
|
||||
# for k in range(1, N):
|
||||
|
||||
|
||||
# for k in range(0, N + 1):
|
||||
# for distance in range()
|
||||
|
||||
def main():
|
||||
# N = 8
|
||||
# print(compute_pyramids_full(N))
|
||||
# total_distances = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
# for i in range(0, N + 1):
|
||||
# for j in range(0, N + 1):
|
||||
# total_distances[i][j] = get_total_band_count_2(i, j, N)
|
||||
# print(total_distances)
|
||||
# return
|
||||
|
||||
max_N = 8
|
||||
orders = [np.zeros((max_N + 1, max_N + 1)).astype(np.int32) for _ in range(0, max_N)]
|
||||
|
||||
print('Attempting discrete solution...')
|
||||
pyramids = compute_pyramids_full(max_N + 1)
|
||||
|
||||
for N in range(max_N, max_N + 1):
|
||||
# for N in range(2, max_N + 1):
|
||||
print('=============================')
|
||||
print('N@', N)
|
||||
print('Generating points...')
|
||||
points = []
|
||||
for i in range(0, 2 ** N):
|
||||
points.append(decode(i, N))
|
||||
|
||||
print('Computing bands...')
|
||||
bands = [[] for _ in range(0, N + 1)]
|
||||
for i in range(1, len(points)):
|
||||
distance = hamming_distance(points[0], points[i])
|
||||
bands[distance].append(points[i])
|
||||
|
||||
print('Computing band distances...')
|
||||
incoherent_distances = np.zeros((N + 1, N + 1))
|
||||
for k in range(0, N + 1):
|
||||
print('k@', k)
|
||||
# print(k, '================================')
|
||||
x_a = points[0]
|
||||
y_a = xor(x_a, k)
|
||||
incoherent_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
precomputed_incoherent_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
total_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
precomputed_total_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
for distance in range(0, N + 1):
|
||||
band = bands[distance]
|
||||
for x_b in band:
|
||||
y_b = xor(x_b, k)
|
||||
if y_a != y_b:
|
||||
incoherent_distances[k][distance] += 1
|
||||
|
||||
if len(band) < 2:
|
||||
continue
|
||||
for band_origin in range(0, len(band)):
|
||||
x_p = band[band_origin]
|
||||
# print(x_p)
|
||||
y_p = xor(x_p, k)
|
||||
for i in range(0, len(band)):
|
||||
if i == band_origin:
|
||||
continue
|
||||
x_q = band[i]
|
||||
y_q = xor(x_q, k)
|
||||
band_distance = hamming_distance(x_p, x_q)
|
||||
total_bands[distance][band_distance] += 1
|
||||
if y_p != y_q:
|
||||
incoherent_bands[distance][band_distance] += 1
|
||||
for band_distance in range(0, N + 1):
|
||||
precomputed_incoherent_bands[distance][band_distance] = get_incoherent_band_count_2(pyramids, distance, band_distance, k, N)
|
||||
precomputed_total_bands[distance][band_distance] = get_total_band_count_2(distance, band_distance, N)
|
||||
# print(incoherent_bands)
|
||||
for order in range(0, int(N / 2)):
|
||||
root = math.factorial(2 * (order + 1)) / math.factorial(order + 1) ** 2
|
||||
index = order * 2 + 2
|
||||
orders[order][N][k] = incoherent_bands[-2 - order][index] / root
|
||||
|
||||
print(incoherent_bands)
|
||||
print(precomputed_incoherent_bands)
|
||||
print(total_bands)
|
||||
print(precomputed_total_bands)
|
||||
# print(total_bands)
|
||||
# print(distance, hamming_distance(x_p, x_q), y_p, y_q)
|
||||
# for i in range(0, len(orders)):
|
||||
# print(orders[i])
|
||||
# # print(pyramids[i])
|
||||
# print('========================================')
|
||||
# print(incoherent_distances)
|
||||
# print(bands)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
385
space_analysis3.py
Normal file
@ -0,0 +1,385 @@
|
||||
import math
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
np.set_printoptions(threshold=sys.maxsize)
|
||||
|
||||
cache = {}
|
||||
def p_bernoulli(n, k, m, j):
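# Probability of drawing exactly k marked items when sampling n items without
# replacement from a population of m items containing j marked items (a
# hypergeometric probability), computed by dynamic programming over the states
# (a, b) = (marked drawn so far, unmarked drawn so far).
# Sanity check (an assumption, not a project dependency): this should agree with
# scipy.stats.hypergeom.pmf(k, m, j, n).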
|
||||
key = (n, k, m, j)
|
||||
if key in cache:
|
||||
return cache[key]
|
||||
probabilities = np.zeros((n + 1, n + 1))
|
||||
probabilities.fill(-1)
|
||||
stack = [(0,0)]
|
||||
while len(stack) > 0:
|
||||
(a, b) = stack.pop()
|
||||
if a + b == n:
|
||||
probabilities[a][b] = 1 if a == k else 0
|
||||
elif a > j:
|
||||
probabilities[a][b] = 0
|
||||
elif b > (m - j):
|
||||
probabilities[a][b] = 0
|
||||
else:
|
||||
p_left = probabilities[a + 1][b]
|
||||
p_right = probabilities[a][b + 1]
|
||||
if p_left >= 0 and p_right >= 0:
|
||||
p = (j - a) / (m - a - b)
|
||||
probabilities[a][b] = p_left * p + p_right * (1 - p)
|
||||
else:
|
||||
stack.append((a, b))
|
||||
if p_left < 0:
|
||||
stack.append((a + 1, b))
|
||||
if p_right < 0:
|
||||
stack.append((a, b + 1))
|
||||
# if len(cache) % 100 == 0:
|
||||
# print('Cache size: ', len(cache), math.floor(10000 * hits / (hits + misses)) / 100, '%')
|
||||
cache[key] = probabilities[0][0]
|
||||
return probabilities[0][0]
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a, b))
|
||||
|
||||
def xor(x, bits):
|
||||
return np.sum(x[:bits]) % 2
|
||||
|
||||
def compute_pseudopascal(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
return dist
|
||||
|
||||
def compute_pyramids(N):
|
||||
num_orders = max(int(N / 2), 1)
|
||||
pyramids = np.zeros((num_orders, N, N)).astype(np.int32)
|
||||
# 1st order can be filled in as multiplication and forms the base case
|
||||
for i in range(0, N):
|
||||
for j in range(0, i + 1):
|
||||
pyramids[0][i][j] = (i - j + 1) * (j + 1)
|
||||
for order in range(1, num_orders):
|
||||
offset = order * 2
|
||||
|
||||
# fill in the LHS and diagonal
|
||||
for i in range(0, N - offset):
|
||||
value = math.comb(2 * (order + 1) + i - 1, i)
|
||||
pyramids[order][i + offset][0] = value
|
||||
# mirror
|
||||
pyramids[order][i + offset][i + offset] = value
|
||||
|
||||
# accumulate along the diagonals
|
||||
for i in range(1, N):
|
||||
value = pyramids[order][i][0]
|
||||
acc = value
|
||||
for j in range(1, N - i):
|
||||
value += acc
|
||||
pyramids[order][i + j][j] = value
|
||||
acc += pyramids[order - 1][i + j - 1][j - 1]
|
||||
|
||||
return pyramids
|
||||
|
||||
def compute_string_key(key):
|
||||
return ','.join([str(x) for x in key])
|
||||
|
||||
def generate_bands(points):
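# For each origin point, recursively partitions the remaining points by Hamming
# distance from the current origin: points at equal distance form a band, keyed by
# the comma-joined sequence of distances taken so far (e.g. '2,4'), and every band
# member then serves as a new origin for its own sub-bands. Returns one dict per
# origin mapping each distance-sequence key to the number of points it covered.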
|
||||
all_bands = [{} for _ in range(0, len(points))]
|
||||
for origin_index in range(0, len(points)):
|
||||
bands = all_bands[origin_index]
|
||||
key = []
|
||||
group = [index for index in range(0, len(points)) if index != origin_index]
|
||||
stack = [(origin_index, key, group)]
|
||||
while len(stack) > 0:
|
||||
(origin_index, key, group) = stack.pop()
|
||||
distance = hamming_distance(points[origin_index], points[group[0]])
|
||||
in_band = []
|
||||
out_of_band = []
|
||||
for index in group:
|
||||
if distance == hamming_distance(points[origin_index], points[index]):
|
||||
in_band.append(index)
|
||||
else:
|
||||
out_of_band.append(index)
|
||||
if len(out_of_band) > 0:
|
||||
stack.append((origin_index, key, out_of_band))
|
||||
key = key[:]
|
||||
key.append(distance)
|
||||
string_key = compute_string_key(key)
|
||||
if string_key not in bands:
|
||||
bands[string_key] = 0
|
||||
bands[string_key] += len(in_band)
|
||||
if len(in_band) < 2:
|
||||
continue
|
||||
for origin_index in in_band:
|
||||
group = [index for index in in_band if index != origin_index]
|
||||
stack.append((origin_index, key, group))
|
||||
return all_bands
|
||||
|
||||
def test():
|
||||
N = 8
|
||||
points = [decode(x, N) for x in range(0, 2 ** N)]
|
||||
print(generate_bands(points)[0])
|
||||
|
||||
|
||||
# 2
|
||||
# 4, 4,
|
||||
# 6, 8, 6
|
||||
# 8, 12, 12, 8
|
||||
# 10, 16, 18, 16, 10
|
||||
# 12, 20, 24, 24, 20, 12
|
||||
# 14, 24, 30, 32, 30, 24, 14
|
||||
# 16, 28, 36, 40, 40, 36, 28, 16
|
||||
|
||||
# 1
|
||||
# 2, 2
|
||||
# 3, 4, 3
|
||||
# 4, 6, 6, 4
|
||||
# 5, 8, 9, 8, 5
|
||||
# 6, 10, 12, 12, 10, 6
|
||||
# 7, 12, 15, 16, 15, 12, 7
|
||||
|
||||
# 6, 0, 6
|
||||
# 24, 12, 12, 24
|
||||
# 60, 48, 36, 48, 60
|
||||
# 120, 120, 96, 96, 120, 120
|
||||
# 210, 240, 210, 192, 210, 240, 210
|
||||
# 336, 420, 396, 360, 360, 396, 420, 336
|
||||
# 504, 672, 672, 624, 600, 624, 672, 672, 504
|
||||
|
||||
|
||||
# 1, 0, 1
|
||||
# 4, 2, 2, 4
|
||||
# 10, 8, 6, 8, 10
|
||||
# 20, 20, 16, 16, 20, 20
|
||||
# 35, 40, 35, 32, 35, 40, 35
|
||||
# 56, 70, 66, 60, 60, 66, 70, 56
|
||||
# 84, 112, 112, 104, 100, 104, 112, 112, 84
|
||||
|
||||
#
|
||||
# 20, 0, 20, 0, 20,
|
||||
# 120, 40, 80, 80, 40, 120
|
||||
# 420, 240, 260, 320, 260, 240, 420
|
||||
# 1120, 840, 760, 880, 880, 760, 840, 1120
|
||||
|
||||
# 1, 0, 1, 0, 1
|
||||
# 6, 2, 4, 4, 2, 6
|
||||
# 21, 12, 13, 16, 13, 12, 21
|
||||
# 56, 42, 38, 44, 44, 38, 42, 56
|
||||
|
||||
# 70, 0, 70, 0, 70, 0, 70
|
||||
# 560, 140, 420, 280, 280, 420, 140, 560
|
||||
|
||||
# 252, 0, 252, 0, 252, 0, 252, 0, 252
|
||||
# 2520, 504, 2016, 1008, 1512, 1512, 1008, 2016, 504, 2520
|
||||
|
||||
# 1, 2, 3, 4,
|
||||
# 1, 3, 6, 10
|
||||
# 1, 4, 10, 20
|
||||
# 1, 5, 15, 35
|
||||
# 1, 6,
|
||||
|
||||
# 1, 2, 1
|
||||
# 1, 3, 3, 1
|
||||
# 1, 4, 6, 4, 1
|
||||
# 1, 5, 10, 10, 5, 1
|
||||
# 1, 6, 15, 20, 15, 6, 1
|
||||
|
||||
# 2, 6, 12, 20, 30, 42, 56
|
||||
# 6, 30, 90, 210, 420
|
||||
# 20, 140, 560,
|
||||
# 70
|
||||
|
||||
# 1, 3, 6, 10, 15, 21, 28
|
||||
# 1, 5, 15, 35
|
||||
|
||||
def main():
|
||||
test()
|
||||
return
|
||||
|
||||
N = 5
|
||||
|
||||
# print(compute_pseudopascal(10))
|
||||
# print(compute_pyramids(10))
|
||||
|
||||
points = []
|
||||
for i in range(0, 2 ** N):
|
||||
points.append(decode(i, N))
|
||||
|
||||
bands = [[[] for _ in range(0, N + 1)] for _ in range(0, len(points))]
|
||||
for i in range(0, len(points)):
|
||||
a = points[i]
|
||||
for j in range(0, len(points)):
|
||||
if i == j:
|
||||
continue
|
||||
b = points[j]
|
||||
distance = hamming_distance(a, b)
|
||||
bands[i][distance].append(b)
|
||||
|
||||
golden_incoherent_distances = None
|
||||
golden_total_distances = None
|
||||
golden_incoherent_bands = None
|
||||
golden_total_bands = None
|
||||
golden_incoherent_sub_bands = None
|
||||
golden_total_sub_bands = None
|
||||
# for t in range(0, len(points)):
|
||||
for t in range(0, 1):
|
||||
incoherent_distances = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
total_distances = np.zeros((N + 1)).astype(np.int32)
|
||||
if t == 0:
|
||||
golden_incoherent_distances = incoherent_distances
|
||||
golden_total_distances = total_distances
|
||||
incoherent_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32)
|
||||
total_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
if t == 0:
|
||||
golden_incoherent_bands = incoherent_bands
|
||||
golden_total_bands = total_bands
|
||||
incoherent_sub_bands = np.zeros((N + 1, N + 1, N + 1, N + 1)).astype(np.int32)
|
||||
total_sub_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32)
|
||||
if t == 0:
|
||||
golden_incoherent_sub_bands = incoherent_sub_bands
|
||||
golden_total_sub_bands = total_sub_bands
|
||||
# print(t)
|
||||
for k in range(1, N + 1):
|
||||
# print(k, '================================')
|
||||
x_a = points[t]
|
||||
y_a = xor(x_a, k)
|
||||
for distance in range(0, N + 1):
|
||||
# print('distance', distance)
|
||||
band = bands[t][distance]
|
||||
for x_b in band:
|
||||
y_b = xor(x_b, k)
|
||||
if k == 1:
|
||||
total_distances[distance] += 1
|
||||
if y_a != y_b:
|
||||
incoherent_distances[k][distance] += 1
|
||||
|
||||
if len(band) < 2:
|
||||
continue
|
||||
for band_origin in range(0, len(band)):
|
||||
x_p = band[band_origin]
|
||||
y_p = xor(x_p, k)
|
||||
sub_bands = [[] for _ in range(0, N + 1)]
|
||||
for i in range(0, len(band)):
|
||||
if i == band_origin:
|
||||
continue
|
||||
x_q = band[i]
|
||||
y_q = xor(x_q, k)
|
||||
band_distance = hamming_distance(x_p, x_q)
|
||||
if k == 1:
|
||||
total_bands[distance][band_distance] += 1
|
||||
if y_p != y_q:
|
||||
incoherent_bands[k][distance][band_distance] += 1
|
||||
sub_bands[band_distance].append(x_q)
|
||||
|
||||
# incoherent_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
# total_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
for band_distance in range(0, N + 1):
|
||||
sub_band = sub_bands[band_distance]
|
||||
if len(sub_band) < 2:
|
||||
continue
|
||||
for sub_band_origin in range(0, len(sub_band)):
|
||||
x_u = sub_band[sub_band_origin]
|
||||
y_u = xor(x_u, k)
|
||||
for i in range(0, len(sub_band)):
|
||||
if i == sub_band_origin:
|
||||
continue
|
||||
x_v = sub_band[i]
|
||||
y_v = xor(x_v, k)
|
||||
sub_band_distance = hamming_distance(x_v, x_u)
|
||||
if k == 1:
|
||||
total_sub_bands[band_distance][sub_band_distance] += 1
|
||||
if y_u != y_v:
|
||||
incoherent_sub_bands[k][distance][band_distance][sub_band_distance] += 1
|
||||
# print(incoherent_sub_bands)
|
||||
# print(total_sub_bands)
|
||||
# print('==========================')
|
||||
if t != 0:
|
||||
if not np.array_equal(golden_incoherent_sub_bands, incoherent_sub_bands):
|
||||
print(golden_incoherent_sub_bands)
|
||||
print(incoherent_sub_bands)
|
||||
raise Exception('Not symmetric')
|
||||
|
||||
if not np.array_equal(golden_incoherent_bands, incoherent_bands):
|
||||
print(golden_incoherent_bands)
|
||||
print(incoherent_bands)
|
||||
raise Exception('Not symmetric')
|
||||
# print(incoherent_bands)
|
||||
# print(total_bands)
|
||||
# print(distance, hamming_distance(x_p, x_q), y_p, y_q)
|
||||
if not np.array_equal(golden_incoherent_distances, incoherent_distances):
|
||||
print(golden_incoherent_distances)
|
||||
print(incoherent_distances)
|
||||
raise Exception('Not symmetric')
|
||||
|
||||
# print(golden_total_distances)
|
||||
# print(golden_incoherent_distances)
|
||||
|
||||
# print(golden_total_bands)
|
||||
# print(golden_incoherent_bands)
|
||||
# print(golden_total_bands)
|
||||
|
||||
p = np.ones((2 ** N, N + 1))
|
||||
for sample_size in range(0, 2 ** N):
|
||||
for k in range(0, N + 1):
|
||||
for d1 in range(0, N + 1):
|
||||
if golden_total_distances[d1] == 0:
|
||||
continue
|
||||
m = golden_total_distances[d1]
|
||||
j = golden_incoherent_distances[k][d1]
|
||||
n = min(sample_size, m)
|
||||
l = int(n * j / m)
|
||||
p[sample_size][k] *= p_bernoulli(n, l, m, j)
|
||||
print(np.around(p, 2))
|
||||
|
||||
p = np.ones((4 ** N, N + 1))
|
||||
for sample_size in range(0, 4 ** N):
|
||||
for k in range(0, N + 1):
|
||||
for d1 in range(0, N + 1):
|
||||
for d2 in range(0, N + 1):
|
||||
if golden_total_bands[d1][d2] == 0:
|
||||
continue
|
||||
m = golden_total_bands[d1][d2]
|
||||
j = golden_incoherent_bands[k][d1][d2]
|
||||
n = min(sample_size, m)
|
||||
l = int(n * j / m)
|
||||
p[sample_size][k] *= p_bernoulli(n, l, m, j)
|
||||
print(np.around(p, 3))
|
||||
|
||||
# p = np.ones((N + 1))
|
||||
# for k in range(0, N + 1):
|
||||
# for d1 in range(0, N + 1):
|
||||
# for d2 in range(0, N + 1):
|
||||
# if golden_total_bands[d1][d2] == 0:
|
||||
# continue
|
||||
# partial = golden_incoherent_bands[k][d1][d2] / golden_total_bands[d1][d2]
|
||||
# p[k] *= max(partial, 1 - partial)
|
||||
# print(p)
|
||||
|
||||
# p = np.ones((N + 1))
|
||||
# for k in range(0, N + 1):
|
||||
# for d1 in range(0, N + 1):
|
||||
# for d2 in range(0, N + 1):
|
||||
# for d3 in range(0, N + 1):
|
||||
# if golden_total_sub_bands[d1][d2][d3] == 0:
|
||||
# continue
|
||||
# partial = golden_incoherent_sub_bands[k][d1][d2][d3] / golden_total_sub_bands[d1][d2][d3]
|
||||
# p[k] *= max(partial, 1 - partial)
|
||||
# print(p)
|
||||
|
||||
# print(bands)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
229
space_analysis4.py
Normal file
@ -0,0 +1,229 @@
|
||||
import math
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
np.set_printoptions(threshold=sys.maxsize)
|
||||
|
||||
def decode(x, N):
|
||||
index = 0
|
||||
output = np.zeros((N))
|
||||
while x > 0 and index < N:
|
||||
output[index] = x & 0b1
|
||||
x >>= 1
|
||||
index += 1
|
||||
return output
|
||||
|
||||
def hamming_distance(a, b):
|
||||
return np.sum(np.logical_xor(a, b))
|
||||
|
||||
def xor(x, bits):
|
||||
return np.sum(x[:bits]) % 2
|
||||
|
||||
def compute_pseudopascal(N):
|
||||
dist = np.zeros((N, N))
|
||||
for j in range(0, N):
|
||||
dist[0][j] = math.comb(N - 1, j)
|
||||
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
|
||||
for i in range(1, N):
|
||||
for j in range(0, i + 1):
|
||||
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
|
||||
for k in range(i + 1, N):
|
||||
for j in reversed(range(0, k)):
|
||||
dist[i][j+1] = dist[i][j] + dist[i][j+1]
|
||||
return dist
|
||||
|
||||
def compute_pyramids(N):
|
||||
num_orders = max(int(N / 2), 1)
|
||||
pyramids = np.zeros((num_orders, N, N)).astype(np.int32)
|
||||
# 1st order can be filled in as multiplication and forms the base case
|
||||
for i in range(0, N):
|
||||
for j in range(0, i + 1):
|
||||
pyramids[0][i][j] = (i - j + 1) * (j + 1)
|
||||
for order in range(1, num_orders):
|
||||
offset = order * 2
|
||||
|
||||
# fill in the LHS and diagonal
|
||||
for i in range(0, N - offset):
|
||||
value = math.comb(2 * (order + 1) + i - 1, i)
|
||||
pyramids[order][i + offset][0] = value
|
||||
# mirror
|
||||
pyramids[order][i + offset][i + offset] = value
|
||||
|
||||
# accumulate along the diagonals
|
||||
for i in range(1, N):
|
||||
value = pyramids[order][i][0]
|
||||
acc = value
|
||||
for j in range(1, N - i):
|
||||
value += acc
|
||||
pyramids[order][i + j][j] = value
|
||||
acc += pyramids[order - 1][i + j - 1][j - 1]
|
||||
|
||||
return pyramids
|
||||
|
||||
# 2
|
||||
# 4, 4,
|
||||
# 6, 8, 6
|
||||
# 8, 12, 12, 8
|
||||
# 10, 16, 18, 16, 10
|
||||
# 12, 20, 24, 24, 20, 12
|
||||
# 14, 24, 30, 32, 30, 24, 14
|
||||
# 16, 28, 36, 40, 40, 36, 28, 16
|
||||
|
||||
# 1
|
||||
# 2, 2
|
||||
# 3, 4, 3
|
||||
# 4, 6, 6, 4
|
||||
# 5, 8, 9, 8, 5
|
||||
# 6, 10, 12, 12, 10, 6
|
||||
# 7, 12, 15, 16, 15, 12, 7
|
||||
|
||||
# 6, 0, 6
|
||||
# 24, 12, 12, 24
|
||||
# 60, 48, 36, 48, 60
|
||||
# 120, 120, 96, 96, 120, 120
|
||||
# 210, 240, 210, 192, 210, 240, 210
|
||||
# 336, 420, 396, 360, 360, 396, 420, 336
|
||||
# 504, 672, 672, 624, 600, 624, 672, 672, 504
|
||||
|
||||
|
||||
# 1, 0, 1
|
||||
# 4, 2, 2, 4
|
||||
# 10, 8, 6, 8, 10
|
||||
# 20, 20, 16, 16, 20, 20
|
||||
# 35, 40, 35, 32, 35, 40, 35
|
||||
# 56, 70, 66, 60, 60, 66, 70, 56
|
||||
# 84, 112, 112, 104, 100, 104, 112, 112, 84
|
||||
|
||||
#
|
||||
# 20, 0, 20, 0, 20,
|
||||
# 120, 40, 80, 80, 40, 120
|
||||
# 420, 240, 260, 320, 260, 240, 420
|
||||
# 1120, 840, 760, 880, 880, 760, 840, 1120
|
||||
|
||||
# 1, 0, 1, 0, 1
|
||||
# 6, 2, 4, 4, 2, 6
|
||||
# 21, 12, 13, 16, 13, 12, 21
|
||||
# 56, 42, 38, 44, 44, 38, 42, 56
|
||||
|
||||
# 70, 0, 70, 0, 70, 0, 70
|
||||
# 560, 140, 420, 280, 280, 420, 140, 560
|
||||
|
||||
# 252, 0, 252, 0, 252, 0, 252, 0, 252
|
||||
# 2520, 504, 2016, 1008, 1512, 1512, 1008, 2016, 504, 2520
|
||||
|
||||
# 1, 2, 3, 4,
|
||||
# 1, 3, 6, 10
|
||||
# 1, 4, 10, 20
|
||||
# 1, 5, 15, 35
|
||||
# 1, 6,
|
||||
|
||||
# 1, 2, 1
|
||||
# 1, 3, 3, 1
|
||||
# 1, 4, 6, 4, 1
|
||||
# 1, 5, 10, 10, 5, 1
|
||||
# 1, 6, 15, 20, 15, 6, 1
|
||||
|
||||
# 2, 6, 12, 20, 30, 42, 56
|
||||
# 6, 30, 90, 210, 420
|
||||
# 20, 140, 560,
|
||||
# 70
|
||||
|
||||
# 1, 3, 6, 10, 15, 21, 28
|
||||
# 1, 5, 15, 35
|
||||
|
||||
def main():
|
||||
last_incoherent_distances = None
|
||||
last_incoherent_bands = None
|
||||
last_incoherent_sub_bands = None
|
||||
for N in range(4, 5):
|
||||
# print(compute_pseudopascal(10))
|
||||
# print(compute_pyramids(10))
|
||||
|
||||
points = []
|
||||
for i in range(0, 2 ** N):
|
||||
points.append(decode(i, N))
|
||||
|
||||
bands = [[[] for _ in range(0, N + 1)] for _ in range(0, len(points))]
|
||||
for i in range(0, len(points)):
|
||||
a = points[i]
|
||||
for j in range(0, len(points)):
|
||||
if i == j:
|
||||
continue
|
||||
b = points[j]
|
||||
distance = hamming_distance(a, b)
|
||||
bands[i][distance].append(b)
|
||||
|
||||
# for t in range(0, len(points)):
|
||||
for t in range(0, 1):
|
||||
incoherent_distances = np.zeros((N + 1, N + 1))
|
||||
incoherent_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32)
|
||||
incoherent_sub_bands = np.zeros((N + 1, N + 1, N + 1, N + 1)).astype(np.int32)
|
||||
for k in range(1, N + 1):
|
||||
# print(k, '================================')
|
||||
x_a = points[t]
|
||||
y_a = xor(x_a, k)
|
||||
total_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
for distance in range(0, N + 1):
|
||||
# print('distance', distance)
|
||||
band = bands[t][distance]
|
||||
for x_b in band:
|
||||
y_b = xor(x_b, k)
|
||||
if y_a != y_b:
|
||||
incoherent_distances[k][distance] += 1
|
||||
|
||||
if len(band) < 2:
|
||||
continue
|
||||
for band_origin in range(0, len(band)):
|
||||
x_p = band[band_origin]
|
||||
y_p = xor(x_p, k)
|
||||
sub_bands = [[] for _ in range(0, N + 1)]
|
||||
for i in range(0, len(band)):
|
||||
if i == band_origin:
|
||||
continue
|
||||
x_q = band[i]
|
||||
y_q = xor(x_q, k)
|
||||
band_distance = hamming_distance(x_p, x_q)
|
||||
total_bands[distance][band_distance] += 1
|
||||
if y_p != y_q:
|
||||
incoherent_bands[k][distance][band_distance] += 1
|
||||
sub_bands[band_distance].append(x_q)
|
||||
|
||||
# incoherent_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
# total_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
|
||||
for band_distance in range(0, N + 1):
|
||||
sub_band = sub_bands[band_distance]
|
||||
if len(sub_band) < 2:
|
||||
continue
|
||||
for sub_band_origin in range(0, len(sub_band)):
|
||||
x_u = sub_band[sub_band_origin]
|
||||
y_u = xor(x_u, k)
|
||||
for i in range(0, len(sub_band)):
|
||||
if i == sub_band_origin:
|
||||
continue
|
||||
x_v = sub_band[i]
|
||||
y_v = xor(x_v, k)
|
||||
sub_band_distance = hamming_distance(x_v, x_u)
|
||||
# total_sub_bands[band_distance][sub_band_distance] += 1
|
||||
if y_u != y_v:
|
||||
incoherent_sub_bands[k][distance][band_distance][sub_band_distance] += 1
|
||||
# print(incoherent_sub_bands)
|
||||
# print(total_sub_bands)
|
||||
# print('==========================')
|
||||
|
||||
if last_incoherent_sub_bands is not None:
|
||||
for distance in range(1, int(N / 2) + 1):
|
||||
for band_distance in range(0, N + 1):
|
||||
for sub_band_distance in range (0, N + 1):
|
||||
if band_distance >= N or sub_band_distance >= N or last_incoherent_sub_bands[1][distance][band_distance][sub_band_distance] == 0:
|
||||
value = incoherent_sub_bands[1][distance][band_distance][sub_band_distance]
|
||||
if value > 0:
|
||||
print(N, value, (distance, band_distance, sub_band_distance))
|
||||
|
||||
last_incoherent_distances = incoherent_distances
|
||||
last_incoherent_bands = incoherent_bands
|
||||
last_incoherent_sub_bands = incoherent_sub_bands
|
||||
|
||||
# print(bands)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
164
train_generator.py
Normal file
@ -0,0 +1,164 @@
|
||||
import hashlib
|
||||
import secrets
|
||||
from struct import pack, pack_into, unpack_from
|
||||
|
||||
def sha(x):
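# Label function for the experiment: the lowest bit of SHA-256(x).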
|
||||
m = hashlib.sha256()
|
||||
m.update(x)
|
||||
result = m.digest()
|
||||
return result[0] & 0b1
|
||||
|
||||
def bit_at_index(buffer, index):
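# Returns the bit of `buffer` at bit position `index`; the byte offset wraps
# around modulo len(buffer).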
|
||||
offset = (index >> 3) % len(buffer)
|
||||
return buffer[offset] & (1 << (index & 0b111)) != 0
|
||||
|
||||
def evaluate(f, x):
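# Interprets f as a small byte-coded program over the input bits of x, roughly:
# opcodes 0/1 push the current accumulator together with a negation flag, opcode 2
# folds the top of the stack back in as (left & right) ^ flag, and any other opcode
# is followed by a 4-byte index whose addressed bit of x is XORed into the
# accumulator. Returns (output bit, bytes of f consumed).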
|
||||
stack = []
|
||||
offset = 0
|
||||
value = 0
|
||||
while offset < len(f):
|
||||
opcode = f[offset]
|
||||
offset += 1
|
||||
if opcode == 0 or opcode == 1:
|
||||
stack.append((opcode, value))
|
||||
value = 0
|
||||
elif opcode == 2:
|
||||
if len(stack) == 0:
|
||||
return (value, offset)
|
||||
(last_opcode, _) = stack[-1]
|
||||
if last_opcode > 0:
|
||||
stack.append((0, value))
|
||||
value = 0
|
||||
continue
|
||||
right = value
|
||||
(_, left) = stack.pop()
|
||||
(opcode, value) = stack.pop()
|
||||
value ^= ((left & right) ^ (opcode & 0b1))
|
||||
else:
|
||||
try:
|
||||
index = unpack_from('I', f, offset)[0]
|
||||
offset += 4
|
||||
if bit_at_index(x, index):
|
||||
value ^= 1
|
||||
except:
|
||||
break
|
||||
|
||||
while len(stack) > 0:
|
||||
(opcode, other_value) = stack.pop()
|
||||
if opcode == 0:
|
||||
right = other_value
|
||||
(opcode, left) = stack.pop()
|
||||
value ^= ((left & right) ^ (opcode & 0b1))
|
||||
value ^= other_value ^ (opcode & 0b1)
|
||||
return (value, offset)
|
||||
|
||||
def random_generator():
|
||||
return secrets.token_bytes(256)
|
||||
|
||||
def random_input():
|
||||
return secrets.token_bytes(4)
|
||||
|
||||
def generate(generator, sample):
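# Builds a 1024-byte candidate program f one output bit at a time: for each bit,
# the generator program is evaluated on an input made of the step counter (packed
# into the leading bytes) followed by the packed training sample.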
|
||||
f_size = 1024
|
||||
f = bytearray(f_size)
|
||||
x = bytearray(4) + sample
|
||||
for i in range(0, f_size):
|
||||
build_value = 0
|
||||
for j in range(0, 8):
|
||||
step = i * 8 + j
|
||||
pack_into('H', x, 0, step)
|
||||
(value, _) = evaluate(generator, x)
|
||||
build_value <<= 1
|
||||
build_value |= value
|
||||
f[i] = build_value
|
||||
return f
|
||||
|
||||
def sample(N):
|
||||
inputs = [random_input() for i in range(0, N)]
|
||||
outputs = [sha(x) for x in inputs]
|
||||
return (inputs, outputs)
|
||||
|
||||
def augment_inputs(inputs, layers):
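# Prepends a working byte to each input and fills it with the output bits of the
# candidates in each accepted layer (the byte is rewritten layer by layer), so
# later generations can condition on the previous layer's predictions.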
|
||||
augmented_inputs = []
|
||||
for x in inputs:
|
||||
x_n = bytearray(1) + x
|
||||
for layer in layers:
|
||||
build_value = 0
|
||||
for candidate in layer:
|
||||
(value, _) = evaluate(candidate, x_n)
|
||||
build_value <<= 1
|
||||
build_value |= value
|
||||
x_n[0] = build_value
|
||||
augmented_inputs.append(x_n)
|
||||
return augmented_inputs
|
||||
|
||||
def pack_sample(inputs, outputs):
|
||||
sample = bytearray()
|
||||
for i in range(0, len(inputs)):
|
||||
sample += inputs[i]
|
||||
sample += bytearray([outputs[i]])
|
||||
return sample
|
||||
|
||||
def compute_score(f, inputs, outputs):
|
||||
correct = 0.0
|
||||
for i in range(0, len(inputs)):
|
||||
(value, _) = evaluate(f, inputs[i])
|
||||
if value == outputs[i]:
|
||||
correct += 1
|
||||
return correct / len(outputs)
|
||||
|
||||
def evaluate_generator(g):
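# Scores a generator by running the cascade: each epoch draws a fresh training
# sample, asks the generator for candidate programs, measures each candidate on a
# separate test sample, rejects candidates below the (slowly relaxed) threshold,
# keeps the survivors as a new layer, and raises the threshold to the survivors'
# mean test score. The final threshold is returned as the generator's score.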
|
||||
num_candidates = 8
|
||||
num_train_samples = 64
|
||||
num_test_samples = 1000
|
||||
num_epochs = 10
|
||||
threshold = 0
|
||||
|
||||
layers = []
|
||||
for epoch in range(0, num_epochs):
|
||||
difficulty = 0
|
||||
layer = []
|
||||
candidate = 0
|
||||
scores = []
|
||||
while candidate < num_candidates:
|
||||
(x, y) = sample(num_train_samples)
|
||||
x_n = augment_inputs(x, layers)
|
||||
f = generate(g, pack_sample(x_n, y))
|
||||
print(f)
|
||||
|
||||
(x, y) = sample(num_test_samples)
|
||||
x_n = augment_inputs(x, layers)
|
||||
score = compute_score(f, x_n, y)
|
||||
|
||||
if score < threshold - difficulty * 0.0001:
|
||||
difficulty += 1
|
||||
continue
|
||||
|
||||
print(epoch, score, difficulty)
|
||||
|
||||
layer.append(f)
|
||||
scores.append(score)
|
||||
difficulty = 0
|
||||
candidate += 1
|
||||
threshold = sum(scores) / len(scores)
|
||||
layers.append(layer)
|
||||
return threshold
|
||||
|
||||
def main():
|
||||
num_random_candidates = 1000
|
||||
|
||||
g = None
|
||||
score = 0
|
||||
|
||||
for i in range(0, num_random_candidates):
|
||||
g_n = random_generator()
|
||||
print(g_n)
|
||||
score_n = evaluate_generator(g_n)
|
||||
print(i, score_n)
|
||||
if score_n > score:  # keep the best-scoring generator (the comparison was inverted, so g was never set)
|
||||
score = score_n
|
||||
g = g_n
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|