# probabilities/mutations17.py
import bisect
import hashlib
import math
import random

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
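# Pack a 0/1 bit vector into a bytearray, eight bits per byte.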
def encode(v):
byte_values = []
for i in range(0, math.ceil(len(v) / 8)):
x = 0
for j in range(0, 8):
index = i * 8 + j
if index >= len(v):
continue
x <<= 1
x |= int(v[index])
byte_values.append(x)
return bytearray(byte_values)
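# Return the lowest bit of the SHA-256 digest of the packed bit vector.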
def sha(v):
x = encode(v)
m = hashlib.sha256()
m.update(x)
result = m.digest()
return result[0] & 0b1
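# Parity (XOR) of all bits; used below as the expected output for each sample.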
def xor(v):
    return np.sum(v) % 2
def hamming_distance(a, b, scratch):
np.logical_xor(a, b, scratch)
return sum(scratch)
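# Canonical id for an index set: sorted indices joined with commas.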
def index_hash(indices):
return ','.join([str(index) for index in sorted(indices)])
def bin_div(a, b):
if a == 0 and b == 0:
return 2
if a == 1 and b == 0:
return -1
if a == 0 and b == 1:
return 0
return 1
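# A candidate term: the product (logical AND) of the expanded input bits at
# `indices`. The sentinel index len(x) (i.e. actual_N) marks a null term that
# always evaluates to 0.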
class Candidate():
def __init__(self, indices):
self.indices = indices[:]
self.uplift = 0
def evaluate(self, x):
if len(x) in self.indices:
return 0
value = 1
for index in self.indices:
value *= x[index]
return value
def id(self):
return index_hash(self.indices)
def eval_str(self):
parts = []
for index in self.indices:
parts.append('x[' + str(index) + ']')
return '*'.join(parts)
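# Greedy solver state. The goal is to express the parity target as an XOR of
# product terms over the expanded inputs (each raw bit and its complement),
# scoring candidate terms with a distance-weighted "coherence" statistic over
# random samples and committing the best ones as layers.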
class Probabilities():
def __init__(self):
self.N = 8
self.actual_N = self.N * 2
self.num_terms = 1
self.num_candidates = 100
# self.sample_size = self.N ** 2
self.sample_size = 64
self.p = np.zeros((self.actual_N + 1,))
self.p_temp = np.empty_like(self.p)
self.next_p = np.empty_like(self.p)
self.knowns = []
self.stops = set()
self.reset_p()
self.epoch = 0
self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
self.raw_inputs = np.zeros((self.sample_size, self.N)).astype(np.int32)
self.masked_distances = np.zeros((self.sample_size, self.sample_size))
self.distances = np.zeros((self.sample_size, self.sample_size))
self.xor_square = np.zeros((self.sample_size, self.sample_size))
self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
self.outputs = np.zeros((self.sample_size)).astype(np.int32)
self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
self.mask = np.zeros((self.sample_size))
self.numerators = np.zeros((self.sample_size))
self.denominators = np.zeros((self.sample_size))
self.coherences = np.zeros((self.sample_size))
self.max_coherences = np.zeros((self.actual_N + 1))
self.max_candidates = [None for _ in range(0, self.actual_N)]
self.uplifts = np.zeros((self.actual_N))
self.uplift_means = np.zeros((self.actual_N))
self.uplift_medians = np.zeros((self.actual_N))
self.uplift_convergences = np.zeros((self.actual_N))
# self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)]
self.superspace_uplift_samples = []
self.subspace_uplifts = np.zeros((self.actual_N))
self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)]
self.uplift_stddevs = np.zeros((self.actual_N))
self.samples = 1000
# self.samples = 200
self.base_coherence_samples = np.zeros((self.samples))
self.coherence_samples = np.zeros((self.actual_N, self.samples))
self.subspace_uplift_left_samples = np.zeros((self.actual_N, self.samples))
self.subspace_uplift_right_samples = np.zeros((self.actual_N, self.samples))
self.layers = []
self.layer_confidence = {}
self.base = None
self.scratch = np.zeros((self.N,))
self.last_value = -1
self.rounds = 0
self.average_delta_over_null = 0
self.visited = set()
self.candidate_pool = []
self.candidate_ids = set()
self.has_added_layer = False
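    # Draw a fresh random input sample; raw bit j expands to the pair
    # (x_j, 1 - x_j) stored at columns 2j and 2j + 1 of self.inputs.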
def randomize_inputs(self):
for i in range(0, self.sample_size):
for j in range(0, self.N):
val = random.randint(0, 1)
self.raw_inputs[i][j] = val
self.inputs[i][j * 2] = val
self.inputs[i][j * 2 + 1] = val ^ 1
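    # Pairwise sample weights: 1 / 2**(d - 1) for Hamming distance d > 0.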
def populate_distances(self):
for i in range(0, len(self.raw_inputs)):
x_a = self.raw_inputs[i]
for j in range(0, len(self.raw_inputs)):
if i == j:
continue
x_b = self.raw_inputs[j]
distance = hamming_distance(x_a, x_b, self.scratch)
self.distances[i][j] = 1.0 / (2 ** (distance - 1)) if distance > 0 else 0
# self.distances[i][j] = 1.0 / (distance ** 2) if distance > 0 else 0
def compute_expected_outputs(self):
for i in range(0, len(self.raw_inputs)):
self.expected_outputs[i] = xor(self.raw_inputs[i])
def compute_base_outputs(self):
if self.base is None:
self.base_outputs.fill(0)
return
for i in range(0, len(self.inputs)):
self.base_outputs[i] = self.base(self.inputs[i])
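    # Vectorized coherence of the residual in self.output_xor: for each sample,
    # the distance-weighted fraction of other samples whose residual agrees
    # with it (entries equal to -1 are masked out). Returns 1 minus the mean
    # distance-weighted disagreement (0.0 if everything is masked).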
def mat_coherence(self):
np.abs(self.output_xor, self.mask)
np.subtract(self.output_xor, self.mask, self.mask)
np.divide(self.mask, 2.0, self.mask)
np.add(1.0, self.mask, self.mask)
self.xor_square.fill(0)
np.copyto(self.masked_distances, self.distances)
masked_distances_t = self.masked_distances.transpose()
for i in range(0, len(self.xor_square)):
self.xor_square[i] = self.output_xor
np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i])
np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i])
np.sum(self.masked_distances, axis=0, out=self.denominators)
self.xor_square = self.xor_square.transpose()
np.logical_xor(self.xor_square, self.output_xor, self.xor_square)
np.multiply(self.xor_square, self.masked_distances, self.xor_square)
np.sum(self.xor_square, axis=0, out=self.numerators)
np.divide(self.numerators, self.denominators, self.coherences)
mean = np.nanmean(self.coherences)
        if math.isnan(mean):
mean = 1.0
return 1.0 - mean
def coherence(self, outputs=None):
if outputs is None:
outputs = self.outputs
np.logical_xor(outputs, self.expected_outputs, self.output_xor)
return self.mat_coherence()
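        # The loop below is the earlier pairwise implementation of the same
        # computation; it is unreachable because of the return above.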
coherences = []
for i in range(0, len(self.output_xor)):
y_a = self.output_xor[i]
numerator = 0
denominator = 0
for j in range(0, len(self.output_xor)):
if i == j:
continue
y_b = self.output_xor[j]
weight = self.distances[i][j]
denominator += weight
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
numerator += weight
coherence = numerator / denominator if denominator > 0 else 0
coherences.append(coherence)
raw_coherence = sum(coherences) / len(coherences)
check_coherence = self.mat_coherence()
return raw_coherence
def div_coherence(self):
coherences = []
for i in range(0, len(self.output_xor)):
y_a = self.output_xor[i]
if y_a < 0:
continue
numerator = 0
denominator = 0
for j in range(0, len(self.output_xor)):
if i == j:
continue
y_b = self.output_xor[j]
if y_b < 0:
continue
weight = self.distances[i][j]
denominator += weight
if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
numerator += weight
# if y_a < 0 or y_b < 0:
# numerator += weight
coherence = numerator / denominator if denominator > 0 else 0
coherences.append(coherence)
if len(coherences) == 0:
return 1.0
return sum(coherences) / len(coherences)
def normalize_p(self):
check = self.knowns[:]
for i in range(0, len(self.p)):
if self.p[i] < 0:
self.p[i] = 0
for i in range(0, len(self.p)):
if i in self.knowns:
flip = i ^ 0b1
self.p[i] = 0.0
self.p[flip] = 0.0
else:
check.append(i)
stop_id = index_hash(check)
check.pop()
if stop_id in self.stops:
self.p[i] = 0.0
total = np.sum(self.p)
if total > 0:
for i in range(0, len(self.p)):
self.p[i] = self.p[i] / total
def reset_p(self):
self.p.fill(1.0)
self.normalize_p()
def threshold(self):
# return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
return 1.0 - (self.epoch / 1000)
def get_converged_index(self):
for i in range(0, len(self.p)):
if self.p[i] > self.threshold():
return i
return None
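    # Commit the current `knowns` as a new term (layer), record a stop for this
    # index set, rebuild the cached base function, and reset the probabilities.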
def add_layer(self):
self.has_added_layer = True
self.add_stop()
layer = Candidate(self.knowns)
self.layers.append(layer)
self.base = self.cache_layers()
self.knowns.pop()
self.reset_p()
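    # Resample inputs and return the coherence of the current base function
    # against the expected parity outputs.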
def random_sample(self):
self.randomize_inputs()
self.populate_distances()
self.compute_expected_outputs()
self.compute_base_outputs()
return self.coherence(self.base_outputs)
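    # Sample a candidate index set from the probability vector p, skipping the
    # null index, complements of already-chosen bits, and stopped combinations.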
def random_candidate(self):
indices = self.knowns[:]
np.copyto(self.p_temp, self.p)
self.p_temp[self.actual_N] = 0
total = np.sum(self.p_temp)
if total == 0:
return None
np.divide(self.p_temp, total, self.p_temp)
for _ in range(0, self.num_terms - len(self.knowns)):
index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
indices.append(index)
flip = index ^ 0b1
self.p_temp[index] = 0
self.p_temp[flip] = 0
for i in range(0, len(self.p_temp)):
if i not in indices:
indices.append(i)
stop_id = index_hash(indices)
indices.pop()
if stop_id in self.stops:
self.p_temp[i] = 0.0
total = np.sum(self.p_temp)
if total == 0:
return None
np.divide(self.p_temp, total, self.p_temp)
return Candidate(indices)
def seed_candidate_pool(self):
for _ in range(0, self.num_candidates):
candidate = self.random_candidate()
if candidate is None:
continue
candidate_id = candidate.id()
if candidate_id in self.candidate_ids:
continue
self.candidate_pool.append(candidate)
self.candidate_ids.add(candidate_id)
def add_stop(self):
stop_id = index_hash(self.knowns)
self.stops.add(stop_id)
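    # Restrict the residual to samples where `candidate` evaluates to `half`
    # (others are masked with -1) and return (count, coherence) for that
    # subspace.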
    def get_distribution(self, candidate, half=1):
count = 0
for i in range(0, len(self.inputs)):
value = candidate.evaluate(self.inputs[i])
if value == half:
self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i]
count += 1
else:
self.output_xor[i] = -1
return (count, self.mat_coherence())
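    # One sampling epoch: record coherence samples for the base function and
    # for each single-index extension of `knowns`; once self.samples epochs
    # have accumulated, use KS tests against the base distribution to pick the
    # next index (or subspace split) and commit it. The code after the early
    # returns appears to be left over from earlier experiments and is not
    # reached.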
def update(self):
sample = self.epoch
self.epoch += 1
base_coherence = self.random_sample()
self.base_coherence_samples[sample] = base_coherence - 0.5
candidate = Candidate(self.knowns[:])
for i in range(0, self.actual_N):
candidate.indices.append(i)
try:
count_0, subspace_coherence_0 = self.get_distribution(candidate, 0)
count_1, subspace_coherence_1 = self.get_distribution(candidate, 1)
# delta = (subspace_coherence_0 - base_coherence) * count_0 / self.sample_size
# delta = subspace_coherence_0 - subspace_coherence_1
self.subspace_uplift_left_samples[i][sample] = subspace_coherence_0 - 0.5
self.subspace_uplift_right_samples[i][sample] = subspace_coherence_1 - 0.5
# if index_hash(candidate.indices) in self.stops:
# continue
for j in range(0, len(self.inputs)):
self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j])
coherence = self.coherence()
self.coherence_samples[i][sample] = coherence - 0.5
finally:
candidate.indices.pop()
if self.epoch >= self.samples:
# for i in range(0, self.actual_N):
# parameters = stats.norm.fit(self.uplift_samples[i])
# print(i, parameters)
# print(i, stats.kstest(self.uplift_samples[i], "norm", parameters))
added = False
# parameters = stats.norm.fit(self.base_coherence_samples)
# (base_mu, _) = parameters
try:
index = -1
lowest_pvalue = -1
is_subspace = False
for i in range(0, self.actual_N):
if i in self.knowns:
continue
result = stats.kstest(self.base_coherence_samples, self.coherence_samples[i], alternative='greater')
print(i, result)
# value = result.pvalue * (1 - result.statistic)
if index < 0 or result.pvalue < lowest_pvalue:
# if index < 0 or value < lowest_pvalue:
index = i
lowest_pvalue = result.pvalue
for i in range(0, self.actual_N):
if i in self.knowns:
continue
result = stats.kstest(self.base_coherence_samples, self.subspace_uplift_left_samples[i], alternative='greater')
# result = stats.kstest(self.subspace_uplift_left_samples[i], self.subspace_uplift_right_samples[i], alternative='greater')
print(i, result)
# value = result.pvalue * (1 - result.statistic)
if index < 0 or result.pvalue < lowest_pvalue:
# if index < 0 or value < lowest_pvalue:
index = i
lowest_pvalue = result.pvalue
is_subspace = True
# if result.pvalue > 0.95:
# index = i
# parameters = stats.norm.fit(self.subspace_uplift_samples[i])
# (mu, _) = parameters
# if mu > base_mu:
# if index < 0 or mu > highest_mu:
# index = i
# highest_mu = mu
if index >= 0:
if is_subspace:
# print('subspace')
self.knowns.append(index)
print(self.knowns, lowest_pvalue)
else:
# print('flat')
self.knowns.append(index)
# self.layer_confidence[index_hash(self.knowns)] = confidence
# num_terms = len(self.knowns)
print(self.knowns, lowest_pvalue)
print(base_coherence)
self.add_layer()
# if num_terms > self.num_terms:
# self.stops = set()
# self.num_terms = num_terms
self.knowns = []
return
# if len(self.knowns) > 0:
# # self.add_stop()
# self.knowns = []
finally:
fig, axs = plt.subplots(4, 4)
for i in range(0, 4):
for j in range(0, 4):
axs[i][j].hist(self.base_coherence_samples, 50, density=True, facecolor='r', alpha=0.5)
n, bins, patches = axs[i][j].hist(self.coherence_samples[i * 4 + j], 50, density=True, facecolor='g', alpha=0.5)
n, bins, patches = axs[i][j].hist(self.subspace_uplift_left_samples[i * 4 + j], 50, density=True, facecolor='b', alpha=0.5)
# n, bins, patches = axs[i][j].hist(self.subspace_uplift_right_samples[i * 4 + j], 50, density=True, facecolor='b', alpha=0.5)
plt.show()
self.epoch = 0
return
# print('=====' + str(base_coherence))
# print(self.uplifts)
# print(self.uplift_means)
# print(self.uplift_medians)
# print(self.uplift_stddevs)
# print(self.uplift_ranges)
# print(self.uplift_convergences)
# print(self.subspace_uplifts)
if index >= 0:
self.knowns.append(index)
print(base_coherence)
print(self.knowns, self.epoch)
# print(self.uplift_medians)
# print(self.uplifts)
# print(self.subspace_uplifts)
self.add_layer()
self.uplifts.fill(0)
self.subspace_uplifts.fill(0)
self.uplift_medians.fill(0)
self.uplift_convergences.fill(0)
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
self.epoch = 0
return
if subspace_index >= 0:
self.knowns.append(subspace_index)
print(self.knowns, self.epoch)
# print(self.uplifts)
# print(self.subspace_uplifts)
self.uplifts.fill(0)
self.subspace_uplifts.fill(0)
self.uplift_medians.fill(0)
self.uplift_convergences.fill(0)
self.uplift_samples = [[] for _ in range(0, self.actual_N)]
self.epoch = 0
return
# print('======')
# print(self.epoch, base_coherence)
# print('======')
# if len(self.candidate_pool) == 0:
# print(self.p)
# for i in range(0, min(5, len(self.candidate_pool))):
# candidate = self.candidate_pool[i]
# print(candidate.id(), candidate.uplift)
# if self.epoch < 15:
# return
if self.candidate_pool[0].uplift > 0.3:
candidate = self.candidate_pool[0]
candidate_id = candidate.id()
self.candidate_ids.remove(candidate_id)
print(candidate_id)
self.knowns = candidate.indices
self.add_layer()
self.knowns = []
self.reset_p()
self.epoch = 0
self.candidate_pool = []
self.candidate_ids = set()
elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
self.epoch = 0
self.num_terms += 1
self.candidate_pool = []
self.candidate_ids = set()
self.knowns = []
self.stops = set()
self.reset_p()
return
# np.copyto(self.next_p, self.p)
        has_candidate = False
        for _ in range(0, self.num_candidates):
candidate = self.random_candidate()
if candidate is None:
continue
candidate_id = candidate.id()
            if candidate_id in self.visited:
                continue
            self.visited.add(candidate_id)
if self.actual_N in candidate.indices:
continue
has_candidate = True
for i in range(0, len(self.inputs)):
self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
# coherence = self.ring_coherence()
coherence = self.coherence()
# if coherence <= base_coherence:
# continue
# for index in candidate.indices:
# self.next_p[index] += (coherence - base_coherence) * (1 / 1000.0)
# self.p_temp[index] += 0
for index in candidate.indices:
if coherence > self.max_coherences[index]:
self.max_coherences[index] = coherence
self.max_candidates[index] = candidate
# self.max_coherences[index] = max(self.max_coherences[index], coherence)
# np.copyto(self.p, self.next_p)
# np.copyto(self.p_temp, self.p)
for i in range(0, self.actual_N):
candidate = self.max_candidates[i]
if candidate is None:
continue
for index in candidate.indices:
self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
# print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
self.normalize_p()
# print(self.p)
# np.subtract(self.p_temp, self.p, self.p_temp)
# np.abs(self.p_temp, self.p_temp)
# delta = np.sum(self.p_temp) / len(self.p_temp)
# print(delta, np.argmax(self.p))
# np.copyto(self.p_temp, self.p)
# for i in range(0, len(self.p_temp)):
# self.p_temp[i] = round(self.p_temp[i] * 100) / 100
# print(self.p_temp)
index = np.argmax(self.p)
delta_over_null = self.p[index] - self.p[self.actual_N]
if self.epoch == 0:
self.average_delta_over_null = delta_over_null
else:
self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
diff = self.num_terms - len(self.knowns)
print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))
# Always iterate for a minimum number of epochs
if self.epoch < 15:
return
if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
return
if self.average_delta_over_null < 0.001:
index = self.actual_N
else:
index = np.argmax(self.p)
# index = np.argmax(self.p)
# if index == self.last_value:
# self.rounds += 1
# else:
# self.rounds = 0
# self.last_value = index
# if self.rounds < 10 and self.epoch < 100:
# return
# if self.epoch < 5 or (delta > 0.001 and self.epoch < 50):
# return
# index = np.argmax(self.p)
# print(self.p)
# print(self.threshold())
# print(self.p)
# index = self.get_converged_index()
        if index is not None or not has_candidate:
# print(index, delta, np.argmax(self.p))
self.epoch = 0
if index == self.actual_N or not has_candidate:
if len(self.knowns) > 0:
self.add_stop()
self.knowns.pop()
print('Backtrack: ' + str(self.knowns))
self.reset_p()
return
self.num_terms += 1
self.knowns = []
self.stops = set()
self.reset_p()
print(self.num_terms)
return
self.knowns.append(index)
# bisect.insort(self.knowns, index)
if len(self.knowns) == self.num_terms:
print('Add layer: ' + str(self.knowns))
self.add_layer()
else:
print('Found term: ' + str(self.knowns))
self.reset_p()
print(base_coherence)
return
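    # Compile the accumulated layers into a single function
    # f(x) = term_1(x) ^ term_2(x) ^ ... using exec().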
def cache_layers(self):
expr = 'def f(x):\n\tresult=0\n'
for layer in self.layers:
expr += '\tresult^=' + layer.eval_str() + '\n'
expr += '\treturn result\n'
scope = {}
exec(expr, scope)
return scope['f']
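# Keep updating until the term size being searched exceeds N.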
def main():
probabilities = Probabilities()
# probabilities.knowns = [14]
# probabilities.add_layer()
# probabilities.knowns = [8]
# probabilities.add_layer()
# probabilities.knowns = [4]
# probabilities.add_layer()
while probabilities.num_terms <= probabilities.N:
probabilities.update()
if __name__ == "__main__":
main()