probabilities/mutations21.py

from cmath import isnan
import numpy as np
import random
import hashlib
import math

def get_state_id(state):
    return ','.join([str(x) for x in sorted(state)])

class Point():
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def id(self):
        return ','.join([str(int(x)) for x in self.x])

class Influence():
    def __init__(self, a, b):
        self.a = a
        self.b = b
        self.original_dof = set()
        self.dof = set()
        for i in range(0, len(a.x)):
            if a.x[i] != b.x[i]:
                self.original_dof.add(i)
                self.dof.add(i)

    def coherent(self):
        return self.a.y == self.b.y

    def id(self):
        return ','.join(sorted([self.a.id(), self.b.id()]))

def encode(v):
    byte_values = []
    for i in range(0, math.ceil(len(v) / 8)):
        x = 0
        for j in range(0, 8):
            index = i * 8 + j
            if index >= len(v):
                continue
            x <<= 1
            x |= int(v[index])
        byte_values.append(x)
    return bytearray(byte_values)

def decode(x, N):
    index = 0
    output = np.zeros((N))
    while x > 0 and index < N:
        output[index] = x & 0b1
        x >>= 1
        index += 1
    return output

def sha(v):
    x = encode(v)
    m = hashlib.sha256()
    m.update(x)
    result = m.digest()
    return result[0] & 0b1

def hamming_distance(a, b):
    return np.sum(np.logical_xor(a.x, b.x))

def random_x(N):
    x = np.zeros((N))
    for i in range(0, N):
        x[i] = random.randint(0, 1)
    return x

def xor(x):
    # return sum(x[:4]) % 2
    return sum(x) % 2

def create_dof_map(influences):
    dof_map = {}
    for influence in influences:
        for i in influence.dof:
            if not i in dof_map:
                dof_map[i] = []
            dof_map[i].append(influence)
    return dof_map

def flip(influences, i):
    for influence in influences:
        if i in influence.dof:
            influence.a.y = int(influence.a.y) ^ 1

def remove_dof(dof_map, i, flip = False):
    for influence in dof_map[i]:
        influence.dof.remove(i)
        if flip:
            influence.a.y = int(influence.a.y) ^ 1
        # if len(influence.dof) == 0 and not influence.coherent():
        #     raise Exception('Invalid')
    del dof_map[i]

def solve(dof_map, all_influences, all_samples):
    eliminated = True
    while eliminated:
        eliminated = False
        for influence in all_influences:
            if len(influence.dof) == 1:
                i = next(iter(influence.dof))
                if influence.coherent:
                    remove_dof(dof_map, i)
                    eliminated = True
                else:
                    print('Forced', i)
                    remove_dof(dof_map, i, True)
                    eliminated = True
                
    lowest_dof = None
    for influence in all_influences:
        if not influence.coherent and len(influence.dof) > 1:
            if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):
                lowest_dof = influence

    flip = None
    highest_score = -1

    for i in lowest_dof.dof:
        per_point_scores = {}
        i_influences = dof_map[i]
        left = 0
        right = 0
        for influence in i_influences:
            if not influence.a in per_point_scores:
                per_point_scores[influence.a] = [0, 0]
            if not influence.b in per_point_scores:
                per_point_scores[influence.b] = [0, 0]
            if influence.coherent:
                per_point_scores[influence.a][0] += 1
                per_point_scores[influence.b][0] += 1
                left += 1
            else:
                per_point_scores[influence.a][1] += 1
                per_point_scores[influence.b][1] += 1
                right += 1
        print(i, left / (left + right))
        num = 0
        denom = 0
        for _, score in per_point_scores.items():
            if score[0] == score[1]:
                continue
            print(i, score)
            num += score[1] / (score[0] + score[1])
            denom += 1
        score = num / denom if denom > 0 else 0
        print(score)
    
    return None


# 1st row (n+1 choose k+1) * (1-(k mod 2))
# psuedopascal to compute the follow-on rows
# assuming solvability, we want to maximize the probability that our current state and our state with
# a particular single flip are one order apart in the correct direction


# 2, 0
# 2, 2, 0
# 2, 4, 2, 0
# 2, 6, 6, 2, 0
# 2, 8,12, 8, 2, 0
# 2,10,20,20,10, 2, 0

# 3,-9,19,-33,51,-73,99
# 3,-6,10,-14,18,-22,26
# 3,-3, 4, -4, 4, -4, 4
# 3, 0, 1,  0, 0,  0, 0
# 3, 3, 1,  1, 0,  0, 0
# 3, 6, 4,  2, 1,  0, 0
# 3, 9,10,  6, 3,  1, 0

#       4, 0, 4, 0
#     4, 4, 4, 4, 0
#   4, 8, 8, 8, 4, 0
# 4,12,16,16,12, 4, 0

#       5, 0,10, 0, 1
#     5, 5,10,10, 1, 1
#   5,
# 5,


# 3
#
# @1 [1, 2, 1]
# @2 [2, 2, 0]
# @3 [3, 0, 1]

# 5  [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)
#
# @1 [1, 4, 6,  4, 1], [4, 6,  4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),
# @2 [2, 6, 6,  2, 0], [3, 4,  4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)
# @3 [3, 6, 4,  2, 1], [2, 4,  6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)
# @4 [4, 4, 4,  4, 0], [1, 6,  6, 1, 1] - 16, 15 - 
# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 - 

# @0 [0.0, 0.0, 0.0, 0.0, 0.0]
# @1 [0.2, 0.4, 0.6, 0.8, 1.0]
# @2 [0.4, 0.6, 0.6, 0.4, 0.0]
# @3 [0.6, 0.6, 0.4, 0.4, 1.0]
# @4 [0.8, 0.4, 0.4, 0.8, 0.0]
# @5 [1.0, 0.0, 1.0, 0.0, 1.0]

# 6
#
# @1 [1, 5, 10, 10, 5, 1]
# @2 [2, 8, 12, 8,  2, 0]
# @3 [3, 9, 10, 6,  3, 1]
# @4 [4, 8, 8,  8,  4, 0]
# @5 [5, 5, 10, 10, 1, 1]
# @6 [6, 0, 20, 0,  6, 0]

# last row, 1 if odd, 0 if even
# second to last, subtract 2 on odds, add 2 on evens

def compute_distributions(N):
    dist = np.zeros((N, N))
    for j in range(0, N):
        dist[0][j] = math.comb(N - 1, j)
        dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
    for i in range(1, N):
        for j in range(0, i + 1):
            dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
        for k in range(i + 1, N):
            for j in reversed(range(0, k)):
                dist[i][j+1] = dist[i][j] + dist[i][j+1]
    for i in range(0, N):
        for j in range(0, N):
            denom = math.comb(N, j+1)
            dist[i][j] /= denom
    return dist
            

def main():
    N = 32
    sample_size = 2048
    sample_ids = set()
    samples = []

    dist = compute_distributions(N)
    print(dist)

    for i in range(0, sample_size):
        x = random_x(N)
        y = int(xor(x))
        p = Point(x, y)
        p_id = p.id()
        if p_id in sample_ids:
            continue
        sample_ids.add(p_id)
        samples.append(p)

    # for i in range(0, 2**N):
    #     x = decode(i, N)
    #     y = int(xor(x))
    #     samples.append(Point(x,y))

    base = np.zeros(N)
    current = np.zeros(N)

    for _ in range(0, N):
        lowest_err = -1
        use_flip = -1
        for flip in range(-1, N):
            coherent_distances = {}
            incoherent_distances = {}
            all_coherent = True
            for i in range(0, len(samples)):
                a = samples[i]
                for j in range(i + 1, len(samples)):
                    # if i == j:
                    #     continue
                    b = samples[j]
                    distance = hamming_distance(a, b)
                    if distance not in coherent_distances:
                        coherent_distances[distance] = 0
                    if distance not in incoherent_distances:
                        incoherent_distances[distance] = 0
                    is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)
                    if is_coherent:
                        coherent_distances[distance] += 1
                    else:
                        incoherent_distances[distance] += 1
                        all_coherent = False
            if all_coherent:
                print('Flip and halt', flip)
                return
            # print(coherent_distances, incoherent_distances)

            for k in range(0, N):
                known_incoherence_at_k = dist[k]
                err = 0
                # denom = 0
                for i in range(0, N):
                    if i not in coherent_distances:
                        continue
                    est_incoherence = incoherent_distances[i] / (coherent_distances[i] + incoherent_distances[i])
                    confidence = 1.0
                    # print(k, i, est_incoherence)
                    err += confidence * abs(est_incoherence - known_incoherence_at_k[i - 1])# / ((est_incoherence + known_incoherence_at_k[i - 1]) / 2)
                    # denom += 1
                # print(flip, k, err)
                # err /= denom
                if flip < 0:
                    base[k] = err
                else:
                    current[k] = err
            if flip >= 0:
                # np.divide(current, np.max(current), current)
                # print(flip, current)
                index = -1
                base_sum = 0
                current_sum = 0
                base_total = 0
                current_total = 0
                for k in range(0, N):
                    if base[k] > 0:
                        base_sum += k / base[k]
                        base_total += 1.0 / base[k]
                    else:
                        base_sum += k * 1e6
                        base_total += 1e6
                    if current[k] > 0:
                        current_sum += k / current[k]
                        current_total += 1.0 / current[k]
                    else:
                        current_sum += k * 1e6
                        current_total += 1e6
                # print(base_sum, base_total, current_sum, current_total)
                # print(current_sum / current_total, base_sum / base_total)
                rel_to_base = (current_sum / current_total) - (base_sum / base_total)
                
                # print(base_sum, base_total)
                # print(base_sum / base_total, current_sum / current_total)

                # for k in range(0, N - 2):
                #     # err = base[k + 1] * current[k] * 1.0 / (base[k + 1] * current[k + 2])
                #     err = base[k + 1] * current[k]
                #     if rel_to_base < 0 or err < rel_to_base:
                #         rel_to_base = err
                #         index = k

                if use_flip < 0 or rel_to_base < lowest_err:
                    lowest_err = rel_to_base
                    use_flip = flip
                print(flip, rel_to_base)
            else:
                pass
                # np.divide(base, np.max(base), base)
                # print(flip, base)
    
        if lowest_err > 0:
            return
        print('Flip', use_flip, lowest_err)
        for p in samples:
            if p.x[use_flip]:
                p.y ^= 1

if __name__ == "__main__":
    main()
Add probabilities work to git 2023-01-01 23:45:51 +00:00			`from cmath import isnan`
			`import numpy as np`
			`import random`
			`import hashlib`
			`import math`

			`def get_state_id(state):`
			`return ','.join([str(x) for x in sorted(state)])`

			`class Point():`
			`def __init__(self, x, y):`
			`self.x = x`
			`self.y = y`

			`def id(self):`
			`return ','.join([str(int(x)) for x in self.x])`

			`class Influence():`
			`def __init__(self, a, b):`
			`self.a = a`
			`self.b = b`
			`self.original_dof = set()`
			`self.dof = set()`
			`for i in range(0, len(a.x)):`
			`if a.x[i] != b.x[i]:`
			`self.original_dof.add(i)`
			`self.dof.add(i)`

			`def coherent(self):`
			`return self.a.y == self.b.y`

			`def id(self):`
			`return ','.join(sorted([self.a.id(), self.b.id()]))`

			`def encode(v):`
			`byte_values = []`
			`for i in range(0, math.ceil(len(v) / 8)):`
			`x = 0`
			`for j in range(0, 8):`
			`index = i * 8 + j`
			`if index >= len(v):`
			`continue`
			`x <<= 1`
			`x \|= int(v[index])`
			`byte_values.append(x)`
			`return bytearray(byte_values)`

			`def decode(x, N):`
			`index = 0`
			`output = np.zeros((N))`
			`while x > 0 and index < N:`
			`output[index] = x & 0b1`
			`x >>= 1`
			`index += 1`
			`return output`

			`def sha(v):`
			`x = encode(v)`
			`m = hashlib.sha256()`
			`m.update(x)`
			`result = m.digest()`
			`return result[0] & 0b1`

			`def hamming_distance(a, b):`
			`return np.sum(np.logical_xor(a.x, b.x))`

			`def random_x(N):`
			`x = np.zeros((N))`
			`for i in range(0, N):`
			`x[i] = random.randint(0, 1)`
			`return x`

			`def xor(x):`
			`# return sum(x[:4]) % 2`
			`return sum(x) % 2`

			`def create_dof_map(influences):`
			`dof_map = {}`
			`for influence in influences:`
			`for i in influence.dof:`
			`if not i in dof_map:`
			`dof_map[i] = []`
			`dof_map[i].append(influence)`
			`return dof_map`

			`def flip(influences, i):`
			`for influence in influences:`
			`if i in influence.dof:`
			`influence.a.y = int(influence.a.y) ^ 1`

			`def remove_dof(dof_map, i, flip = False):`
			`for influence in dof_map[i]:`
			`influence.dof.remove(i)`
			`if flip:`
			`influence.a.y = int(influence.a.y) ^ 1`
			`# if len(influence.dof) == 0 and not influence.coherent():`
			`# raise Exception('Invalid')`
			`del dof_map[i]`

			`def solve(dof_map, all_influences, all_samples):`
			`eliminated = True`
			`while eliminated:`
			`eliminated = False`
			`for influence in all_influences:`
			`if len(influence.dof) == 1:`
			`i = next(iter(influence.dof))`
			`if influence.coherent:`
			`remove_dof(dof_map, i)`
			`eliminated = True`
			`else:`
			`print('Forced', i)`
			`remove_dof(dof_map, i, True)`
			`eliminated = True`

			`lowest_dof = None`
			`for influence in all_influences:`
			`if not influence.coherent and len(influence.dof) > 1:`
			`if lowest_dof is None or len(influence.dof) < len(lowest_dof.dof):`
			`lowest_dof = influence`

			`flip = None`
			`highest_score = -1`

			`for i in lowest_dof.dof:`
			`per_point_scores = {}`
			`i_influences = dof_map[i]`
			`left = 0`
			`right = 0`
			`for influence in i_influences:`
			`if not influence.a in per_point_scores:`
			`per_point_scores[influence.a] = [0, 0]`
			`if not influence.b in per_point_scores:`
			`per_point_scores[influence.b] = [0, 0]`
			`if influence.coherent:`
			`per_point_scores[influence.a][0] += 1`
			`per_point_scores[influence.b][0] += 1`
			`left += 1`
			`else:`
			`per_point_scores[influence.a][1] += 1`
			`per_point_scores[influence.b][1] += 1`
			`right += 1`
			`print(i, left / (left + right))`
			`num = 0`
			`denom = 0`
			`for _, score in per_point_scores.items():`
			`if score[0] == score[1]:`
			`continue`
			`print(i, score)`
			`num += score[1] / (score[0] + score[1])`
			`denom += 1`
			`score = num / denom if denom > 0 else 0`
			`print(score)`

			`return None`


			`# 1st row (n+1 choose k+1) * (1-(k mod 2))`
			`# psuedopascal to compute the follow-on rows`
			`# assuming solvability, we want to maximize the probability that our current state and our state with`
			`# a particular single flip are one order apart in the correct direction`



			`# 2, 0`
			`# 2, 2, 0`
			`# 2, 4, 2, 0`
			`# 2, 6, 6, 2, 0`
			`# 2, 8,12, 8, 2, 0`
			`# 2,10,20,20,10, 2, 0`

			`# 3,-9,19,-33,51,-73,99`
			`# 3,-6,10,-14,18,-22,26`
			`# 3,-3, 4, -4, 4, -4, 4`
			`# 3, 0, 1, 0, 0, 0, 0`
			`# 3, 3, 1, 1, 0, 0, 0`
			`# 3, 6, 4, 2, 1, 0, 0`
			`# 3, 9,10, 6, 3, 1, 0`

			`# 4, 0, 4, 0`
			`# 4, 4, 4, 4, 0`
			`# 4, 8, 8, 8, 4, 0`
			`# 4,12,16,16,12, 4, 0`

			`# 5, 0,10, 0, 1`
			`# 5, 5,10,10, 1, 1`
			`# 5,`
			`# 5,`



			`# 3`
			`#`
			`# @1 [1, 2, 1]`
			`# @2 [2, 2, 0]`
			`# @3 [3, 0, 1]`

			`# 5 [5, 10, 10, 5, 1] (5 choose 1, 5 choose 2, ...)`
			`#`
			`# @1 [1, 4, 6, 4, 1], [4, 6, 4, 1, 0] - 16, 15 - binomial (4 choose 0, 4 choose 1, 4 choose 2),`
			`# @2 [2, 6, 6, 2, 0], [3, 4, 4, 3, 1] - 16, 15 - (4 choose 1) + (2 choose -1) - (2 choose 1)`
			`# @3 [3, 6, 4, 2, 1], [2, 4, 6, 3, 0] - 16, 15 - (4 choose 2) + (2 choose -2) - (2 choose 2) + (2 choose -1) - (2 choose 1)`
			`# @4 [4, 4, 4, 4, 0], [1, 6, 6, 1, 1] - 16, 15 -`
			`# @5 [5, 0, 10, 0, 1], [0, 10, 0, 5, 0] - 16, 15 -`

			`# @0 [0.0, 0.0, 0.0, 0.0, 0.0]`
			`# @1 [0.2, 0.4, 0.6, 0.8, 1.0]`
			`# @2 [0.4, 0.6, 0.6, 0.4, 0.0]`
			`# @3 [0.6, 0.6, 0.4, 0.4, 1.0]`
			`# @4 [0.8, 0.4, 0.4, 0.8, 0.0]`
			`# @5 [1.0, 0.0, 1.0, 0.0, 1.0]`

			`# 6`
			`#`
			`# @1 [1, 5, 10, 10, 5, 1]`
			`# @2 [2, 8, 12, 8, 2, 0]`
			`# @3 [3, 9, 10, 6, 3, 1]`
			`# @4 [4, 8, 8, 8, 4, 0]`
			`# @5 [5, 5, 10, 10, 1, 1]`
			`# @6 [6, 0, 20, 0, 6, 0]`

			`# last row, 1 if odd, 0 if even`
			`# second to last, subtract 2 on odds, add 2 on evens`

			`def compute_distributions(N):`
			`dist = np.zeros((N, N))`
			`for j in range(0, N):`
			`dist[0][j] = math.comb(N - 1, j)`
			`dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))`
			`for i in range(1, N):`
			`for j in range(0, i + 1):`
			`dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))`
			`for k in range(i + 1, N):`
			`for j in reversed(range(0, k)):`
			`dist[i][j+1] = dist[i][j] + dist[i][j+1]`
			`for i in range(0, N):`
			`for j in range(0, N):`
			`denom = math.comb(N, j+1)`
			`dist[i][j] /= denom`
			`return dist`


			`def main():`
			`N = 32`
			`sample_size = 2048`
			`sample_ids = set()`
			`samples = []`

			`dist = compute_distributions(N)`
			`print(dist)`

			`for i in range(0, sample_size):`
			`x = random_x(N)`
			`y = int(xor(x))`
			`p = Point(x, y)`
			`p_id = p.id()`
			`if p_id in sample_ids:`
			`continue`
			`sample_ids.add(p_id)`
			`samples.append(p)`

			`# for i in range(0, 2**N):`
			`# x = decode(i, N)`
			`# y = int(xor(x))`
			`# samples.append(Point(x,y))`

			`base = np.zeros(N)`
			`current = np.zeros(N)`

			`for _ in range(0, N):`
			`lowest_err = -1`
			`use_flip = -1`
			`for flip in range(-1, N):`
			`coherent_distances = {}`
			`incoherent_distances = {}`
			`all_coherent = True`
			`for i in range(0, len(samples)):`
			`a = samples[i]`
			`for j in range(i + 1, len(samples)):`
			`# if i == j:`
			`# continue`
			`b = samples[j]`
			`distance = hamming_distance(a, b)`
			`if distance not in coherent_distances:`
			`coherent_distances[distance] = 0`
			`if distance not in incoherent_distances:`
			`incoherent_distances[distance] = 0`
			`is_coherent = ((flip < 0 or a.x[flip] == b.x[flip]) and a.y == b.y) or ((flip >= 0 and a.x[flip] != b.x[flip]) and a.y != b.y)`
			`if is_coherent:`
			`coherent_distances[distance] += 1`
			`else:`
			`incoherent_distances[distance] += 1`
			`all_coherent = False`
			`if all_coherent:`
			`print('Flip and halt', flip)`
			`return`
			`# print(coherent_distances, incoherent_distances)`

			`for k in range(0, N):`
			`known_incoherence_at_k = dist[k]`
			`err = 0`
			`# denom = 0`
			`for i in range(0, N):`
			`if i not in coherent_distances:`
			`continue`
			`est_incoherence = incoherent_distances[i] / (coherent_distances[i] + incoherent_distances[i])`
			`confidence = 1.0`
			`# print(k, i, est_incoherence)`
			`err += confidence * abs(est_incoherence - known_incoherence_at_k[i - 1])# / ((est_incoherence + known_incoherence_at_k[i - 1]) / 2)`
			`# denom += 1`
			`# print(flip, k, err)`
			`# err /= denom`
			`if flip < 0:`
			`base[k] = err`
			`else:`
			`current[k] = err`
			`if flip >= 0:`
			`# np.divide(current, np.max(current), current)`
			`# print(flip, current)`
			`index = -1`
			`base_sum = 0`
			`current_sum = 0`
			`base_total = 0`
			`current_total = 0`
			`for k in range(0, N):`
			`if base[k] > 0:`
			`base_sum += k / base[k]`
			`base_total += 1.0 / base[k]`
			`else:`
			`base_sum += k * 1e6`
			`base_total += 1e6`
			`if current[k] > 0:`
			`current_sum += k / current[k]`
			`current_total += 1.0 / current[k]`
			`else:`
			`current_sum += k * 1e6`
			`current_total += 1e6`
			`# print(base_sum, base_total, current_sum, current_total)`
			`# print(current_sum / current_total, base_sum / base_total)`
			`rel_to_base = (current_sum / current_total) - (base_sum / base_total)`

			`# print(base_sum, base_total)`
			`# print(base_sum / base_total, current_sum / current_total)`

			`# for k in range(0, N - 2):`
			`# # err = base[k + 1] * current[k] * 1.0 / (base[k + 1] * current[k + 2])`
			`# err = base[k + 1] * current[k]`
			`# if rel_to_base < 0 or err < rel_to_base:`
			`# rel_to_base = err`
			`# index = k`

			`if use_flip < 0 or rel_to_base < lowest_err:`
			`lowest_err = rel_to_base`
			`use_flip = flip`
			`print(flip, rel_to_base)`
			`else:`
			`pass`
			`# np.divide(base, np.max(base), base)`
			`# print(flip, base)`

			`if lowest_err > 0:`
			`return`
			`print('Flip', use_flip, lowest_err)`
			`for p in samples:`
			`if p.x[use_flip]:`
			`p.y ^= 1`

			`if __name__ == "__main__":`
			`main()`