probabilities/space_analysis3.py
2023-01-01 18:45:51 -05:00

385 lines
13 KiB
Python

import math
import numpy as np
import sys
np.set_printoptions(threshold=sys.maxsize)
cache = {}
def p_bernoulli(n, k, m, j):
key = (n, k, m, j)
if key in cache:
return cache[key]
probabilities = np.zeros((n + 1, n + 1))
probabilities.fill(-1)
stack = [(0,0)]
while len(stack) > 0:
(a, b) = stack.pop()
if a + b == n:
probabilities[a][b] = 1 if a == k else 0
elif a > j:
probabilities[a][b] = 0
elif b > (m - j):
probabilities[a][b] = 0
else:
p_left = probabilities[a + 1][b]
p_right = probabilities[a][b + 1]
if p_left >= 0 and p_right >= 0:
p = (j - a) / (m - a - b)
probabilities[a][b] = p_left * p + p_right * (1 - p)
else:
stack.append((a, b))
if p_left < 0:
stack.append((a + 1, b))
if p_right < 0:
stack.append((a, b + 1))
# if len(cache) % 100 == 0:
# print('Cache size: ', len(cache), math.floor(10000 * hits / (hits + misses)) / 100, '%')
cache[key] = probabilities[0][0]
return probabilities[0][0]
def decode(x, N):
index = 0
output = np.zeros((N))
while x > 0 and index < N:
output[index] = x & 0b1
x >>= 1
index += 1
return output
def hamming_distance(a, b):
return np.sum(np.logical_xor(a, b))
def xor(x, bits):
return np.sum(x[:bits]) % 2
def compute_pseudopascal(N):
dist = np.zeros((N, N))
for j in range(0, N):
dist[0][j] = math.comb(N - 1, j)
dist[-1][j] = math.comb(N, j + 1) * (1 - (j % 2))
for i in range(1, N):
for j in range(0, i + 1):
dist[i][j] = math.comb(i + 1, j + 1) * (1 - (j % 2))
for k in range(i + 1, N):
for j in reversed(range(0, k)):
dist[i][j+1] = dist[i][j] + dist[i][j+1]
return dist
def compute_pyramids(N):
num_orders = max(int(N / 2), 1)
pyramids = np.zeros((num_orders, N, N)).astype(np.int32)
# 1st order can be filled in as multiplication and forms the base case
for i in range(0, N):
for j in range(0, i + 1):
pyramids[0][i][j] = (i - j + 1) * (j + 1)
for order in range(1, num_orders):
offset = order * 2
# fill in the LHS and diagonal
for i in range(0, N - offset):
value = math.comb(2 * (order + 1) + i - 1, i)
pyramids[order][i + offset][0] = value
# mirror
pyramids[order][i + offset][i + offset] = value
# accumulate along the diagonals
for i in range(1, N):
value = pyramids[order][i][0]
acc = value
for j in range(1, N - i):
value += acc
pyramids[order][i + j][j] = value
acc += pyramids[order - 1][i + j - 1][j - 1]
return pyramids
def compute_string_key(key):
return ','.join([str(x) for x in key])
def generate_bands(points):
all_bands = [{} for _ in range(0, len(points))]
for origin_index in range(0, len(points)):
bands = all_bands[origin_index]
key = []
group = [index for index in range(0, len(points)) if index != origin_index]
stack = [(origin_index, key, group)]
while len(stack) > 0:
(origin_index, key, group) = stack.pop()
distance = hamming_distance(points[origin_index], points[group[0]])
in_band = []
out_of_band = []
for index in group:
if distance == hamming_distance(points[origin_index], points[index]):
in_band.append(index)
else:
out_of_band.append(index)
if len(out_of_band) > 0:
stack.append((origin_index, key, out_of_band))
key = key[:]
key.append(distance)
string_key = compute_string_key(key)
if string_key not in bands:
bands[string_key] = 0
bands[string_key] += len(in_band)
if len(in_band) < 2:
continue
for origin_index in in_band:
group = [index for index in in_band if index != origin_index]
stack.append((origin_index, key, group))
return all_bands
def test():
N = 8
points = [decode(x, N) for x in range(0, 2 ** N)]
print(generate_bands(points)[0])
# 2
# 4, 4,
# 6, 8, 6
# 8, 12, 12, 8
# 10, 16, 18, 16, 10
# 12, 20, 24, 24, 20, 12
# 14, 24, 30, 32, 30, 24, 14
# 16, 28, 36, 40, 40, 36, 28, 16
# 1
# 2, 2
# 3, 4, 3
# 4, 6, 6, 4
# 5, 8, 9, 8, 5
# 6, 10, 12, 12, 10, 6
# 7, 12, 15, 16, 15, 12, 7
# 6, 0, 6
# 24, 12, 12, 24
# 60, 48, 36, 48, 60
# 120, 120, 96, 96, 120, 120
# 210, 240, 210, 192, 210, 240, 210
# 336, 420, 396, 360, 360, 396, 420, 336
# 504, 672, 672, 624, 600, 624, 672, 672, 504
# 1, 0, 1
# 4, 2, 2, 4
# 10, 8, 6, 8, 10
# 20, 20, 16, 16, 20, 20
# 35, 40, 35, 32, 35, 40, 35
# 56, 70, 66, 60, 60, 66, 70, 56
# 84, 112, 112, 104, 100, 104, 112, 112, 84
#
# 20, 0, 20, 0, 20,
# 120, 40, 80, 80, 40, 120
# 420, 240, 260, 320, 260, 240, 420
# 1120, 840, 760, 880, 880, 760, 840, 1120
# 1, 0, 1, 0, 1
# 6, 2, 4, 4, 2, 6
# 21, 12, 13, 16, 13, 12, 21
# 56, 42, 38, 44, 44, 38, 42, 56
# 70, 0, 70, 0, 70, 0, 70
# 560, 140, 420, 280, 280, 420, 140, 560
# 252, 0, 252, 0, 252, 0, 252, 0, 252
# 2520, 504, 2016, 1008, 1512, 1512, 1008, 2016, 504, 2520
# 1, 2, 3, 4,
# 1, 3, 6, 10
# 1, 4, 10, 20
# 1, 5, 15, 35
# 1, 6,
# 1, 2, 1
# 1, 3, 3, 1
# 1, 4, 6, 4, 1
# 1, 5, 10, 10, 5, 1
# 1, 6, 15, 20, 15, 6, 1
# 2, 6, 12, 20, 30, 42, 56
# 6, 30, 90, 210, 420
# 20, 140, 560,
# 70
# 1, 3, 6, 10, 15, 21, 28
# 1, 5, 15, 35
def main():
test()
return
N = 5
# print(compute_pseudopascal(10))
# print(compute_pyramids(10))
points = []
for i in range(0, 2 ** N):
points.append(decode(i, N))
bands = [[[] for _ in range(0, N + 1)] for _ in range(0, len(points))]
for i in range(0, len(points)):
a = points[i]
for j in range(0, len(points)):
if i == j:
continue
b = points[j]
distance = hamming_distance(a, b)
bands[i][distance].append(b)
golden_incoherent_distances = None
golden_total_distances = None
golden_incoherent_bands = None
golden_total_bands = None
golden_incoherent_sub_bands = None
golden_total_sub_bands = None
# for t in range(0, len(points)):
for t in range(0, 1):
incoherent_distances = np.zeros((N + 1, N + 1)).astype(np.int32)
total_distances = np.zeros((N + 1)).astype(np.int32)
if t == 0:
golden_incoherent_distances = incoherent_distances
golden_total_distances = total_distances
incoherent_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32)
total_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
if t == 0:
golden_incoherent_bands = incoherent_bands
golden_total_bands = total_bands
incoherent_sub_bands = np.zeros((N + 1, N + 1, N + 1, N + 1)).astype(np.int32)
total_sub_bands = np.zeros((N + 1, N + 1, N + 1)).astype(np.int32)
if t == 0:
golden_incoherent_sub_bands = incoherent_sub_bands
golden_total_sub_bands = total_sub_bands
# print(t)
for k in range(1, N + 1):
# print(k, '================================')
x_a = points[t]
y_a = xor(x_a, k)
for distance in range(0, N + 1):
# print('distance', distance)
band = bands[t][distance]
for x_b in band:
y_b = xor(x_b, k)
if k == 1:
total_distances[distance] += 1
if y_a != y_b:
incoherent_distances[k][distance] += 1
if len(band) < 2:
continue
for band_origin in range(0, len(band)):
x_p = band[band_origin]
y_p = xor(x_p, k)
sub_bands = [[] for _ in range(0, N + 1)]
for i in range(0, len(band)):
if i == band_origin:
continue
x_q = band[i]
y_q = xor(x_q, k)
band_distance = hamming_distance(x_p, x_q)
if k == 1:
total_bands[distance][band_distance] += 1
if y_p != y_q:
incoherent_bands[k][distance][band_distance] += 1
sub_bands[band_distance].append(x_q)
# incoherent_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
# total_sub_bands = np.zeros((N + 1, N + 1)).astype(np.int32)
for band_distance in range(0, N + 1):
sub_band = sub_bands[band_distance]
if len(sub_band) < 2:
continue
for sub_band_origin in range(0, len(sub_band)):
x_u = sub_band[sub_band_origin]
y_u = xor(x_u, k)
for i in range(0, len(sub_band)):
if i == sub_band_origin:
continue
x_v = sub_band[i]
y_v = xor(x_v, k)
sub_band_distance = hamming_distance(x_v, x_u)
if k == 1:
total_sub_bands[band_distance][sub_band_distance] += 1
if y_u != y_v:
incoherent_sub_bands[k][distance][band_distance][sub_band_distance] += 1
# print(incoherent_sub_bands)
# print(total_sub_bands)
# print('==========================')
if t != 0:
if not np.array_equal(golden_incoherent_sub_bands, incoherent_sub_bands):
print(golden_incoherent_sub_bands)
print(incoherent_sub_bands)
raise Exception('Not symmetric')
if not np.array_equal(golden_incoherent_bands, incoherent_bands):
print(golden_incoherent_bands)
print(incoherent_bands)
raise Exception('Not symmetric')
# print(incoherent_bands)
# print(total_bands)
# print(distance, hamming_distance(x_p, x_q), y_p, y_q)
if not np.array_equal(golden_incoherent_distances, incoherent_distances):
print(golden_incoherent_distances)
print(incoherent_distances)
raise Exception('Not symmetric')
# print(golden_total_distances)
# print(golden_incoherent_distances)
# print(golden_total_bands)
# print(golden_incoherent_bands)
# print(golden_total_bands)
p = np.ones((2 ** N, N + 1))
for sample_size in range(0, 2 ** N):
for k in range(0, N + 1):
for d1 in range(0, N + 1):
if golden_total_distances[d1] == 0:
continue
m = golden_total_distances[d1]
j = golden_incoherent_distances[k][d1]
n = min(sample_size, m)
l = int(n * j / m)
p[sample_size][k] *= p_bernoulli(n, l, m, j)
print(np.around(p, 2))
p = np.ones((4 ** N, N + 1))
for sample_size in range(0, 4 ** N):
for k in range(0, N + 1):
for d1 in range(0, N + 1):
for d2 in range(0, N + 1):
if golden_total_bands[d1][d2] == 0:
continue
m = golden_total_bands[d1][d2]
j = golden_incoherent_bands[k][d1][d2]
n = min(sample_size, m)
l = int(n * j / m)
p[sample_size][k] *= p_bernoulli(n, l, m, j)
print(np.around(p, 3))
# p = np.ones((N + 1))
# for k in range(0, N + 1):
# for d1 in range(0, N + 1):
# for d2 in range(0, N + 1):
# if golden_total_bands[d1][d2] == 0:
# continue
# partial = golden_incoherent_bands[k][d1][d2] / golden_total_bands[d1][d2]
# p[k] *= max(partial, 1 - partial)
# print(p)
# p = np.ones((N + 1))
# for k in range(0, N + 1):
# for d1 in range(0, N + 1):
# for d2 in range(0, N + 1):
# for d3 in range(0, N + 1):
# if golden_total_sub_bands[d1][d2][d3] == 0:
# continue
# partial = golden_incoherent_sub_bands[k][d1][d2][d3] / golden_total_sub_bands[d1][d2][d3]
# p[k] *= max(partial, 1 - partial)
# print(p)
# print(bands)
if __name__ == "__main__":
main()