import hashlib
import math
from statistics import median, stdev
import numpy as np

def count_one_bits(n):
    return bin(n).count("1")

def compute_distance(a, b):
    distance = count_one_bits(a ^ b)
    # return 1 / (8 ** distance)
    if distance == 0:
        return 0
    # return 1 / (64 ** (distance - 1))
    return  distance

def xor_n(n):
    return count_one_bits(n) % 2

def sha_n(n):
    m = hashlib.sha256()
    m.update(str(n).encode("utf-8"))
    result = m.digest()
    return result[0] & 0b1

def compute_distances(N):
    return [[compute_distance(i, j) for j in range(N)] for i in range(N)]

def compute_nn_probabilities(i, knowns, distances):
    total = 0.0
    total_zero = 0.0
    total_one = 0.0
    for known in knowns:
        j = known[0]
        if i == j:
            continue
        distance = distances[i][j]
        total += distance
        if known[1] == 0:
            total_zero += distance
        else:
            total_one += distance
    p_zero = total_zero / total
    p_one = total_one / total
    return (p_zero, p_one)

def interpolate_probabilities(i, knowns, distances, probabilities, dim):
    total = 0.0
    total_dim = [0.0] * dim
    for known in knowns:
        j = known[0]
        if i == j:
            continue
        distance = distances[i][j]
        total += distance
        probability = probabilities[j]
        for index in range(dim):
            total_dim[index] += distance * probability[index]
    for index in range(dim):
        total_dim[index] /= total
    return total_dim

def compute_est_coherence(i, knowns, coherences, distances):
    total = 0.0
    coherence = 0.0
    for known in knowns:
        j = known[0]
        distance = distances[i][j]
        total += distance
        coherence += distance * coherences[j]
    return coherence / total

def compute_est_coherences(N, knowns, distances):
    nn_probabilities = [None for i in range(N)]
    nn_correct_probabilities = [None for i in range(N)]
    coherences = []

    for known in knowns:
        i = known[0]
        nn_probabilities[i] = compute_nn_probabilities(i, knowns, distances)
    
    # for i in range(len(nn_probabilities)):
    #     if not nn_probabilities[i] is None:
    #         continue
    #     nn_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_probabilities, 2)
    
    for known in knowns:
        i = known[0]
        nn_correct_probabilities[i] = [nn_probabilities[i][known[1]]]

    # for i in range(len(nn_correct_probabilities)):
    #     if not nn_correct_probabilities[i] is None:
    #         continue
    #     nn_correct_probabilities[i] = interpolate_probabilities(i, knowns, distances, nn_correct_probabilities, 1)

    coherences_0 = []
    coherences_1 = []
    for known_i in knowns:
        i = known_i[0]
        coherence = 0.0
        total = 0.0
        for known_j in knowns:
            j = known_j[0]
            if i == j:
                continue

            distance = distances[i][j]
            total += distance

            nn_p_i_0 = nn_probabilities[i][0]
            nn_p_i_1 = nn_probabilities[i][1]
            nn_c_p_i = nn_correct_probabilities[i][0]

            nn_p_j_0 = nn_probabilities[j][0]
            nn_p_j_1 = nn_probabilities[j][1]
            nn_c_p_j = nn_correct_probabilities[j][0]

            p_i_0 = nn_p_i_0 * nn_c_p_i + nn_p_i_1 * (1 - nn_c_p_i)
            p_i_1 = nn_p_i_1 * nn_c_p_i + nn_p_i_0 * (1 - nn_c_p_i)

            p_j_0 = nn_p_j_0 * nn_c_p_j + nn_p_j_1 * (1 - nn_c_p_j)
            p_j_1 = nn_p_j_1 * nn_c_p_j + nn_p_j_0 * (1 - nn_c_p_j)

            coherence += distance * (p_i_0 * p_j_0 + p_i_1 * p_j_1)
        coherences.append(coherence / total)
        if known_i[1] == 0:
            coherences_0.append(coherence / total)
        else:
            coherences_1.append(coherence / total)

    return coherences

def score(coherences, knowns, distances):
    # while len(coherences) > 1:
    #     coherences = [(coherences[i] + coherences[i + 1]) / 2 for i in range(0, len(coherences), 2)]
    # return coherences[0]

    # return median(coherences)
    # return sum(coherences) / len(coherences)
    if len(coherences) == 0:
        return 1.0
    numerator_0 = 0.0
    denominator_0 = 0.0
    numerator_1 = 0.0
    denominator_1 = 0.0
    count_0 = 0.0
    count_1 = 0.0
    for i in range(len(knowns)):
        weight = 0
        for j in range(len(knowns)):
            weight += distances[knowns[i][0]][knowns[j][0]]
        print(weight, end=' ')
        if knowns[i][1] == 0:
            denominator_0 += weight
            numerator_0 += weight * coherences[i]
            count_0 += 1
        else:
            denominator_1 += weight
            numerator_1 += weight * coherences[i]
            count_1 += 1
    # print()
    if count_0 == 0 or count_1 == 0:
        return 1.0

    # return ((sum(coherences[0]) / len(coherences[0])) + (sum(coherences[1]) / len(coherences[1]))) / 2.0
    # return (sum(coherences[0]) + sum(coherences[1])) / (len(coherences[0]) + len(coherences[1]))
    # div_0 = (numerator_0 / denominator_0 if denominator_0 > 0 else 1.0) * 0.5
    # div_1 = (numerator_1 / denominator_1 if denominator_1 > 0 else 1.0) * 0.5
    # return div_0 + div_1
    # aligned = 1.0 - abs(0.5 - max(count_0 / (count_0 + count_1), count_1 / (count_0 + count_1)))
    # return ((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) * (aligned ** 0.1)
    # return (((numerator_0 + numerator_1) / (denominator_0 + denominator_1)) + 0.12 * aligned) * (1.0 / 1.12)
    return (numerator_0 + numerator_1) / (denominator_0 + denominator_1)

def xor_by_index(knowns, index, reverse=False):
    mask = 1 << index
    knowns = knowns[:]
    for i in range(len(knowns)):
        known = knowns[i]
        if known[0] & mask or (not (known[0] & mask) and reverse):
            knowns[i] = (known[0], known[1] ^ 1)
    return knowns

def flip(n, index):
    return n ^ (1 << index)

def matrix_from_knowns(knowns, N):
    S = 2 ** N
    mat = np.zeros((S, S))
    val = np.zeros(S)
    unknowns = set([i for i in range(0, S)])
    for (i, value) in knowns:
        mat[i][i] = 1.0
        val[i] = value
        unknowns.remove(i)
    for i in unknowns:
        mat[i][i] = -1.0
        for j in range(0, N):
            mat[i][flip(i,j)] = 1.0 / N
    return (mat, val)

def compute_splits(knowns, N):
    splits = []
    for i in range(0, N):
        mask = 1 << i
        left_0 = 0
        left_1 = 0
        right_0 = 0
        right_1 = 0
        for (j, value) in knowns:
            if j & mask == 0:
                if value == 0:
                    left_0 += 1
                else:
                    left_1 += 1
            else:
                if value == 0:
                    right_0 += 1
                else:
                    right_1 += 1
        print((left_0, left_1), (right_0, right_1))
        left_ratio = min(left_0, left_1) / (left_0 + left_1)
        right_ratio = min(right_0, right_1) / (right_0 + right_1)
        # print(left_ratio, right_ratio)
        splits.append((left_ratio + right_ratio) / 2)
    return splits

def compute_coherence(knowns, N):
    S = 2 ** N
    # (mat, val) = matrix_from_knowns(knowns, N)
    # solution = np.linalg.inv(mat).dot(val)
    # for it in range(0, 1000):
    #     next = np.zeros(len(solution))
    #     for i in range(0, len(solution)):
    #         sum = 0.0
    #         for j in range(0, N):
    #             sum += solution[flip(i,j)]
    #         next[i] = sum / N
    #     solution = next
    # return 0.0

    # coherence_0 = 0.0
    # coherence_1 = 0.0
    # zeros = 0.0
    # ones = 0.0
    # lowest = 1.0
    # print()
    (mat, val) = matrix_from_knowns(knowns, N)
    A = np.linalg.inv(mat).dot(val)
    knowns_nn = []
    for known_index in range(0, len(knowns)):
        (mat, val) = matrix_from_knowns(knowns[:known_index] + knowns[known_index + 1:], N)
        solution = np.linalg.inv(mat).dot(val)
        (i, value) = knowns[known_index]
        value_nn = solution[i]
        knowns_nn.append((i, value_nn))
    (mat, val) = matrix_from_knowns(knowns_nn, N)
    B = np.linalg.inv(mat).dot(val)
    return 1.0 - (sum(abs(A - B)) / len(A))
    # # print(A)
    # # print(B)
    # A_sub_B = A - B
    # print(A)
    # print(B)
    # print(A)
    # print(B)
    # print(np.dot(A, B) / len(A))
    # return 1.0 - (np.dot(A_sub_B, A_sub_B) / len(A))
        # print(i, value, value_nn, partial)
        # coherence += ((value * value_nn) + ((1 - value) * (1 - value_nn))) / len(knowns)
        # if value == 0:
        #     coherence_0 += partial
        #     zeros += 1
        # else:
        #     coherence_1 += partial
        #     ones += 1
    # if zeros == 0 or ones == 0:
    #     return 1.0    
    # return 0.5 * coherence_0 / zeros + 0.5 * coherence_1 / ones

    # coherences = np.zeros(S)
    # (mat, val) = matrix_from_knowns(knowns, N)
    # solution = np.linalg.inv(mat).dot(val)
    # print(solution)
    # for i in range(0, S):
    #     p = solution[i]
    #     coherence = 0.0
    #     for j in range(0, N):
    #         q = solution[flip(i,j)]
    #         coherence += ((p * q) + ((1 - p) * (1 - q))) / N
    #     coherences[i] = coherence
    # print(coherences)
    # return sum(coherences) / len(coherences)

def compute_split_knowns(knowns, N):
    sum = 0
    splits = []
    for i in range(0, N):
        mask = 1 << i
        left = []
        right = []
        for (j, value) in knowns:
            k = (j & ((1 << i) - 1)) | ((j & ~((1 << (i + 1)) - 1)) >> 1)
            masked_known = (k, value)
            if j & mask == 0:
                left.append(masked_known)
            else:
                right.append(masked_known)
        left_coherence = compute_coherence(left, N - 1)
        right_coherence = compute_coherence(right, N - 1)
        splits.append((left_coherence, right_coherence))
        sum += min(left_coherence, right_coherence) * (1.0 - abs(left_coherence - right_coherence))
    # print()
    # print(splits)
    # print()
    return sum / N

def remove_bit(i, n):
    return (i & ((1 << n) - 1)) | ((i & ~((1 << (n + 1)) - 1)) >> 1)

def compute_split_knowns_r(knowns, N):
    if len(knowns) == 0:
        raise ValueError('This should never happen')

    hist = np.zeros(N)
    for i in range(0, N):
        mask = 1 << i
        for (j, value) in knowns:
            if j & mask == 0:
                hist[i] += 1

    constant_bits = []
    for i in range(0, N):
        if hist[i] == 0 or hist[i] == len(knowns):
            constant_bits.append(i)

    if len(constant_bits) > 0:
        constant_bits.reverse()
        for n in constant_bits:
            reduced_knowns = []
            for (j, value) in knowns:
                reduced_knowns.append((remove_bit(j, n), value))
            knowns = reduced_knowns
        return compute_split_knowns_r(knowns, N - len(constant_bits))

    if len(knowns) == 1:
        return 1.0
    if len(knowns) == 2:
        if knowns[0][1] == knowns[1][1]:
            return 1.0
        else:
            return 0.0

    sum = 0
    for i in range(0, N):
        mask = 1 << i
        left = []
        right = []
        for (j, value) in knowns:
            k = remove_bit(j, i)
            masked_known = (k, value)
            if j & mask == 0:
                left.append(masked_known)
            else:
                right.append(masked_known)

        # left_correctness = max(left_0_count, left_1_count) / (left_0_count + left_1_count) if left_0_count > 0 and left_1_count > 0 else 1.0
        # right_correctness = max(right_0_count, right_1_count) / (right_0_count + right_1_count) if right_0_count > 0 and right_1_count > 0 else 1.0
        left_coherence = compute_split_knowns_r(left, N - 1)
        right_coherence = compute_split_knowns_r(right, N - 1)
        evenness = min(left_coherence, right_coherence) / max(left_coherence, right_coherence) if left_coherence > 0 and right_coherence > 0 else 1.0
        # sum += min(left_coherence, right_coherence) * (evenness ** 2)
        # delta = 1.0 - ((left_coherence - right_coherence) * (left_coherence - right_coherence))
        sum += 0.7 * min(left_coherence, right_coherence) + 0.3 * evenness ** 2
        # sum += min(left_coherence, right_coherence) * (1.0 - abs(left_coherence - right_coherence))
    return sum / N

def main():
    N = 8
    S = 2 ** N
    distances = compute_distances(S)

    knowns = [(i, sha_n(i)) for i in [
        0, 1, 2, 3, 4, 5, 6, 7
        # 0, 3, 4, 5, 7
        # 3, 5, 6, 10, 12, 14
        # 1, 3, 7, 10, 14, 15
        # 0, 3, 5, 6, 10, 11, 12
        # 0, 3, 5, 6, 10, 11, 12, 24, 30
        # 0, 3, 5, 6, 10, 11, 12, 24, 30, 52, 63, 255, 243, 127
        # 128, 131, 248, 0, 7, 13, 17, 19
    ]]

    # best_coherence = compute_coherence(knowns, N)
    best_coherence = compute_split_knowns_r(knowns, N)
    print(best_coherence)
    print(knowns)
    print()
    while best_coherence < 1.0:
        best_index = -1
        best_reverse = False
        # best_coherence = 0
        for i in range(0, N):
            for reverse in [False, True]:
                mutated_knowns = xor_by_index(knowns, i, reverse)
                # coherence = compute_coherence(mutated_knowns, N)
                coherence = compute_split_knowns_r(mutated_knowns, N)
                print(i, reverse, coherence)
                if coherence > best_coherence:
                    best_coherence = coherence
                    best_index = i
                    best_reverse = reverse
        if best_index < 0:
            break
        knowns = xor_by_index(knowns, best_index, best_reverse)
        print()
        print(best_index, best_reverse, best_coherence)
        print(knowns)
        print()
    print(knowns)

    # for known_i in knowns:
    #     i = known_i[0]
    #     for known_j in knowns:
    #         j = known_j[0]
    #         print(distances[i][j], end=' ')
    #     print()

    # print(knowns)
    # print()

    # coherences = compute_est_coherences(N, knowns, distances)
    # best_coherence = score(coherences, knowns, distances)
    # print(best_coherence)

    # flipped = []
    # while best_coherence < 1.0:
    #     print()
    #     # print(knowns)
    #     # print()
    #     best_index = -1
    #     # best_coherence = 0
    #     for i in range(0, n):
    #         if i in flipped:
    #             continue
    #         mutated_knowns = xor_by_index(knowns, i)
    #         coherences = compute_est_coherences(N, mutated_knowns, distances)
    #         coherence = score(coherences, mutated_knowns, distances)
    #         # print(coherence)
    #         print(coherence, end=' ')
    #         print(mutated_knowns)
    #         if coherence > best_coherence:
    #             best_coherence = coherence
    #             best_index = i
    #     if best_index < 0:
    #         break
    #     knowns = xor_by_index(knowns, best_index)
    #     # flipped.append(best_index)
    #     print(knowns)


if __name__ == "__main__":
    main()