import hashlib
import random
import secrets

import numpy as np
from struct import pack_into, unpack_from


def bit_at_index(buffer, index):
    # Bits are numbered LSB-first within each byte; the byte offset wraps
    # modulo the buffer length.
    offset = (index >> 3) % len(buffer)
    return buffer[offset] & (1 << (index & 0b111)) != 0


def count_one_bits(n):
    return bin(n).count("1")


def hamming_distance(a, b):
    # Bitwise Hamming distance between two equal-length byte buffers.
    distance = 0
    for i in range(0, len(a)):
        distance += count_one_bits(a[i] ^ b[i])
    return distance


def xor_n(n):
    return count_one_bits(n) % 2


def sha(x):
    # Target function: the lowest bit of the first byte of SHA-256(x).
    m = hashlib.sha256()
    m.update(x)
    result = m.digest()
    return result[0] & 0b1


def apply_flips(samples, inputs, flips):
    # Toggle a sample's value once for every flip index whose bit is set in
    # that sample's input.
    samples = samples[:]
    for i in range(len(samples)):
        (key, old_value) = samples[i]
        new_value = old_value
        for index in flips:
            if bit_at_index(inputs[key], index):
                new_value = new_value ^ 1
        if new_value != old_value:
            samples[i] = (key, new_value)
    return samples


def coherence_for_knowns(knowns, distances, N):
    # Distance-weighted agreement between every pair of sample labels;
    # 1.0 means the labels are perfectly consistent.
    if len(knowns) == 1:
        return 1.0
    coherences = []
    for i in range(0, len(knowns)):
        (a_key, a_value) = knowns[i]
        numerator = 0
        denominator = 0
        for j in range(0, len(knowns)):
            if i == j:
                continue
            (b_key, b_value) = knowns[j]
            distance = distances[a_key][b_key]
            weight = 1.0 / (2 ** distance)
            denominator += weight
            if a_value == b_value:
                numerator += weight
        coherence = numerator / denominator if denominator > 0 else 0
        coherences.append(coherence)
    return sum(coherences) / len(coherences)


def iterate_indices(indices, N):
    # Advance `indices` to the next k-combination of range(N) in lexicographic
    # order, mutating it in place; return None once all combinations are spent.
    carry_index = -1
    for i in range(0, len(indices)):
        j = len(indices) - i - 1
        if indices[j] + 1 + i < N:
            carry_index = j
            break
    if carry_index < 0:
        return None
    base_value = indices[carry_index]
    for i in range(0, len(indices) - carry_index):
        new_value = base_value + i + 1
        if new_value >= N:
            return None
        indices[carry_index + i] = new_value
    return indices


def compute_indices(samples, inputs, N):
    # Only bit positions that take both a 0 and a 1 across the sampled inputs
    # are worth flipping.
    zero_buckets = [False for i in range(0, N)]
    one_buckets = [False for i in range(0, N)]
    for (key, _) in samples:
        for index in range(0, N):
            if bit_at_index(inputs[key], index):
                one_buckets[index] = True
            else:
                zero_buckets[index] = True
    return [index for index in range(0, N) if zero_buckets[index] and one_buckets[index]]


def compute_distances(inputs, distances):
    # Fill the symmetric pairwise Hamming-distance matrix for the sampled inputs.
    for i in range(0, len(inputs)):
        a = inputs[i]
        for j in range(i, len(inputs)):
            b = inputs[j]
            distance = hamming_distance(a, b) if j != i else 0
            distances[i][j] = distance
            distances[j][i] = distance


def reduce(samples, inputs, distances, N):
    # Greedily XOR input bits into the labels while doing so raises coherence.
    # The search depth is capped at 1, so only single-bit flips are tried.
    available_indices = compute_indices(samples, inputs, N)
    flips = []
    best_coherence = coherence_for_knowns(samples, distances, N)
    # print(best_coherence)
    # print(knowns)
    # print()
    depth = 1
    while depth <= len(available_indices) and depth < 2:
        while best_coherence < 1.0:
            best_flip = None
            try_indices = [i for i in range(0, depth)]
            while try_indices is not None:
                try_flip = [available_indices[i] for i in try_indices]
                mutated_samples = apply_flips(samples, inputs, try_flip)
                coherence = coherence_for_knowns(mutated_samples, distances, N)
                # print(try_flip, coherence)
                if coherence > best_coherence:
                    best_coherence = coherence
                    best_flip = try_flip
                try_indices = iterate_indices(try_indices, len(available_indices))
            if best_flip is None:
                depth += 1
                # print(depth)
                break
            samples = apply_flips(samples, inputs, best_flip)
            flips += best_flip
            available_indices = [index for index in available_indices if index not in best_flip]
            depth = 1
            # print()
            # print(best_flip, best_coherence)
            # print(knowns)
            # print()
        # print(depth)
        if len(available_indices) == 0:
            break
        if best_coherence == 1.0:
            break
    return (samples, best_coherence, flips)
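

# Illustrative sketch (added for clarity; not part of the original training
# loop): iterate_indices enumerates the k-combinations of range(N) in
# lexicographic order. Starting from [0, 1] with N = 4 it yields
# [0, 2], [0, 3], [1, 2], [1, 3], [2, 3] and then None.
def _example_iterate_indices(N=4, k=2):
    indices = list(range(k))
    combinations = [indices[:]]
    while iterate_indices(indices, N) is not None:
        combinations.append(indices[:])
    return combinations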


def dominant_value(knowns, M=2):
    # Most common label among the samples.
    buckets = [0 for i in range(0, M)]
    for (_, value) in knowns:
        buckets[value] += 1
    return buckets.index(max(buckets))


def solve(samples, inputs, distances, N):
    # If the greedy reduction reaches full coherence, the samples collapse to a
    # (possibly inverted) XOR of the flipped input bits. Otherwise split the
    # residual into two subproblems and combine them with an AND gate whose
    # polarity is `identity`.
    (samples, coherence, flips) = reduce(samples, inputs, distances, N)
    if coherence == 1.0:
        inverted = samples[0][1]
        return (inverted, flips, None)
    identity = dominant_value(samples)
    left = [(key, 1) for (key, value) in samples if value != identity]
    right = [(key, 1) for (key, value) in samples if value != identity]
    for (key, value) in samples:
        if value == identity:
            if random.random() > 0.5:
                left.append((key, 0))
            else:
                right.append((key, 0))
    return (0, flips, (identity, solve(left, inputs, distances, N), solve(right, inputs, distances, N)))


def evaluate(model, x, value=0):
    (inverted, flips, child) = model
    for i in flips:
        if bit_at_index(x, i) != 0:
            value ^= 1
    if child is not None:
        (identity, left_child, right_child) = child
        left = evaluate(left_child, x)
        right = evaluate(right_child, x)
        if left & right != identity:
            value ^= 1
    if inverted:
        value ^= 1
    return value


def transform(x, layers):
    # Run x through every trained layer and pack the per-model outputs into the
    # first byte of x (which random_input() reserves as zero).
    x[0] = 0
    for layer in layers:
        prefix = 0
        for i in range(0, len(layer)):
            model = layer[i]
            value = evaluate(model, x)
            prefix <<= 1
            prefix |= value
        x[0] = prefix


def encode_f(f, buffer, offset=0):
    # Serialize a model: one byte for the inversion flag, then opcode 0 plus a
    # 32-bit index per flip, then either opcode 1 (leaf) or opcode 2/3 (AND
    # gate with identity 0/1) followed by the two children.
    (inverted, flips, residual) = f
    pack_into('B', buffer, offset, inverted)
    offset += 1
    for index in flips:
        pack_into('B', buffer, offset, 0)
        offset += 1
        pack_into('I', buffer, offset, index)
        offset += 4
    if residual is None:
        pack_into('B', buffer, offset, 1)
        offset += 1
        return offset
    (identity, left, right) = residual
    pack_into('B', buffer, offset, 2 if not identity else 3)
    offset += 1
    offset = encode_f(left, buffer, offset)
    offset = encode_f(right, buffer, offset)
    return offset


def decode_f(buffer, offset=0):
    # Inverse of encode_f: rebuild the (inverted, flips, residual) tuple. The
    # AND gate's identity is carried by the opcode (2 -> 0, 3 -> 1).
    [inverted] = unpack_from('B', buffer, offset)
    offset += 1
    inverted &= 0b1
    flips = []
    while offset < len(buffer):
        [opcode] = unpack_from('B', buffer, offset)
        offset += 1
        opcode &= 0b11
        if opcode == 0:
            [index] = unpack_from('I', buffer, offset)
            offset += 4
            flips.append(index)
        elif opcode == 1:
            return (offset, (inverted, flips, None))
        else:
            (offset, left) = decode_f(buffer, offset)
            (offset, right) = decode_f(buffer, offset)
            identity = 0 if opcode == 2 else 1
            return (offset, (inverted, flips, (identity, left, right)))
    return (offset, (inverted, [], None))


def random_input():
    # 4-byte input; the first byte is reserved for transform()'s layer outputs.
    return bytearray(1) + secrets.token_bytes(3)


def main():
    N = 32
    S = 2 ** N
    train_size = 64
    test_size = 1000
    f = sha
    num_epochs = 4
    num_layers = 7
    layers_samples = []
    layers = []
    score = 0.5
    distances = np.zeros((train_size, train_size))
    for epoch in range(0, num_epochs):
        layer = []
        layer_samples = []
        total_correct = 0.0
        layer_index = 0
        total_difficulty = 0
        difficulty = 0
        while layer_index < num_layers:
            inputs = []
            samples = []
            raw_samples = []
            for i in range(0, train_size):
                x = random_input()
                y = f(x)
                transform(x, layers)
                inputs.append(x)
                samples.append((i, y))
                raw_samples.append((x, y))
            compute_distances(inputs, distances)
            model = solve(samples, inputs, distances, N)
            # print(model)
            # encoded = bytearray(1024)
            # offset = encode_f(model, encoded)
            # decoded_model = decode_f(encoded)
            # print()
            # print(decoded_model)
            # correct = 0
            # for (x, y) in samples:
            #     if evaluate(model, inputs[x]) == y:
            #         correct += 1
            # print(str(correct) + "/" + str(train_size))
            correct = 0
            for _ in range(0, test_size):
                x = random_input()
                y = f(x)
                transform(x, layers)
                if evaluate(model, x) == y:
                    correct += 1
            difficulty += 1
            local_score = correct / test_size
            # Reject the candidate model unless it keeps up with the running
            # score; the hurdle relaxes slightly with every failed attempt.
            if local_score < score - 0.0001 * difficulty:
                continue
            # print_score = round(local_score * 10000.0) / 100.0
            # print('Layer ' + str(layer_index) + ': ' + str(candidates) + ' ' + str(print_score) + '%')
            layer_index += 1
            total_correct += correct
            total_difficulty += difficulty
            difficulty = 0
            layer.append(model)
            layer_samples.append(raw_samples)
        score = total_correct / (test_size * num_layers)
        average_difficulty = round(total_difficulty * 100.0 / num_layers) / 100.0
        print_score = round(score * 10000.0) / 100.0
        print('Epoch ' + str(epoch) + ': ' + str(average_difficulty) + ' ' + str(print_score) + '%')
        layers.append(layer)
        layers_samples.append(layer_samples)


if __name__ == "__main__":
    main()
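

# Minimal round-trip sketch (added for illustration; the model below is a
# hypothetical hand-built example, not one produced by training): encoding a
# model with encode_f and decoding it with decode_f reproduces the same tuple.
def _example_round_trip():
    model = (0, [3, 17], (1, (1, [5], None), (0, [], None)))
    buffer = bytearray(64)
    length = encode_f(model, buffer)
    (_, decoded) = decode_f(buffer[:length])
    return decoded == model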