# Greedy search for a sparse XOR-of-monomials model of a target boolean
# function (here, the parity of all input bits except the first two), driven
# by a nearest-neighbour "coherence" score over random samples.
import bisect
import hashlib
import math
import random
import statistics
from math import comb, isnan
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
from astropy import modeling
from scipy import optimize, stats


def encode(v):
    # Pack a sequence of bits into a bytearray, 8 bits per byte, MSB first.
    byte_values = []
    for i in range(0, math.ceil(len(v) / 8)):
        x = 0
        for j in range(0, 8):
            index = i * 8 + j
            if index >= len(v):
                continue
            x <<= 1
            x |= int(v[index])
        byte_values.append(x)
    return bytearray(byte_values)


def sha(v):
    # Hash the bit vector and return the lowest bit of the first digest byte.
    x = encode(v)
    m = hashlib.sha256()
    m.update(x)
    result = m.digest()
    return result[0] & 0b1


def xor(v):
    # Target function: parity (XOR) of all input bits except the first two.
    return np.sum(v[2:]) % 2


def hamming_distance(a, b, scratch):
    np.logical_xor(a, b, scratch)
    return sum(scratch)


def index_hash(indices):
    return ','.join([str(index) for index in sorted(indices)])


def bin_div(a, b):
    if a == 0 and b == 0:
        return 2
    if a == 1 and b == 0:
        return -1
    if a == 0 and b == 1:
        return 0
    return 1


class Candidate():
    def __init__(self, indices):
        self.indices = indices[:]
        self.uplift = 0

    def evaluate(self, x):
        # An index equal to len(x) acts as a null term and forces the product to 0.
        if len(x) in self.indices:
            return 0
        value = 1
        for index in self.indices:
            value *= x[index]
        return value

    def id(self):
        return index_hash(self.indices)

    def eval_str(self):
        parts = []
        for index in self.indices:
            parts.append('x[' + str(index) + ']')
        return '*'.join(parts)
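
# Illustrative sketch (added for clarity, not called anywhere): how a Candidate
# behaves on the doubled input encoding used below, where raw bit k appears at
# x[2*k] and its negation at x[2*k + 1]. A Candidate is a product (AND) of the
# selected literals, so Candidate([0, 3]) computes x0 AND (NOT x1).
def _candidate_demo():
    candidate = Candidate([0, 3])
    # Raw bits [1, 0] -> doubled encoding [1, 0, 0, 1]; x[0]*x[3] = 1.
    assert candidate.evaluate([1, 0, 0, 1]) == 1
    # Raw bits [1, 1] -> doubled encoding [1, 0, 1, 0]; x[0]*x[3] = 0.
    assert candidate.evaluate([1, 0, 1, 0]) == 0
    return candidate.eval_str()  # 'x[0]*x[3]'
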
class Probabilities():
    def __init__(self):
        self.N = 16
        self.actual_N = self.N * 2
        self.num_terms = 1
        self.num_candidates = 100
        # self.sample_size = self.N ** 2
        self.sample_size = 1024
        self.p = np.zeros((self.actual_N + 1,))
        self.p_temp = np.empty_like(self.p)
        self.next_p = np.empty_like(self.p)
        self.knowns = []
        self.stops = set()
        self.reset_p()
        self.epoch = 0

        self.inputs = np.zeros((self.sample_size, self.actual_N)).astype(np.int32)
        self.raw_inputs = np.zeros((self.sample_size, self.N)).astype(np.int32)
        self.masked_distances = np.zeros((self.sample_size, self.sample_size))
        self.distances = np.zeros((self.sample_size, self.sample_size))
        self.xor_square = np.zeros((self.sample_size, self.sample_size))
        self.nn = np.zeros((self.sample_size, self.sample_size)).astype(np.int32)
        self.nn_distances = np.zeros((2, self.sample_size)).astype(np.int32)
        self.base_outputs = np.zeros((self.sample_size)).astype(np.int32)
        self.outputs = np.zeros((self.sample_size)).astype(np.int32)
        self.expected_outputs = np.zeros((self.sample_size)).astype(np.int32)
        self.base_output_xor = np.zeros((self.sample_size)).astype(np.int32)
        self.output_xor = np.zeros((self.sample_size)).astype(np.int32)
        self.mask = np.zeros((self.sample_size))
        self.numerators = np.zeros((self.sample_size))
        self.denominators = np.zeros((self.sample_size))
        self.coherences = np.zeros((self.sample_size))
        self.max_coherences = np.zeros((self.actual_N + 1))
        self.max_candidates = [None for _ in range(0, self.actual_N)]

        self.uplifts = np.zeros((self.actual_N))
        self.uplift_means = np.zeros((self.actual_N))
        self.uplift_medians = np.zeros((self.actual_N))
        self.uplift_convergences = np.zeros((self.actual_N))
        # self.subspace_uplift_samples = [[] for _ in range(0, self.actual_N)]
        self.superspace_uplift_samples = []
        self.subspace_uplifts = np.zeros((self.actual_N))
        self.uplift_ranges = [[0, 0] for _ in range(0, self.actual_N)]
        self.uplift_stddevs = np.zeros((self.actual_N))
        self.base_coherences = np.zeros((self.sample_size))
        self.offset_coherences = np.zeros((self.sample_size))
        self.last_index = -1
        self.last_pvalue = -1
        self.left_half = True

        self.samples = 10
        self.num_bins = 1000
        # self.samples = 200
        self.base_coherence_samples = np.zeros((self.samples))
        self.coherence_samples = np.zeros((self.actual_N, self.samples))
        self.subspace_uplift_samples = np.zeros((self.actual_N, self.samples))
        self.subspace_uplift_weights = np.zeros((self.actual_N, self.samples))

        self.layers = []
        self.layer_confidence = {}
        self.base = None
        self.scratch = np.zeros((self.N,))
        self.last_value = -1
        self.rounds = 0
        self.average_delta_over_null = 0
        self.visited = set()
        self.candidate_pool = []
        self.candidate_ids = set()
        self.has_added_layer = False

    def randomize_inputs(self):
        for i in range(0, self.sample_size):
            for j in range(0, self.N):
                val = random.randint(0, 1)
                self.raw_inputs[i][j] = val
                self.inputs[i][j * 2] = val
                self.inputs[i][j * 2 + 1] = val ^ 1

    def populate_distances(self):
        self.nn.fill(-1)
        self.nn_distances.fill(-1)
        for i in range(0, len(self.raw_inputs)):
            x_a = self.raw_inputs[i]
            for j in range(0, len(self.raw_inputs)):
                if i == j:
                    continue
                x_b = self.raw_inputs[j]
                distance = hamming_distance(x_a, x_b, self.scratch)
                if (self.nn_distances[0][i] < 0 or distance < self.nn_distances[0][i]) and distance > 0:
                    self.nn_distances[0][i] = distance
                    self.nn_distances[1][i] = 1
                    self.nn[i][0] = j
                elif distance == self.nn_distances[0][i]:
                    count = self.nn_distances[1][i]
                    self.nn_distances[1][i] = count + 1
                    self.nn[i][count] = j
                # self.distances[i][j] = 1.0 / (2 ** (distance - 1)) if distance > 0 else 0
                self.distances[i][j] = distance
                # self.distances[i][j] = 1.0 / (distance ** 12) if distance > 0 else 0

    def compute_expected_outputs(self):
        for i in range(0, len(self.raw_inputs)):
            self.expected_outputs[i] = xor(self.raw_inputs[i])

    def compute_base_outputs(self):
        if self.base is None:
            self.base_outputs.fill(0)
            return
        for i in range(0, len(self.inputs)):
            self.base_outputs[i] = self.base(self.inputs[i])

    def mat_coherence(self):
        np.abs(self.output_xor, self.mask)
        np.subtract(self.output_xor, self.mask, self.mask)
        np.divide(self.mask, 2.0, self.mask)
        np.add(1.0, self.mask, self.mask)
        for i in range(0, len(self.output_xor)):
            for j in range(0, len(self.output_xor)):
                self.xor_square[i][j] = self.output_xor[i] ^ self.output_xor[j] ^ (1 if self.distances[i][j] % 2 == 0 else 0)
                self.masked_distances[i][j] = 1.0 / (2 ** self.distances[i][j])
        # self.xor_square.fill(0)
        # np.copyto(self.masked_distances, self.distances)
        # masked_distances_t = self.masked_distances.transpose()
        # for i in range(0, len(self.xor_square)):
        #     self.xor_square[i] = self.output_xor
        #     np.multiply(self.masked_distances[i], self.mask, self.masked_distances[i])
        #     np.multiply(masked_distances_t[i], self.mask, masked_distances_t[i])
        np.sum(self.masked_distances, axis=0, out=self.denominators)
        # self.xor_square = self.xor_square.transpose()
        # np.logical_xor(self.xor_square, self.output_xor, self.xor_square)
        np.multiply(self.xor_square, self.masked_distances, self.xor_square)
        np.sum(self.xor_square, axis=0, out=self.numerators)
        np.divide(self.numerators, self.denominators, self.coherences)
        mean = np.nanmean(self.coherences)
        if isnan(mean):
            mean = 1.0
        return 1.0 - mean

    def nn_coherence(self):
        for i in range(0, len(self.output_xor)):
            total = 0
            y_a = self.output_xor[i]
            distance = self.nn_distances[0][i]
            count = self.nn_distances[1][i]
            for index in range(0, count):
                j = self.nn[i][index]
                y_b = self.output_xor[j]
                total += 1 if y_a == 1 and y_b == 1 or y_a == 0 and y_b == 0 else 0
            self.coherences[i] = total
            # if distance % 2 == 0:
            #     self.coherences[i] = 1.0 - self.coherences[i]
        return np.mean(self.coherences)
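    # Note on the metric above (added commentary): output_xor holds, per sample,
    # whether the current model still disagrees with the target. nn_coherence()
    # counts, for each sample, how many of its nearest neighbours (ties included,
    # as recorded in self.nn / self.nn_distances) carry the same residual bit,
    # and returns the mean of those counts. A higher value means neighbouring
    # inputs tend to be right or wrong together, i.e. the remaining error is
    # more structured.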
    def coherence(self, outputs=None):
        if outputs is None:
            outputs = self.outputs
        np.logical_xor(outputs, self.expected_outputs, self.output_xor)
        return self.nn_coherence()
        # return self.mat_coherence()
        coherences = []
        for i in range(0, len(self.output_xor)):
            y_a = self.output_xor[i]
            numerator = 0
            denominator = 0
            for j in range(0, len(self.output_xor)):
                if i == j:
                    continue
                y_b = self.output_xor[j]
                weight = self.distances[i][j]
                denominator += weight
                if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
                    numerator += weight
            coherence = numerator / denominator if denominator > 0 else 0
            coherences.append(coherence)
        raw_coherence = sum(coherences) / len(coherences)
        check_coherence = self.mat_coherence()
        return raw_coherence

    def div_coherence(self):
        coherences = []
        for i in range(0, len(self.output_xor)):
            y_a = self.output_xor[i]
            if y_a < 0:
                continue
            numerator = 0
            denominator = 0
            for j in range(0, len(self.output_xor)):
                if i == j:
                    continue
                y_b = self.output_xor[j]
                if y_b < 0:
                    continue
                weight = self.distances[i][j]
                denominator += weight
                if y_a == 0 and y_b == 0 or y_a == 1 and y_b == 1:
                    numerator += weight
                # if y_a < 0 or y_b < 0:
                #     numerator += weight
            coherence = numerator / denominator if denominator > 0 else 0
            coherences.append(coherence)
        if len(coherences) == 0:
            return 1.0
        return sum(coherences) / len(coherences)

    def normalize_p(self):
        check = self.knowns[:]
        for i in range(0, len(self.p)):
            if self.p[i] < 0:
                self.p[i] = 0
        for i in range(0, len(self.p)):
            if i in self.knowns:
                flip = i ^ 0b1
                self.p[i] = 0.0
                self.p[flip] = 0.0
            else:
                check.append(i)
                stop_id = index_hash(check)
                check.pop()
                if stop_id in self.stops:
                    self.p[i] = 0.0
        total = np.sum(self.p)
        if total > 0:
            for i in range(0, len(self.p)):
                self.p[i] = self.p[i] / total

    def reset_p(self):
        self.p.fill(1.0)
        self.normalize_p()

    def threshold(self):
        # return (1.0 / (self.num_terms - len(self.knowns))) - (self.epoch / 100)
        return 1.0 - (self.epoch / 1000)

    def get_converged_index(self):
        for i in range(0, len(self.p)):
            if self.p[i] > self.threshold():
                return i
        return None

    def add_layer(self):
        self.has_added_layer = True
        self.add_stop()
        layer = Candidate(self.knowns)
        self.layers.append(layer)
        self.base = self.cache_layers()
        self.knowns.pop()
        self.reset_p()

    def random_sample(self):
        self.randomize_inputs()
        self.populate_distances()
        self.compute_expected_outputs()
        self.compute_base_outputs()
        return self.coherence(self.base_outputs)

    def random_candidate(self):
        indices = self.knowns[:]
        np.copyto(self.p_temp, self.p)
        self.p_temp[self.actual_N] = 0
        total = np.sum(self.p_temp)
        if total == 0:
            return None
        np.divide(self.p_temp, total, self.p_temp)
        for _ in range(0, self.num_terms - len(self.knowns)):
            index = np.random.choice(len(self.p_temp), 1, p=self.p_temp)[0]
            indices.append(index)
            flip = index ^ 0b1
            self.p_temp[index] = 0
            self.p_temp[flip] = 0
            for i in range(0, len(self.p_temp)):
                if i not in indices:
                    indices.append(i)
                    stop_id = index_hash(indices)
                    indices.pop()
                    if stop_id in self.stops:
                        self.p_temp[i] = 0.0
            total = np.sum(self.p_temp)
            if total == 0:
                return None
            np.divide(self.p_temp, total, self.p_temp)
        return Candidate(indices)

    def seed_candidate_pool(self):
        for _ in range(0, self.num_candidates):
            candidate = self.random_candidate()
            if candidate is None:
                continue
            candidate_id = candidate.id()
            if candidate_id in self.candidate_ids:
                continue
            self.candidate_pool.append(candidate)
            self.candidate_ids.add(candidate_id)

    def add_stop(self):
        stop_id = index_hash(self.knowns)
        self.stops.add(stop_id)
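    # How the pieces fit together (added commentary): each accepted set of
    # indices in self.knowns becomes a Candidate "layer"; cache_layers() below
    # compiles all layers into a single function f(x) that XORs the layer
    # products together, and that function becomes self.base for the next
    # round. The index_hash() strings recorded in self.stops mark index sets
    # that have already been tried, so normalize_p() and random_candidate()
    # stop proposing them.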
    def get_distribution(self, candidate, half=1):
        # Restrict the residual to the half-space where the candidate evaluates
        # to `half`; the remaining samples are marked -1.
        count = 0
        for i in range(0, len(self.inputs)):
            value = candidate.evaluate(self.inputs[i])
            if value == half:
                self.output_xor[i] = self.base_outputs[i] ^ self.expected_outputs[i]
                count += 1
            else:
                self.output_xor[i] = -1
        # return (count, self.mat_coherence())
        return (count, self.nn_coherence())

    def err(self, fitted_model, bins, hist):
        err = 0
        for i in range(0, self.num_bins):
            x = bins[i + 1]
            y = hist[i]
            delta = fitted_model(x) - y
            err += delta * delta
        return err / self.num_bins

    def update(self):
        sample = self.epoch
        self.epoch += 1
        base_coherence = self.random_sample()
        np.copyto(self.base_coherences, self.coherences)
        np.copyto(self.base_output_xor, self.output_xor)
        # self.base_coherence_samples[sample] = base_coherence

        candidate = Candidate(self.knowns[:])
        index = -1
        lowest_pvalue = -1
        highest_mode = 0
        fig, axs = plt.subplots(int(self.actual_N / 4), 4)
        x_eval = np.linspace(0, 1.0, num=10000)
        for i in range(0, self.actual_N):
            candidate.indices.append(i)
            try:
                for j in range(0, len(self.inputs)):
                    self.outputs[j] = self.base_outputs[j] ^ candidate.evaluate(self.inputs[j])
                coherence = self.coherence()
                np.subtract(self.coherences, self.base_coherences, self.offset_coherences)
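                # (Added commentary) offset_coherences now holds, per sample, how
                # much the per-sample coherence moved when trial index i was XORed
                # into the model: positive entries improved, negative entries got
                # worse. The left/right splits and distance bookkeeping below feed
                # the parity-aware score used to rank candidate indices.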
                est_num = 0
                est_denom = 0
                # print(self.offset_coherences)
                for j in range(0, len(self.offset_coherences)):
                    # weight = 1.0 / 2 ** self.nn_distances[0][j]
                    if self.offset_coherences[j] == 0:
                        continue
                    weight = 1.0
                    est_num += weight * self.offset_coherences[j]
                    est_denom += weight
                # print(i, est_num / est_denom)
                # mode = est_num / est_denom

                filtered_points = [x for x in self.offset_coherences if x != 0 and x != 1 and x != -1]
                left_half = [x for x in self.offset_coherences if x < 0 and x != -1]
                right_half = [x for x in self.offset_coherences if x > 0 and x != 1]
                left_distances = [self.nn_distances[0][j] for j in range(0, self.sample_size) if self.offset_coherences[j] < 0]
                # left_score = sum([1 if d % 2 == 0 else 0 for d in left_distances]) / len(left_distances)
                right_distances = [self.nn_distances[0][j] for j in range(0, self.sample_size) if self.offset_coherences[j] > 0]
                left_counts = {}
                right_counts = {}
                counts = {}
                for j in range(1, self.N):
                    count = sum([1 if d == j else 0 for d in left_distances])
                    counts[j] = 0
                    if count > 0:
                        left_counts[j] = count
                        counts[j] += count
                    count = sum([1 if d == j else 0 for d in right_distances])
                    if count > 0:
                        right_counts[j] = count
                        counts[j] += count
                # left_sum = sum([1 if d % 2 == 0 else 0 for d in left_distances])
                right_sum = sum([1 if d % 2 == 0 else 0 for d in right_distances])
                # print(left_sum, right_sum)
                score = 1.0 - (right_sum / len(right_distances)) if len(right_distances) > 3 else 0

                base = [((1.0 - self.base_coherences[j]) if self.nn_distances[0][j] % 2 == 0 else self.base_coherences[j]) for j in range(0, self.sample_size)]
                # print(i, sum(points))
                modified = [((1.0 - self.coherences[j]) if self.nn_distances[0][j] % 2 == 0 else self.coherences[j]) for j in range(0, self.sample_size)]
                # print(i, sum(points))
                score = sum([((self.base_coherences[j] - self.coherences[j]) if self.nn_distances[0][j] % 2 == 0 else (self.coherences[j] - self.base_coherences[j])) * (1.0 / comb(int(self.N / 2) + 1, self.nn_distances[0][j])) for j in range(0, self.sample_size)])
                # 3 5 7 10 12 14

                total = 0
                unique_inputs = set()
                for j in range(0, self.sample_size):
                    input_id = str(self.raw_inputs[j])
                    if input_id in unique_inputs:
                        continue
                    unique_inputs.add(input_id)
                    buckets = {}
                    for k in range(0, self.sample_size):
                        distance = int(self.distances[j][k])
                        if distance == 0:
                            continue
                        if distance not in buckets:
                            buckets[distance] = [0, 0, 0, 0]
                        base_value = self.base_output_xor[j] ^ self.base_output_xor[k]
                        value = self.output_xor[j] ^ self.output_xor[k]
                        if distance % 2 == 0:
                            if value == 0 and base_value == 0:
                                total += 1
                            if value == 1 and base_value == 0:
                                total -= 1
                            # 1,3
                            if value == 0 and base_value == 1:
                                total -= 1
                            if value == 1 and base_value == 1:
                                total -= 1
                        else:
                            if value == 1 and base_value == 1:
                                total += 1
                            if value == 0 and base_value == 1:
                                total -= 1
                            # 0,2
                            if value == 0 and base_value == 0:
                                total -= 1
                            if value == 1 and base_value == 0:
                                total -= 1
                        if value == 0 and base_value == 0:
                            buckets[distance][0] += 1
                        elif value == 0 and base_value == 1:
                            buckets[distance][1] += 1
                        elif value == 1 and base_value == 0:
                            buckets[distance][2] += 1
                        elif value == 1 and base_value == 1:
                            buckets[distance][3] += 1
                        # buckets[distance] += value - base_value
                        # total += ((base_value - value) if distance % 2 == 0 else (value - base_value))
                    if j == 0:
                        print(j, buckets)
                        # pprint(buckets)
                alt_score = total
                # score += alt_score

                try:
                    density = stats.gaussian_kde(self.base_coherences)(x_eval)
                    density_a = stats.gaussian_kde(self.coherences)(x_eval)
                    axs[int(i / 4)][int(i % 4)].plot(x_eval, density, color='g')
                    axs[int(i / 4)][int(i % 4)].plot(x_eval, density_a, color='b')
                except:
                    pass

                # score = coherence
                print(i, score, alt_score, left_counts, right_counts)
                if score > highest_mode:
                    highest_mode = score
                    index = i
                # self.coherence_samples[i][sample] = coherence - base_coherence
                # self.coherence_samples[i][sample] = coherence
            finally:
                candidate.indices.pop()

        if index >= 0:
            self.knowns.append(index)
            print(self.knowns, highest_mode)
            self.add_layer()
            self.knowns = []
        print(base_coherence)
        plt.show()
        return
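        # (Added commentary) Everything below this return is not reached: it is
        # an earlier selection strategy that scores candidates drawn from the
        # probability vector self.p and the candidate pool, rather than the
        # per-index sweep above.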
        if index >= 0:
            self.knowns.append(index)
            print(base_coherence)
            print(self.knowns, self.epoch)
            self.add_layer()
            self.uplifts.fill(0)
            self.subspace_uplifts.fill(0)
            self.uplift_medians.fill(0)
            self.uplift_convergences.fill(0)
            self.uplift_samples = [[] for _ in range(0, self.actual_N)]
            self.epoch = 0
            return

        # subspace_index is never assigned on this path; default to "no pick".
        subspace_index = -1
        if subspace_index >= 0:
            self.knowns.append(subspace_index)
            print(self.knowns, self.epoch)
            self.uplifts.fill(0)
            self.subspace_uplifts.fill(0)
            self.uplift_medians.fill(0)
            self.uplift_convergences.fill(0)
            self.uplift_samples = [[] for _ in range(0, self.actual_N)]
            self.epoch = 0
            return
        if self.candidate_pool[0].uplift > 0.3:
            candidate = self.candidate_pool[0]
            candidate_id = candidate.id()
            self.candidate_ids.remove(candidate_id)
            print(candidate_id)
            self.knowns = candidate.indices
            self.add_layer()
            self.knowns = []
            self.reset_p()
            self.epoch = 0
            self.candidate_pool = []
            self.candidate_ids = set()
        elif self.candidate_pool[0].uplift < -0.3 or self.epoch > 200:
            self.epoch = 0
            self.num_terms += 1
            self.candidate_pool = []
            self.candidate_ids = set()
            self.knowns = []
            self.stops = set()
            self.reset_p()
            return

        # np.copyto(self.next_p, self.p)
        has_candidate = False
        for _ in range(0, self.num_candidates):
            candidate = self.random_candidate()
            if candidate is None:
                continue
            candidate_id = candidate.id()
            if candidate_id in self.visited:
                continue
            self.visited.add(candidate_id)
            if self.actual_N in candidate.indices:
                continue
            has_candidate = True
            for i in range(0, len(self.inputs)):
                self.outputs[i] = self.base_outputs[i] ^ candidate.evaluate(self.inputs[i])
            # coherence = self.ring_coherence()
            coherence = self.coherence()
            # if coherence <= base_coherence:
            #     continue
            for index in candidate.indices:
                if coherence > self.max_coherences[index]:
                    self.max_coherences[index] = coherence
                    self.max_candidates[index] = candidate
                # self.max_coherences[index] = max(self.max_coherences[index], coherence)

        # np.copyto(self.p, self.next_p)
        for i in range(0, self.actual_N):
            candidate = self.max_candidates[i]
            if candidate is None:
                continue
            for index in candidate.indices:
                self.p[index] += (self.max_coherences[index] - base_coherence) * (1 / 1000.0)
            # print(i, self.max_coherences[i] - base_coherence, self.max_candidates[i].id())
        self.normalize_p()

        index = np.argmax(self.p)
        delta_over_null = self.p[index] - self.p[self.actual_N]
        if self.epoch == 0:
            self.average_delta_over_null = delta_over_null
        else:
            self.average_delta_over_null = 0.9 * self.average_delta_over_null + 0.1 * delta_over_null
        diff = self.num_terms - len(self.knowns)
        print(self.average_delta_over_null, np.argpartition(self.p, -diff)[-diff:], np.argmax(self.p))

        # Always iterate for a minimum number of epochs.
        if self.epoch < 15:
            return
        if self.average_delta_over_null > 0.00001 and self.average_delta_over_null < 0.001 and self.epoch < 300:
            return
        if self.average_delta_over_null < 0.001:
            index = self.actual_N
        else:
            index = np.argmax(self.p)

        # index = self.get_converged_index()
        if index is not None or not has_candidate:
            self.epoch = 0
            if index == self.actual_N or not has_candidate:
                if len(self.knowns) > 0:
                    self.add_stop()
                    self.knowns.pop()
                    print('Backtrack: ' + str(self.knowns))
                    self.reset_p()
                    return
                self.num_terms += 1
                self.knowns = []
                self.stops = set()
                self.reset_p()
                print(self.num_terms)
                return
            self.knowns.append(index)
            # bisect.insort(self.knowns, index)
            if len(self.knowns) == self.num_terms:
                print('Add layer: ' + str(self.knowns))
                self.add_layer()
            else:
                print('Found term: ' + str(self.knowns))
            self.reset_p()
        print(base_coherence)
        return

    def cache_layers(self):
        # Compile the accepted layers into a single function that XORs the layer
        # products together; e.g. with layers [Candidate([0]), Candidate([2, 5])]
        # the generated source would be:
        #   def f(x):
        #       result=0
        #       result^=x[0]
        #       result^=x[2]*x[5]
        #       return result
        expr = 'def f(x):\n\tresult=0\n'
        for layer in self.layers:
            expr += '\tresult^=' + layer.eval_str() + '\n'
        expr += '\treturn result\n'
        scope = {}
        exec(expr, scope)
        return scope['f']


def main():
    probabilities = Probabilities()
    # probabilities.knowns = [14]
    # probabilities.add_layer()
    # probabilities.knowns = [8]
    # probabilities.add_layer()
    # probabilities.knowns = [4]
    # probabilities.add_layer()
    while probabilities.num_terms <= probabilities.N:
        probabilities.update()


if __name__ == "__main__":
    main()