import numpy as np
import sys
import os

def assert_valid_input_files(names, on_fail):
    """Abort the program if any input file is missing or empty.

    Files are checked in iteration order. On the first file that does not
    exist or has a size of zero bytes, an error message is written to
    stderr, ``on_fail`` is called with no arguments, and the process is
    terminated with a non-zero exit status.

    @param names Iterable of file names to check
    @param on_fail Zero-argument callable invoked before exiting
    @raises SystemExit (code 1) on the first invalid file
    """
    for f in names:
        if not os.path.exists(f) or os.path.getsize(f) == 0:
            print("ERROR: File '{}' does not exist or is empty".format(f), file=sys.stderr)
            on_fail()
            # sys.exit (not the interactive-only site.exit helper) with a
            # non-zero status so that calling scripts can detect the failure.
            sys.exit(1)

# This parses the result data from unthreaded timing experiments
# Each non-blank line is split on whitespace; column 0 is the benchmark name,
# column 4 the sample count and column 5 the total execution time.
# @param f File name to load
# @returns res Map of benchmark name to list of execution time samples
# @returns samples Map of benchmark name to sample count (taken from the first line seen for that benchmark)
# @returns max_res Map of benchmark name to maximum execution time among all samples for that benchmark
def load_baseline(f):
    # constants for columns of baseline data files
    BENCH_NAME = 0
    SAMPLES = 4
    TOTAL_NS = 5

    # Load baseline data. This logic is based off the summarize programs
    res = {} # Map of benchmark name to list of all execution time samples
    samples = {} # Map of benchmark name to sample count
    max_res = {} # Map of benchmark name to maximum execution time

    with open(f) as fp:
        for line in fp:
            s = line.split()
            if not s:
                # Blank line (e.g. trailing newline at end of file): nothing to parse
                continue
            name = s[BENCH_NAME]
            total_ns = int(s[TOTAL_NS])
            if name not in res:
                res[name] = [total_ns]
                samples[name] = int(s[SAMPLES])
                max_res[name] = total_ns
            else:
                res[name].append(total_ns)
                max_res[name] = max(total_ns, max_res[name])
    return res, samples, max_res

# This parses the result data from paired, threaded timing experiments
# The two files are read in lockstep, one line from each per job. Results are
# filled in as an upper triangular matrix: after numJobs lines the second
# benchmark index advances, and once it reaches benchmarkCount-1 the first
# benchmark index advances and the second restarts from it. Entries that are
# never visited stay 0.
# @param file1 The -A file name
# @param file2 The -B file name
# @param benchmarkCount Number of benchmarks; sizes both benchmark dimensions of the results
# @returns time Nested list indexed [first benchmark ID][second benchmark ID][job] of
#               total container execution times (latest end minus earliest start, in ns)
# @returns offset Nested list with the same indexing holding the absolute difference
#                 between the start of the first and the start of the second benchmark
# @returns name_to_idx Map of benchmark names to benchmark IDs
# @returns idx_to_name List which when indexed with benchmark ID will yield the benchmark name
def load_paired(file1, file2, benchmarkCount):
    # Column layout of paired data files (whitespace separated):
    #   0 first program, 1 second program, 2 first core, 3 second core,
    #   4 trials, 5 start seconds, 6 start nanoseconds, 7 end seconds,
    #   8 end nanoseconds, 9 run ID, 10 job number
    # Only the columns actually read below get a named constant.
    FIRST_PROG = 0
    TRIALS = 4
    START_S = 5 # Start seconds
    START_N = 6 # Start nanoseconds
    END_S = 7   # End seconds
    END_N = 8   # End nanoseconds
    NS_PER_S = 10**9

    # The number of jobs per benchmark pair comes from the first line of file1
    with open(file1) as f1:
        numJobs = int(f1.readline().split()[TRIALS])
    assert numJobs > 0
    assert benchmarkCount > 0

    # Total times of each container
    time = [[[0] * numJobs for _ in range(benchmarkCount)] for _ in range(benchmarkCount)]
    # Difference in time between when the first and the second task start in the container
    offset = [[[0] * numJobs for _ in range(benchmarkCount)] for _ in range(benchmarkCount)]

    # Some aggregate counters that we update as we go along
    avg_off = 0
    avg_off_samp = 0

    # Load paired data
    bench1 = 0 # Index to what's the current first benchmark being examined
    bench2 = 0 # Index to what's the current second benchmark being examined

    name_to_idx = {}
    idx_to_name = [0 for _ in range(benchmarkCount)]

    job_idx = 0
    with open(file1) as f1, open(file2) as f2:
        # zip() stops at the shorter file, so unmatched trailing lines are ignored
        for line1, line2 in zip(f1, f2):
            lineArr1 = line1.split()
            lineArr2 = line2.split()
            start1 = int(lineArr1[START_S]) * NS_PER_S + int(lineArr1[START_N])
            start2 = int(lineArr2[START_S]) * NS_PER_S + int(lineArr2[START_N])
            end1 = int(lineArr1[END_S]) * NS_PER_S + int(lineArr1[END_N])
            end2 = int(lineArr2[END_S]) * NS_PER_S + int(lineArr2[END_N])
            # Time actually co-scheduled is minEnd - maxStart, but Sims uses a different model
            time[bench1][bench2][job_idx] = max(end1, end2) - min(start1, start2)
            # Compute offset: if first job starts at t=0, when does second start?
            start_gap = abs(start2 - start1)
            offset[bench1][bench2][job_idx] = start_gap
            # Compute some running statistics
            avg_off += start_gap
            avg_off_samp += 1
            # Increment to the next benchmark, this is weird because of the zip()
            # This is doubly weird because our results are an upper triangular matrix
            if job_idx != numJobs - 1:
                job_idx += 1
                continue
            job_idx = 0
            if bench2 < benchmarkCount - 1:
                bench2 += 1
            else:
                # Row bench1 is complete: record its name from the first-program column
                name_to_idx[lineArr1[FIRST_PROG]] = bench1
                idx_to_name[bench1] = lineArr1[FIRST_PROG]
                bench1 += 1
                bench2 = bench1 # bench1 will never again appear as bench2
    if avg_off_samp:
        print("Average offset is: " + str(avg_off/avg_off_samp) + "ns")
    else:
        # No line pairs were read (e.g. one file is empty); avoid dividing by zero
        print("WARNING: No paired samples found in '{}' and '{}'".format(file1, file2), file=sys.stderr)
    return time, offset, name_to_idx, idx_to_name

# Paired times use an abuse of the baseline file format: the benchmark name
# column holds both benchmark names of a pair joined by a '+' delimiter.
# @param fake_paired_filename File name to load (parsed with load_baseline)
# @returns paired_times Nested list indexed [first benchmark ID][second benchmark ID]
#                       yielding the list of execution time samples for that pair
# @returns name_to_idx Map of benchmark names to benchmark IDs
# @returns idx_to_name List which when indexed with benchmark ID will yield the benchmark name
def load_fake_paired(fake_paired_filename):
    raw_times, _, _ = load_baseline(fake_paired_filename)
    # The benchmark count is taken as the square root of the number of pair keys
    # (presumably the file holds every ordered pair -- verify against the data)
    benchmarkCount = int(np.sqrt(len(raw_times)))
    numJobs = len(next(iter(raw_times.values())))
    paired_times = [[[0] * numJobs for _ in range(benchmarkCount)]
                    for _ in range(benchmarkCount)]
    idx_to_name = []
    name_to_idx = {}
    # Generate the indexing approach: walking the pair names in sorted order
    # keeps equal first names adjacent, so a new ID is assigned whenever the
    # first name changes
    for pair in sorted(raw_times):
        first_name, _second_name = pair.split('+')
        if not idx_to_name or first_name != idx_to_name[-1]:
            name_to_idx[first_name] = len(idx_to_name)
            idx_to_name.append(first_name)
    # Populate the array
    for row, row_name in enumerate(idx_to_name):
        for col, col_name in enumerate(idx_to_name):
            paired_times[row][col] = raw_times[row_name + "+" + col_name]
    return paired_times, name_to_idx, idx_to_name

def assert_base_and_pair_keys_match(baseline_times, name_to_idx):
    """Abort the program unless both mappings cover the same benchmark names.

    The sorted keys of the two mappings are compared. On a mismatch, an
    error message and both key sets are written to stderr and the process
    is terminated with a non-zero exit status.

    @param baseline_times Mapping keyed by benchmark name (baseline experiment)
    @param name_to_idx Mapping keyed by benchmark name (paired experiment)
    @raises SystemExit (code 1) if the key sets differ
    """
    if sorted(baseline_times.keys()) != sorted(name_to_idx.keys()):
        print("ERROR: The baseline and paired experiments were over a different set of benchmarks!", file=sys.stderr)
        print("Baseline keys:", baseline_times.keys(), file=sys.stderr)
        print("Paired keys:", name_to_idx.keys(), file=sys.stderr)
        # sys.exit (not the interactive-only site.exit helper) with a
        # non-zero status so that calling scripts can detect the failure.
        sys.exit(1)