smt_analysis/libSMT.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151

import numpy as np
import sys
import os

# Abort the program unless every listed input file exists and is non-empty
# @param names Iterable of file paths to check
# @param on_fail Zero-argument callable invoked once, right before exiting, when
#                a file fails the check
# @returns None if every file passes; otherwise does not return (raises SystemExit)
def assert_valid_input_files(names, on_fail):
    for f in names:
        if not os.path.exists(f) or os.path.getsize(f) == 0:
            print("ERROR: File '{}' does not exist or is empty".format(f), file=sys.stderr)
            on_fail()
            # sys.exit() rather than the interactive-only exit() helper, and a
            # non-zero status so the calling shell can tell that this was a failure.
            sys.exit(1)

# This parses the result data from unthreaded timing experiments
# Each non-blank line is one whitespace-separated record; blank lines are ignored.
# @param f File name to load
# @returns res Map of benchmark name to list of execution time samples
# @returns samples Map of benchmark name to sample count (taken from the first
#                  record seen for that benchmark)
# @returns max_res Map of benchmark name to maximum execution time among all samples for that benchmark
def load_baseline(f):
    # constants for columns of baseline data files
    TOTAL_NS = 5
    BENCH_NAME = 0
    SAMPLES = 4

    # Load baseline data. This logic is based off the summarize programs
    res = {} # Map of benchmark name to list of all execution time samples
    samples = {} # Map of benchmark name to sample count
    max_res = {} # Map of benchmark name to maximum execution time

    with open(f) as fp:
        for line in fp:
            s = line.split()
            if not s:
                # Blank or whitespace-only line (e.g. a trailing newline at the
                # end of the file): nothing to parse, so skip it instead of
                # failing with an IndexError below.
                continue
            name = s[BENCH_NAME]
            total_ns = int(s[TOTAL_NS])
            if name not in res:
                res[name] = [total_ns]
                samples[name] = int(s[SAMPLES])
                max_res[name] = total_ns
            else:
                res[name].append(total_ns)
                max_res[name] = max(total_ns, max_res[name])
    return res, samples, max_res

# This parses the result data from paired, threaded timing experiments
# The two files are read in lockstep: line i of file1 and line i of file2 describe
# the two tasks of the same container run.
# @param file1 The -A file name
# @param file2 The -B file name
# @param benchmarkCount Number of benchmarks; sizes both benchmark dimensions of the
#                       returned arrays
# @returns time 3D array indexed [first benchmark ID][second benchmark ID][job] of
#               total container execution times in nanoseconds
# @returns offset 3D array with the same indexing, holding the absolute difference in
#                 nanoseconds between the start of the first and the start of the
#                 second benchmark
# @returns name_to_idx Map of benchmark names to benchmark IDs
# @returns idx_to_name List which when indexed with benchmark ID will yield the benchmark name
def load_paired(file1, file2, benchmarkCount):
    # constants for columns of paired data files
    # (not every column is read below; the full layout is kept for reference)
    FIRST_PROG = 0
    SECOND_PROG = 1
    FIRST_CORE = 2
    SECOND_CORE = 3
    TRIALS = 4
    START_S = 5 # Start seconds
    START_N = 6 # Start nanoseconds
    END_S = 7   # End seconds
    END_N = 8   # End nanoseconds
    RUN_ID = 9
    JOB_NUM = 10

    def to_ns(fields, sec_col, nsec_col):
        # Combine a (seconds, nanoseconds) column pair into one nanosecond count
        return int(fields[sec_col]) * 10**9 + int(fields[nsec_col])

    # The number of jobs per benchmark pair is taken from the first line of file1
    with open(file1) as f1:
        numJobs = int(f1.readline().split()[TRIALS])
    assert numJobs > 0
    assert benchmarkCount > 0

    # Total times of each container
    time = [[[0] * numJobs for _ in range(benchmarkCount)] for _ in range(benchmarkCount)]
    # Difference in time between when the first and the second task start in the container
    offset = [[[0] * numJobs for _ in range(benchmarkCount)] for _ in range(benchmarkCount)]

    # Some aggregate counters that we update as we go along
    avg_off = 0
    avg_off_samp = 0

    # Load paired data
    bench1 = 0 # Index to what's the current first benchmark being examined
    bench2 = 0 # Index to what's the current second benchmark being examined

    name_to_idx = {}
    idx_to_name = [0 for _ in range(benchmarkCount)]

    # Job slot within the current benchmark pair. It is derived from the line's
    # position in the file, not from the JOB_NUM column.
    job_idx = 0
    with open(file1) as f1, open(file2) as f2:
        # zip() stops at the end of the shorter file
        for line1, line2 in zip(f1, f2):
            lineArr1 = line1.split()
            lineArr2 = line2.split()
            start1 = to_ns(lineArr1, START_S, START_N)
            start2 = to_ns(lineArr2, START_S, START_N)
            end1 = to_ns(lineArr1, END_S, END_N)
            end2 = to_ns(lineArr2, END_S, END_N)
            # Time actually co-scheduled is minEnd - maxStart, but Sims uses a different model:
            # the span from the earliest start to the latest end.
            time[bench1][bench2][job_idx] = max(end1, end2) - min(start1, start2)
            # Compute offset: if first job starts at t=0, when does second start?
            start_gap = abs(start2 - start1)
            offset[bench1][bench2][job_idx] = start_gap
            # Compute some running statistics
            avg_off += start_gap
            avg_off_samp += 1
            # Increment to the next benchmark, this is weird because of the zip()
            # This is doubly weird because our results are an upper triangular matrix
            if job_idx == numJobs - 1:
                job_idx = 0
                if bench2 < benchmarkCount - 1:
                    bench2 += 1
                else:
                    # Finished the last pair of this row: record the row's
                    # benchmark name, then start the next row on its diagonal.
                    name_to_idx[lineArr1[FIRST_PROG]] = bench1
                    idx_to_name[bench1] = lineArr1[FIRST_PROG]
                    bench1 += 1
                    bench2 = bench1 # bench1 will never again appear as bench2
            else:
                job_idx += 1
    print("Average offset is: " + str(avg_off/avg_off_samp) + "ns")
    return time, offset, name_to_idx, idx_to_name

# Paired times stored in the baseline file format: each record's benchmark-name
# field holds both names of a pair joined by '+' (e.g. "first+second").
# @param fake_paired_filename File name to load (parsed with load_baseline)
# @returns paired_times Array indexed [first benchmark ID][second benchmark ID] giving
#                       the list of execution time samples recorded for that pair
# @returns name_to_idx Map of benchmark names to benchmark IDs
# @returns idx_to_name List which when indexed with benchmark ID will yield the benchmark name
def load_fake_paired(fake_paired_filename):
    raw_times, _, _ = load_baseline(fake_paired_filename)
    pair_names = list(raw_times.keys())
    # The pair count is treated as benchmarkCount squared
    benchmarkCount = int(np.sqrt(len(pair_names)))
    # Sample count is taken from whichever pair comes first in the map
    numJobs = len(next(iter(raw_times.values())))
    paired_times = [[[0] * numJobs for _ in range(benchmarkCount)]
                    for _ in range(benchmarkCount)]

    # Assign benchmark IDs in sorted order of the pair names. Sorting groups pairs
    # by their first name, so a new ID is issued each time that first name changes.
    idx_to_name = []
    name_to_idx = {}
    for pair in sorted(pair_names):
        first_name, _second_name = pair.split('+')
        if idx_to_name and idx_to_name[-1] == first_name:
            continue
        name_to_idx[first_name] = len(idx_to_name)
        idx_to_name.append(first_name)

    # Fill every (first, second) cell with that pair's sample list
    for row, first in enumerate(idx_to_name):
        for col, second in enumerate(idx_to_name):
            paired_times[row][col] = raw_times[first + "+" + second]
    return paired_times, name_to_idx, idx_to_name

# Abort the program unless the baseline and paired data cover the same benchmarks
# @param baseline_times Map keyed by benchmark name (only the keys are examined)
# @param name_to_idx Map keyed by benchmark name (only the keys are examined)
# @returns None if both key sets are equal; otherwise does not return (raises SystemExit)
def assert_base_and_pair_keys_match(baseline_times, name_to_idx):
    if sorted(baseline_times.keys()) != sorted(name_to_idx.keys()):
        print("ERROR: The baseline and paired experiments were over a different set of benchmarks!", file=sys.stderr)
        print("Baseline keys:", baseline_times.keys(), file=sys.stderr)
        print("Paired keys:", name_to_idx.keys(), file=sys.stderr)
        # sys.exit() rather than the interactive-only exit() helper, and a
        # non-zero status so the calling shell can tell that this was a failure.
        sys.exit(1)