summaryrefslogtreecommitdiffstats
path: root/smt_analysis/libSMT.py
diff options
context:
space:
mode:
Diffstat (limited to 'smt_analysis/libSMT.py')
-rwxr-xr-xsmt_analysis/libSMT.py151
1 files changed, 151 insertions, 0 deletions
diff --git a/smt_analysis/libSMT.py b/smt_analysis/libSMT.py
new file mode 100755
index 0000000..cca2fce
--- /dev/null
+++ b/smt_analysis/libSMT.py
@@ -0,0 +1,151 @@
1import numpy as np
2import sys
3import os
4
def assert_valid_input_files(names, on_fail):
    """Abort the program unless every input file exists and is non-empty.

    @param names    Iterable of file names to check, in order
    @param on_fail  Zero-argument callable invoked after the error message
                    has been printed and just before the process exits
    @raises SystemExit  With status 1, on the first missing or empty file
    """
    for name in names:
        if not os.path.exists(name) or os.path.getsize(name) == 0:
            print("ERROR: File '{}' does not exist or is empty".format(name), file=sys.stderr)
            on_fail()
            # Use sys.exit rather than the exit() helper: the helper is only
            # injected by the site module (absent under `python -S` or in
            # frozen builds), and a failure must not report status 0.
            sys.exit(1)
12
def load_baseline(f):
    """Parse the result data from unthreaded timing experiments.

    Every non-blank line is split on whitespace. Column 0 is the benchmark
    name, column 4 the sample count and column 5 the execution time of one
    sample (an integer; nanoseconds judging by the column name). Blank
    lines are skipped.

    @param f  File name to load
    @returns res      Map of benchmark name to list of all execution time
                      samples, in file order
    @returns samples  Map of benchmark name to sample count, taken from the
                      first line seen for that benchmark
    @returns max_res  Map of benchmark name to maximum execution time among
                      all samples for that benchmark
    """
    # Constants for columns of baseline data files
    BENCH_NAME = 0
    SAMPLES = 4
    TOTAL_NS = 5

    res = {}
    samples = {}
    max_res = {}

    with open(f) as fp:
        for line in fp:
            fields = line.split()
            if not fields:
                # Tolerate blank/trailing empty lines instead of failing
                # with an IndexError on the column lookup.
                continue
            name = fields[BENCH_NAME]
            total_ns = int(fields[TOTAL_NS])
            if name not in res:
                res[name] = [total_ns]
                samples[name] = int(fields[SAMPLES])
                max_res[name] = total_ns
            else:
                res[name].append(total_ns)
                max_res[name] = max(total_ns, max_res[name])
    return res, samples, max_res
40
def load_paired(file1, file2, benchmarkCount):
    """Parse the result data from paired, threaded timing experiments.

    The two files are read in lockstep: line i of file1 and line i of file2
    are treated as the two halves of one co-scheduled run (extra lines in the
    longer file are ignored). Lines are consumed positionally: numJobs
    consecutive lines per benchmark pair, with pairs ordered as an upper
    triangular matrix (0,0), (0,1), ..., (0,N-1), (1,1), (1,2), ...

    @param file1           The -A file name
    @param file2           The -B file name
    @param benchmarkCount  Number of distinct benchmarks (matrix dimension)
    @returns time         2D array of benchmark IDs to list of total container
                          execution times (latest end minus earliest start, ns)
    @returns offset       2D array of benchmark IDs to list of differences
                          between the start of the first and the start of the
                          second benchmark (absolute value, ns)
    @returns name_to_idx  Map of benchmark names to benchmark IDs
    @returns idx_to_name  List which when indexed with benchmark ID will yield
                          the benchmark name
    Entries below the diagonal of time/offset are left zero-filled. A name is
    only recorded once the last job of that benchmark's final pairing has
    been read. Also prints the average start offset to stdout; this raises
    ZeroDivisionError if no line pairs were read at all.
    """
    # Columns of paired data files. Full layout, for reference:
    #   0 first prog, 1 second prog, 2 first core, 3 second core, 4 trials,
    #   5 start seconds, 6 start nanoseconds, 7 end seconds,
    #   8 end nanoseconds, 9 run ID, 10 job number
    FIRST_PROG = 0
    TRIALS = 4
    START_S = 5
    START_N = 6
    END_S = 7
    END_N = 8
    NS_PER_S = 10**9

    # The trial count on the first line sizes the per-pair sample lists
    with open(file1) as f1:
        numJobs = int(f1.readline().split()[TRIALS])
    assert numJobs > 0
    assert benchmarkCount > 0

    # Total times of each container
    time = [[[0 for x in range(numJobs)] for y in range(benchmarkCount)] for z in range(benchmarkCount)]
    # Difference in time between when the first and the second task start in the container
    offset = [[[0 for x in range(numJobs)] for y in range(benchmarkCount)] for z in range(benchmarkCount)]

    # Aggregate counters for the average offset report
    avg_off = 0
    avg_off_samp = 0

    bench1 = 0  # Index of the current first benchmark being examined
    bench2 = 0  # Index of the current second benchmark being examined
    job_idx = 0  # Position of the current line within its benchmark pair

    name_to_idx = {}
    idx_to_name = [0 for x in range(benchmarkCount)]

    with open(file1) as f1, open(file2) as f2:
        for line1, line2 in zip(f1, f2):
            lineArr1 = line1.split()
            lineArr2 = line2.split()
            start1 = int(lineArr1[START_S]) * NS_PER_S + int(lineArr1[START_N])
            start2 = int(lineArr2[START_S]) * NS_PER_S + int(lineArr2[START_N])
            end1 = int(lineArr1[END_S]) * NS_PER_S + int(lineArr1[END_N])
            end2 = int(lineArr2[END_S]) * NS_PER_S + int(lineArr2[END_N])
            # Time actually co-scheduled is minEnd - maxStart, but Sims uses a different model
            time[bench1][bench2][job_idx] = max(end1, end2) - min(start1, start2)
            # NOTE: a debug print guarded by `lineArr2[JOB_NUM] == 0` used to
            # live here; it compared a str to an int, so it could never fire
            # and has been removed.
            # Compute offset: if first job starts at t=0, when does second start?
            start_gap = abs(start2 - start1)
            offset[bench1][bench2][job_idx] = start_gap
            avg_off += start_gap
            avg_off_samp += 1

            if job_idx != numJobs - 1:
                job_idx += 1
                continue
            # Last job of this pair: advance along the upper triangular matrix
            job_idx = 0
            if bench2 < benchmarkCount - 1:
                bench2 += 1
            else:
                name_to_idx[lineArr1[FIRST_PROG]] = bench1
                idx_to_name[bench1] = lineArr1[FIRST_PROG]
                bench1 += 1
                bench2 = bench1  # bench1 will never again appear as bench2
    print("Average offset is: " + str(avg_off/avg_off_samp) + "ns")
    return time, offset, name_to_idx, idx_to_name
122
def load_fake_paired(fake_paired_filename):
    """Load paired times stored in (an abuse of) the baseline file format.

    Each "benchmark name" in the file is the two benchmark names of a pair
    joined with a '+' delimiter; the file is parsed with load_baseline.

    @param fake_paired_filename  File name to load
    @returns paired_times  2D array of benchmark IDs to the list of time
                           samples recorded for that pair
    @returns name_to_idx   Map of benchmark names to benchmark IDs
    @returns idx_to_name   List which when indexed with benchmark ID will
                           yield the benchmark name
    """
    raw_times, _, _ = load_baseline(fake_paired_filename)
    # A full N x N matrix of pairs is expected, so N is the root of the key count
    benchmarkCount = int(np.sqrt(len(raw_times)))
    numJobs = len(next(iter(raw_times.values())))
    paired_times = [[[0] * numJobs for _ in range(benchmarkCount)] for _ in range(benchmarkCount)]

    # Generate the indexing approach: IDs follow the sorted order of the
    # first name of each pair; sorting keeps equal first names adjacent.
    idx_to_name = []
    name_to_idx = {}
    for pair in sorted(raw_times):
        first_name, _second_name = pair.split('+')
        if idx_to_name and first_name == idx_to_name[-1]:
            continue
        name_to_idx[first_name] = len(idx_to_name)
        idx_to_name.append(first_name)

    # Populate the array
    for row, row_name in enumerate(idx_to_name):
        for col, col_name in enumerate(idx_to_name):
            paired_times[row][col] = raw_times[row_name + "+" + col_name]
    return paired_times, name_to_idx, idx_to_name
144
def assert_base_and_pair_keys_match(baseline_times, name_to_idx):
    """Abort the program unless both experiments cover the same benchmarks.

    @param baseline_times  Map keyed by benchmark name (baseline experiment)
    @param name_to_idx     Map keyed by benchmark name (paired experiment)
    @raises SystemExit  With status 1, when the two key sets differ
    """
    if sorted(baseline_times.keys()) != sorted(name_to_idx.keys()):
        print("ERROR: The baseline and paired experiments were over a different set of benchmarks!", file=sys.stderr)
        print("Baseline keys:", baseline_times.keys(), file=sys.stderr)
        print("Paired keys:", name_to_idx.keys(), file=sys.stderr)
        # Use sys.exit rather than the site-provided exit() helper, and
        # report the failure with a non-zero status.
        sys.exit(1)
151