1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
import numpy as np
import sys
import os
def assert_valid_input_files(names, on_fail):
    """Terminate the program unless every input file exists and is non-empty.

    Files are checked in order. On the first file that is missing or has
    size zero, an error is printed to stderr, ``on_fail`` is called, and the
    process exits.

    Args:
        names: Iterable of file paths to check.
        on_fail: Zero-argument callable invoked after the error message is
            printed and before the process exits.

    Raises:
        SystemExit: With status 1 on the first missing or empty file.
    """
    for path in names:
        if not os.path.exists(path) or os.path.getsize(path) == 0:
            print("ERROR: File '{}' does not exist or is empty".format(path), file=sys.stderr)
            on_fail()
            # Use sys.exit (the bare exit() helper is only installed by the
            # site module) and a non-zero status so the failure is visible
            # to the calling shell or script.
            sys.exit(1)
def load_baseline(f):
    """Parse the result data from unthreaded timing experiments.

    Every non-blank line is split on whitespace. Column 0 is the benchmark
    name, column 4 the sample count and column 5 the total execution time
    (presumably nanoseconds, going by the TOTAL_NS column name). Blank lines
    are skipped.

    Args:
        f: File name to load.

    Returns:
        A tuple ``(res, samples, max_res)``:

        * ``res``: Map of benchmark name to the list of all execution time
          samples for that benchmark, in file order.
        * ``samples``: Map of benchmark name to sample count, taken from the
          first row seen for that benchmark.
        * ``max_res``: Map of benchmark name to the maximum execution time
          among all samples for that benchmark.

    Raises:
        IndexError: If a non-blank line has fewer than six columns.
        ValueError: If the time or sample-count column is not an integer.
    """
    # Column indices of the baseline data files
    BENCH_NAME = 0
    SAMPLES = 4
    TOTAL_NS = 5

    # Load baseline data. This logic is based off the summarize programs
    res = {}  # Map of benchmark name to list of all execution time samples
    samples = {}  # Map of benchmark name to sample count
    max_res = {}  # Map of benchmark name to maximum execution time
    with open(f) as fp:
        for line in fp:
            cols = line.split()
            if not cols:
                # A blank line (e.g. a trailing newline) carries no sample
                continue
            name = cols[BENCH_NAME]
            total_ns = int(cols[TOTAL_NS])
            if name not in res:
                res[name] = [total_ns]
                samples[name] = int(cols[SAMPLES])
                max_res[name] = total_ns
            else:
                res[name].append(total_ns)
                max_res[name] = max(total_ns, max_res[name])
    return res, samples, max_res
def load_paired(file1, file2, benchmarkCount):
    """Parse the result data from paired, threaded timing experiments.

    The two files are read in lockstep, one line from each per job. The
    number of jobs per benchmark pair (numJobs) is taken from the TRIALS
    column of the first line of ``file1``. Lines are assumed to be ordered
    as an upper triangular matrix: for each first benchmark, pairs start at
    the diagonal, so entries below the diagonal are left as 0. Jobs are
    indexed by their position in the file, not by the JOB_NUM column.

    The average start offset over all line pairs is printed to stdout.

    Args:
        file1: The -A file name.
        file2: The -B file name.
        benchmarkCount: Number of distinct benchmarks; must be positive.

    Returns:
        A tuple ``(time, offset, name_to_idx, idx_to_name)``:

        * ``time``: benchmarkCount x benchmarkCount x numJobs nested list of
          total container execution times in nanoseconds (latest end minus
          earliest start of the two jobs).
        * ``offset``: Same shape; absolute difference in nanoseconds between
          the start of the first and the start of the second benchmark.
        * ``name_to_idx``: Map of benchmark names to benchmark IDs. A name
          is only recorded once its whole row of pairs has been read.
        * ``idx_to_name``: List which when indexed with a benchmark ID
          yields the benchmark name (0 for IDs whose row was never
          completed).

    Raises:
        AssertionError: If numJobs or benchmarkCount is not positive.
        IndexError: If ``file1`` is empty, a line has too few columns, or
            the files hold more rows than benchmarkCount allows.
        ZeroDivisionError: If no line pairs could be read (e.g. ``file2``
            is empty), because the average offset is undefined.
    """
    # Column layout of the paired data files. Not every column is consumed
    # here; the full layout is kept as a reference for the file format.
    FIRST_PROG = 0
    SECOND_PROG = 1
    FIRST_CORE = 2
    SECOND_CORE = 3
    TRIALS = 4
    START_S = 5  # Start seconds
    START_N = 6  # Start nanoseconds
    END_S = 7  # End seconds
    END_N = 8  # End nanoseconds
    RUN_ID = 9
    JOB_NUM = 10

    with open(file1) as f1:
        numJobs = int(f1.readline().split()[TRIALS])
    assert numJobs > 0
    assert benchmarkCount > 0

    # Total times of each container
    time = [[[0 for _ in range(numJobs)] for _ in range(benchmarkCount)]
            for _ in range(benchmarkCount)]
    # Difference in time between when the first and the second task start
    # in the container
    offset = [[[0 for _ in range(numJobs)] for _ in range(benchmarkCount)]
              for _ in range(benchmarkCount)]

    # Aggregate counters updated as we go along
    avg_off = 0
    avg_off_samp = 0

    bench1 = 0  # Index of the current first benchmark being examined
    bench2 = 0  # Index of the current second benchmark being examined
    name_to_idx = {}
    idx_to_name = [0 for _ in range(benchmarkCount)]
    job_idx = 0

    with open(file1) as f1, open(file2) as f2:
        for line1, line2 in zip(f1, f2):
            lineArr1 = line1.split()
            lineArr2 = line2.split()
            start1 = int(lineArr1[START_S]) * 10**9 + int(lineArr1[START_N])
            start2 = int(lineArr2[START_S]) * 10**9 + int(lineArr2[START_N])
            minStart = min(start1, start2)
            end1 = int(lineArr1[END_S]) * 10**9 + int(lineArr1[END_N])
            end2 = int(lineArr2[END_S]) * 10**9 + int(lineArr2[END_N])
            maxEnd = max(end1, end2)

            # Time actually co-scheduled is minEnd - maxStart, but Sims uses
            # a different model
            time[bench1][bench2][job_idx] = maxEnd - minStart

            # Compute offset: if first job starts at t=0, when does the
            # second start?
            start_gap = abs(start2 - start1)
            offset[bench1][bench2][job_idx] = start_gap

            # Running statistics for the average printed below
            avg_off += start_gap
            avg_off_samp += 1

            # Advance to the next job / benchmark pair. The results form an
            # upper triangular matrix, so each new row starts at its
            # diagonal.
            if job_idx == numJobs - 1:
                if bench2 < benchmarkCount - 1:
                    bench2 = bench2 + 1
                else:
                    # Row finished: only now is the first benchmark's name
                    # bound to its index.
                    name_to_idx[lineArr1[FIRST_PROG]] = bench1
                    idx_to_name[bench1] = lineArr1[FIRST_PROG]
                    bench1 = bench1 + 1
                    bench2 = bench1  # bench1 will never again appear as bench2
                job_idx = 0
            else:
                job_idx += 1

    print("Average offset is: " + str(avg_off/avg_off_samp) + "ns")
    return time, offset, name_to_idx, idx_to_name
def load_fake_paired(fake_paired_filename):
    """Load paired timing data stored in the baseline file format.

    Paired times use an abuse of the baseline file format: the file is read
    with load_baseline(), and each "benchmark name" in it is the names of
    the two paired benchmarks joined by a '+' delimiter.

    Benchmark IDs are assigned in sorted order of the pair names, one per
    distinct first-benchmark name. The benchmark count is taken as the
    integer square root of the number of pair names, and the job count as
    the number of samples of the first pair in the file.

    Args:
        fake_paired_filename: File name to load.

    Returns:
        A tuple ``(paired_times, name_to_idx, idx_to_name)``:

        * ``paired_times``: Nested list indexed by first and second
          benchmark ID, yielding that pair's list of execution times.
        * ``name_to_idx``: Map of benchmark names to benchmark IDs.
        * ``idx_to_name``: List which when indexed with a benchmark ID
          yields the benchmark name.
    """
    raw_times, _, _ = load_baseline(fake_paired_filename)
    benchmarkCount = int(np.sqrt(len(raw_times)))
    numJobs = len(next(iter(raw_times.values())))
    paired_times = [[[0] * numJobs for _ in range(benchmarkCount)]
                    for _ in range(benchmarkCount)]

    # Generate the indexing approach: in sorted order every first-benchmark
    # name forms one contiguous run, so a new name is detected by comparing
    # against the most recently added one.
    idx_to_name = []
    name_to_idx = {}
    for pair_name in sorted(raw_times):
        first_name, _second_name = pair_name.split('+')
        if not idx_to_name or first_name != idx_to_name[-1]:
            name_to_idx[first_name] = len(idx_to_name)
            idx_to_name.append(first_name)

    # Populate the array with the sample list of every ordered pair
    for row, row_name in enumerate(idx_to_name):
        for col, col_name in enumerate(idx_to_name):
            paired_times[row][col] = raw_times[row_name + "+" + col_name]

    return paired_times, name_to_idx, idx_to_name
def assert_base_and_pair_keys_match(baseline_times, name_to_idx):
    """Terminate the program unless both experiments cover the same benchmarks.

    The key sets of the two mappings are compared irrespective of order. On
    a mismatch an error and both key sets are printed to stderr and the
    process exits.

    Args:
        baseline_times: Mapping keyed by benchmark name from the baseline
            experiment.
        name_to_idx: Mapping keyed by benchmark name from the paired
            experiment.

    Raises:
        SystemExit: With status 1 if the key sets differ.
    """
    if sorted(baseline_times.keys()) != sorted(name_to_idx.keys()):
        print("ERROR: The baseline and paired experiments were over a different set of benchmarks!", file=sys.stderr)
        print("Baseline keys:", baseline_times.keys(), file=sys.stderr)
        print("Paired keys:", name_to_idx.keys(), file=sys.stderr)
        # Use sys.exit (the bare exit() helper is only installed by the site
        # module) and a non-zero status so the failure is visible to the
        # calling shell or script.
        sys.exit(1)
|