path: root/smt_analysis/computeSMTslowdown.py
#!/usr/bin/python3
from typing import List, Any
import numpy as np
from scipy import stats
import sys
import os
import plotille.plotille as plt # Assumes a local checkout of plotille; a pip-installed copy would be `import plotille as plt`
TIMING_ERROR = 1000 #ns
LEVEL_C_ANALYSIS = False

def print_usage(argv):
    print("This program takes in the all-pairs and baseline SMT data and computes how much each program is slowed when SMT in enabled.", file=sys.stderr)
    print("Level-A/B usage: {} <file -A> <file -B> <baseline file> --cij".format(argv[0]), file=sys.stderr)
    print("Level-C usage: {} <continuous pairs> <baseline file>".format(argv[0]), file=sys.stderr)

# Check that we got the right number of parameters
if len(sys.argv) < 3:
    print_usage(sys.argv)
    exit(1)

if len(sys.argv) > 3:
    print("Analyzing results using Level-A/B methodology...")
else:
    print("Analyzing results using Level-C methodology...")
    LEVEL_C_ANALYSIS = True

# Check that all input files are valid
for f in sys.argv[1:-1]:
    if not os.path.exists(f) or os.path.getsize(f) == 0:
        print("ERROR: File '{}' does not exist or is empty".format(f), file=sys.stderr);
        print_usage(sys.argv)
        exit()

# Print Cij values rather than Mij
# Note: "--cij" is a substring of "--cij-ok", so passing --cij-ok also enables TIMES_ONLY
TIMES_ONLY = len(sys.argv) > 4 and "--cij" in sys.argv[4]
OK_PAIRS_ONLY = len(sys.argv) > 4 and "--cij-ok" in sys.argv[4]

# This parses the result data from unthreaded timing experiments
# @param f File name to load
# @returns res Map of benchmark name to list of execution time samples
# @returns samples Map of benchmark name to sample count
# @returns max_res Map of benchmark name to maximum execution time among all samples for that benchmark
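# Hypothetical example row (hedged; only columns 0, 4, and 5 are read here,
# and the "x" columns stand in for fields this loader ignores):
#   "ndes x x x 100 48520" -> appends a 48520 ns sample for "ndes" and
#   records a sample count of 100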
def load_baseline(f):
    # constants for columns of baseline data files
    TOTAL_NS = 5
    BENCH_NAME = 0
    SAMPLES = 4

    # Load baseline data. This logic is based on the summarize programs
    res = {} # Map of benchmark to list of all execution time samples
    samples = {} # Map of benchmark name to sample count
    max_res = {} # Map of benchmark name to maximum execution time

    with open(f) as fp:
        for line in fp:
            s = line.split()
            if s[BENCH_NAME] not in res:
                res[s[BENCH_NAME]] = [int(s[TOTAL_NS])]
                samples[s[BENCH_NAME]] = int(s[SAMPLES])
                max_res[s[BENCH_NAME]] = int(s[TOTAL_NS])
            else:
                res[s[BENCH_NAME]].append(int(s[TOTAL_NS]))
                max_res[s[BENCH_NAME]] = max(int(s[TOTAL_NS]), max_res[s[BENCH_NAME]])
    return res, samples, max_res

# This parses the result data from paired, threaded timing experiments
# @param file1 The -A file name
# @param file2 The -B file name
# @returns time 2D array of benchmark IDs to list of total container execution times
# @returns offset 2D array of benchmark IDs to list of differences between the start
#                 of the first and the start of the second benchmark
# @returns name_to_idx Map of benchmark names to benchmark IDs
# @returns idx_to_name List which when indexed with benchmark ID will yield the benchmark name
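# Worked example with hypothetical timestamps (in ns): if the first task runs
# over [100, 900] and the second over [150, 1000], the container time below is
# max(900, 1000) - min(100, 150) = 900 and the offset is |150 - 100| = 50.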
def load_paired(file1, file2, benchmarkCount):
    # constants for columns of paired data files
    FIRST_PROG = 0
    SECOND_PROG = 1
    FIRST_CORE = 2
    SECOND_CORE = 3
    TRIALS = 4
    START_S = 5 # Start seconds
    START_N = 6 # Start nanoseconds
    END_S = 7   # End seconds
    END_N = 8   # End nanoseconds
    RUN_ID = 9
    JOB_NUM = 10

    with open(file1) as f1:
        numJobs = int(f1.readline().split()[TRIALS])
    assert numJobs > 0
    assert benchmarkCount > 0

    # Total times of each container
    time = [[[0 for x in range(numJobs)] for y in range(benchmarkCount)] for z in range(benchmarkCount)]
    # Difference in time between when the first and the second task start in the container
    offset = [[[0 for x in range(numJobs)] for y in range(benchmarkCount)] for z in range(benchmarkCount)]

    # Some aggregate counters that we update as we go along
    avg_off = 0
    avg_off_samp = 0

    # Load paired data
    bench1 = 0 # Index of the current first benchmark being examined
    bench2 = 0 # Index of the current second benchmark being examined

    name_to_idx = {}
    idx_to_name = [0 for x in range(benchmarkCount)]

    job_idx = 0
    with open(file1) as f1, open(file2) as f2:
        for line1, line2 in zip(f1, f2):
            lineArr1 = line1.split()
            lineArr2 = line2.split()
            start1 = int(lineArr1[START_S]) * 10**9 + int(lineArr1[START_N])
            start2 = int(lineArr2[START_S]) * 10**9 + int(lineArr2[START_N])
            minStart = min(start1, start2)
            end1 = int(lineArr1[END_S]) * 10**9 + int(lineArr1[END_N])
            end2 = int(lineArr2[END_S]) * 10**9 + int(lineArr2[END_N])
            maxEnd = max(end1, end2)
            # Time actually co-scheduled is minEnd - maxStart, but Sims uses a different model
#            time[bench1][bench2][int(lineArr1[JOB_NUM])] = maxEnd - minStart
            time[bench1][bench2][job_idx] = maxEnd - minStart
            # Leftover debug print for one specific pairing; JOB_NUM is parsed as a string, so cast before comparing
            if lineArr1[SECOND_PROG] == "h264_dec" and int(lineArr2[JOB_NUM]) == 0:
                print(maxEnd - minStart)
            # Compute offset: if first job starts at t=0, when does second start?
#            offset[bench1][bench2][int(lineArr1[JOB_NUM])] = abs(start2-start1)
            offset[bench1][bench2][job_idx] = abs(start2-start1)
            # Compute some running statistics
            avg_off += abs(start2-start1)
            avg_off_samp += 1
            # Increment to the next benchmark, this is weird because of the zip()
            # This is doubly weird because our results are an upper triangular matrix
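            # e.g. with 3 benchmarks, (bench1, bench2) advances through the
            # upper triangle as: (0,0) (0,1) (0,2) (1,1) (1,2) (2,2)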
            if job_idx == numJobs - 1: # was: int(lineArr1[JOB_NUM]) == numJobs - 1
                if bench2 < benchmarkCount-1:
                    bench2 = bench2 + 1
                    job_idx = 0
                else:
                    name_to_idx[lineArr1[FIRST_PROG]] = bench1
                    idx_to_name[bench1] = lineArr1[FIRST_PROG]
                    bench1 = bench1 + 1
                    bench2 = bench1 # bench1 will never again appear as bench2
                    job_idx = 0
            else:
                job_idx += 1
    print("Average offset is: " + str(avg_off/avg_off_samp) + "ns")
    return time, offset, name_to_idx, idx_to_name

# Pull in the data
if not LEVEL_C_ANALYSIS:
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[3])
    paired_times, paired_offsets, name_to_idx, idx_to_name = load_paired(sys.argv[1], sys.argv[2], len(list(baseline_times.keys())))
    # Debug: print each benchmark's maximum baseline time
    for key in baseline_times:
        print(key, max(baseline_times[key]))
else:
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[2])
    # Paired times use an abuse of the baseline file format
    paired_times_raw, _, _ = load_baseline(sys.argv[1])
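    # The keys form a full N x N cross product of "name1+name2" pairs, so N is
    # the square root of the key count; e.g. 9 such keys imply 3 benchmarks.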
    benchmarkCount = int(np.sqrt(len(list(paired_times_raw.keys()))))
    numJobs = len(next(iter(paired_times_raw.values())))
    paired_times=[[[0 for x in range(numJobs)]for y in range(benchmarkCount)]for z in range(benchmarkCount)]
    idx_to_name=[]
    name_to_idx={}
    bench1 = -1
    # Generate the benchmark name <-> index mappings
    for pair in sorted(paired_times_raw.keys()):
        [bench1name, bench2name] = pair.split('+') # The key is the two benchmark names joined with a '+' delimiter
        if bench1 == -1 or bench1name != idx_to_name[-1]:
            idx_to_name.append(bench1name)
            name_to_idx[bench1name] = len(idx_to_name) - 1
            bench1 += 1
    # Populate the array
    for bench1 in range(len(idx_to_name)):
        for bench2 in range(len(idx_to_name)):
            paired_times[bench1][bench2] = paired_times_raw[idx_to_name[bench1]+"+"+idx_to_name[bench2]]

# We work iff the baseline was run for the same set of benchmarks as the pairs were
if sorted(baseline_times.keys()) != sorted(name_to_idx.keys()):
    print("ERROR: The baseline and paired experiments were over a different set of benchmarks!", file=sys.stderr)
    print("Baseline keys:", baseline_times.keys(), file=sys.stderr)
    print("Paired keys:", name_to_idx.keys(), file=sys.stderr)
    exit(1)

# Only consider benchmarks that are at least an order of magnitude longer than the timing error
reliableNames = []
for i in range(0, len(name_to_idx)):
    benchmark = idx_to_name[i]
    if min(baseline_times[benchmark]) > TIMING_ERROR * 10:
        reliableNames.append(benchmark)

# Compute SMT slowdown for each benchmark
# Output format: table, each row is one benchmark and each column is one benchmark
#                each cell is base1 + base2*m = pair solved for m, aka (pair - base1) / base2
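# Hypothetical worked example of the cell formula: Ci = 10000 ns, Cj = 8000 ns,
# and Cij = 16000 ns give M = (16000 - 10000) / 8000 = 0.75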
# Print table header
print("Bench          ", end=" ")
for name in reliableNames:
    if not TIMES_ONLY: print("{:<10.10}".format(name), end=" ")
    if TIMES_ONLY: print("{:<12.12}".format(name), end=" ")
print()
# Print rows
sample_f = max # Change this to np.mean to use mean values in Mij generation
M_vals = []
for b1 in reliableNames:
    # Both modes print the same row label
    print("{:<14.14}:".format(b1), end=" ")
    for b2 in reliableNames:
        if not LEVEL_C_ANALYSIS:
            Ci = max(sample_f(baseline_times[b1]), sample_f(baseline_times[b2]))
            Cj = min(sample_f(baseline_times[b1]), sample_f(baseline_times[b2]))
            Cij = sample_f(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            # Dead branch: flip to True to report the coefficient of variation of the paired times instead of Mij
            if False:
                M = np.std(paired_times[name_to_idx[b1]][name_to_idx[b2]]) / np.mean(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            else:
                M = (Cij - Ci) / Cj
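            # Hypothetical example of the filter below: Ci = 120000 ns with
            # Cj = 5000 ns fails Cj * 10 > Ci, so that pair prints as N/A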
            if Cij and Cj * 10 > Ci: # We don't pair tasks with more than a 10x difference in length
                M_vals.append(M)
                if not TIMES_ONLY: print("{:>10.3}".format(M), end=" ")
            else:
                if not TIMES_ONLY: print("{:>10}".format("N/A"), end=" ")

            if TIMES_ONLY and (not OK_PAIRS_ONLY or Cj * 10 > Ci):
                print("{:>12}".format(Cij), end=" ")
            elif OK_PAIRS_ONLY and Cj * 10 <= Ci:
                print("{:>12}".format("0"), end=" ")

        else:
            time_with_smt = sample_f(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            time_wout_smt = sample_f(baseline_times[b1])
            M = time_with_smt / time_wout_smt
            M_vals.append(M)
            print("{:>10.3}".format(M), end=" ")
    print("")
# Print some statistics about the distribution
print("Average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(M_vals), np.std(M_vals), sample_f.__name__))
Ms = np.asarray(M_vals, dtype=np.float32)
if not LEVEL_C_ANALYSIS:
    print(np.sum(Ms <= 0), "of", len(M_vals), "M_i:j values are at most zero -", 100*np.sum(Ms <= 0)/len(M_vals), "percent")
    print(np.sum(Ms > 1), "of", len(M_vals), "M_i:j values are greater than one -", 100*np.sum(Ms > 1)/len(M_vals), "percent")
    M_vals_to_plot = Ms[np.logical_and(Ms > 0, Ms <= 1)]
else:
    print(np.sum(Ms <= 1), "of", len(M_vals), "M_i:j values are at most one -", 100*np.sum(Ms <= 1)/len(M_vals), "percent")
    print(np.sum(Ms > 2), "of", len(M_vals), "M_i:j values are greater than two -", 100*np.sum(Ms > 2)/len(M_vals), "percent")
    M_vals_to_plot = Ms

print("Using Sim's analysis, average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(list(M_vals_to_plot)), np.std(list(M_vals_to_plot)), sample_f.__name__))
print(plt.hist(M_vals_to_plot, bins=10))

##### BELOW TEXT IS OLD OFFSET CODE (patched) #####
## This still works, but is hacky and deprecated ##
## PearsonR doesn't work though                  ##
if not LEVEL_C_ANALYSIS:
    benchmarkNames = idx_to_name
    benchmarkCount = len(benchmarkNames)
    numJobs = len(paired_times[0][0])

    reliableNames=["ndes", "cjpeg_wrbmp", "adpcm_enc", "cjpeg_transupp", "epic", "gsm_dec", "h264_dec", "huff_enc", "rijndael_enc", "rijndael_dec", "gsm_enc", "ammunition", "mpeg2"]

    #stats.pearsonr(time[b1][b2], oList),

    with open("weakRelPairs_offset.csv", mode="w+") as f3:
        print("Benchmark1", "Benchmark2", "minOffset", "maxOffset", "meanOffset", "meddOffset", "stdOffset", "minLength", "maxLength", sep=",", file=f3)
        for b1 in range (0, benchmarkCount):
            for b2 in range (0, benchmarkCount):
                if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames:
                    # Exclude the last job due to inaccurate timing
                    oList = paired_offsets[b1][b2][:numJobs-1]
                    jList = paired_times[b1][b2][:numJobs-1]
#                   plt.scatter(oList, jList)
#                   plt.title(benchmarkNames[b1] + ", " + benchmarkNames[b2])
#                   plt.show()
#                   print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), stats.pearsonr(jList, oList), stats.spearmanr(jList, oList),  sep=",", file=f3)
                    print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), min(jList), max(jList),  sep=",", file=f3)
"""
#with open("reliableGraphs.csv", mode="x") as f3:
        for b1 in range(0, benchmarkCount):
            for b2 in range(0, benchmarkCount):
                if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames:
                    oList = offset[b1][b2][:numJobs - 1]
                    jList=time[b1][b2][:numJobs-1]
                    # offset, time scatterplot
                    plt.scatter(oList, jList)
                    plt.title(benchmarkNames[b1] + " " + benchmarkNames[b2] + " Offsets v. Time")
                    plt.show()
                    #time histogram
                    #plt.hist(jList, bins=10)
                    #plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Completion Times")
                    #plt.show()
                    #offset histogram
                    #plt.hist(oList, bins=10)
                    #plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Offsets")
                    #plt.show()
"""