diff options
| author | Joshua Bakita <jbakita@cs.unc.edu> | 2020-10-22 00:27:52 -0400 |
|---|---|---|
| committer | Joshua Bakita <jbakita@cs.unc.edu> | 2020-10-22 00:28:01 -0400 |
| commit | e7562d1b6e782a415a44be280dc51b0e104242b7 (patch) | |
| tree | 959c2e4511dc774f2099473d418d61bf8bd0102d | |
| parent | 4f634d4cd3254dfc68b65e63be32708105032101 (diff) | |
Add a phenomenal script to distill SMT pair data to Mij and Mi scores
| -rwxr-xr-x | smt_analysis/computeSMTslowdown.py | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/smt_analysis/computeSMTslowdown.py b/smt_analysis/computeSMTslowdown.py new file mode 100755 index 0000000..2cf58ac --- /dev/null +++ b/smt_analysis/computeSMTslowdown.py | |||
| @@ -0,0 +1,291 @@ | |||
#!/usr/bin/python3
from typing import List, Any
import numpy as np
from scipy import stats
import sys
import os
import plotille.plotille as plt  # vendored copy of the plotille terminal-plotting library
TIMING_ERROR = 1000 #ns -- measurement noise floor; benchmarks must be >10x this to be "reliable"
LEVEL_C_ANALYSIS = False  # flipped to True below when only two input files are given
| 10 | |||
def print_usage(argv):
    """Write a short description and both invocation forms to stderr."""
    usage = (
        "This program takes in the all-pairs and baseline SMT data and computes how much each program is slowed when SMT in enabled.",
        "Level-A/B usage: {} <file -A> <file -B> <baseline file> --cij".format(argv[0]),
        "Level-C usage: {} <continuous pairs> <baseline file>".format(argv[0]),
    )
    # stderr keeps the usage text out of any piped data output
    for line in usage:
        print(line, file=sys.stderr)
| 15 | |||
# Check that we got the right number of parameters
if len(sys.argv) < 3:
    print_usage(sys.argv)
    exit()

# Three or fewer arguments means Level-C input (continuous pairs + baseline);
# more means the Level-A/B three-file form.
if len(sys.argv) > 3:
    print("Analyzing results using Level-A/B methodology...")
else:
    print("Analyzing results using Level-C methodology...")
    LEVEL_C_ANALYSIS = True

# Check that all input files are valid
for f in sys.argv[1:-1]:
    if not os.path.exists(f) or os.path.getsize(f) == 0:
        print("ERROR: File '{}' does not exist or is empty".format(f), file=sys.stderr);
        print_usage(sys.argv)
        exit()

# Print Cij values rather than Mij
# NOTE(review): "--cij" is a substring of "--cij-ok", so passing --cij-ok also
# sets TIMES_ONLY. This appears intentional (the OK_PAIRS_ONLY output reuses
# the Cij table format below) -- confirm with the author before changing.
TIMES_ONLY = len(sys.argv) > 4 and "--cij" in sys.argv[4]
OK_PAIRS_ONLY = len(sys.argv) > 4 and "--cij-ok" in sys.argv[4]
| 37 | |||
# This parses the result data from unthreaded timing experiments
# @param f File name to load
# @returns res Map of benchmark name to list of execution time samples
# @returns samples Map of benchmark name to sample count (as reported in the file)
# @returns max_res Map of benchmark name to maximum execution time among all samples for that benchmark
# (NOTE: the original header had the res/samples descriptions swapped relative
#  to the code; corrected here to match what is actually returned.)
def load_baseline(f):
    # constants for columns of baseline data files
    TOTAL_NS = 5
    BENCH_NAME = 0
    SAMPLES = 4

    # Load baseline data. This logic is based off the summarize programs
    res = {}      # Map of benchmark to list of all execution time samples
    samples = {}  # Map of benchmark name to sample count
    max_res = {}  # Map of benchmark name to maximum execution time

    with open(f) as fp:
        for line in fp:
            s = line.split()
            # Hoist the repeated field extractions/conversions
            name = s[BENCH_NAME]
            total_ns = int(s[TOTAL_NS])
            if name not in res:
                # First sighting of this benchmark: initialize all three maps.
                # The sample count is only read from the first line per benchmark.
                res[name] = [total_ns]
                samples[name] = int(s[SAMPLES])
                max_res[name] = total_ns
            else:
                res[name].append(total_ns)
                max_res[name] = max(total_ns, max_res[name])
    return res, samples, max_res
| 65 | |||
# This parses the result data from paired, threaded timing experiments
# @param file1 The -A file name
# @param file2 The -B file name
# @param benchmarkCount Number of distinct benchmarks in the experiment
# @returns time 2D array of benchmark IDs to list of total container execution times
# @returns offset 2D array of benchmark IDs to list of differences between the start
#          of the first and the start of the second benchmark
# @returns name_to_idx Map of benchmark names to benchmark IDs
# @returns idx_to_name List which when indexed with benchmark ID will yield the benchmark name
def load_paired(file1, file2, benchmarkCount):
    # constants for columns of paired data files
    FIRST_PROG = 0
    SECOND_PROG = 1
    FIRST_CORE = 2
    SECOND_CORE = 3
    TRIALS = 4
    START_S = 5 # Start seconds
    START_N = 6 # Start nanoseconds
    END_S = 7 # End seconds
    END_N = 8 # End nanoseconds
    RUN_ID = 9
    JOB_NUM = 10

    # The per-pair job count is read off the first line; presumably every
    # line carries the same TRIALS value -- verify against the generator.
    with open(file1) as f1:
        numJobs = int(f1.readline().split()[TRIALS])
    assert numJobs > 0
    assert benchmarkCount > 0

    # Total times of each container
    time=[[[0 for x in range(numJobs)]for y in range(benchmarkCount)]for z in range(benchmarkCount)]
    # Difference in time between when the first and the second task start in the container
    offset=[[[0 for x in range(numJobs)]for y in range(benchmarkCount)]for z in range(benchmarkCount)]

    # Some aggregate counters that we update as we go along
    avg_off = 0
    avg_off_samp = 0

    # Load paired data
    bench1 = 0 # Index to what's the current first benchmark being examined
    bench2 = 0 # Index to what's the current second benchmark being examined

    name_to_idx = {}
    idx_to_name = [0 for x in range(benchmarkCount)]

    job_idx = 0
    with open(file1) as f1, open(file2) as f2:
        # The two files are read in lockstep: line k of file1 and line k of
        # file2 describe the two halves of the same co-scheduled container.
        for line1, line2 in zip(f1, f2):
            lineArr1 = line1.split()
            lineArr2 = line2.split()
            # Reconstruct absolute start/end timestamps in nanoseconds
            start1 = int(lineArr1[START_S]) * 10**9 + int(lineArr1[START_N])
            start2 = int(lineArr2[START_S]) * 10**9 + int(lineArr2[START_N])
            minStart = min(start1, start2)
            end1 = int(lineArr1[END_S]) * 10**9 + int(lineArr1[END_N])
            end2 = int(lineArr2[END_S]) * 10**9 + int(lineArr2[END_N])
            maxEnd = max(end1, end2)
            # Time actually co-scheduled is minEnd - maxStart, but Sims uses a different model
            # time[bench1][bench2][int(lineArr1[JOB_NUM])] = maxEnd - minStart
            time[bench1][bench2][job_idx] = maxEnd - minStart
            # NOTE(review): lineArr2[JOB_NUM] is a string (from split()), so the
            # comparison to the int 0 is always False and this debug print never
            # fires -- looks like leftover debugging; confirm before removing.
            if lineArr1[SECOND_PROG] == "h264_dec" and lineArr2[JOB_NUM] == 0:
                print(maxEnd - minStart)
            # Compute offset: if first job starts at t=0, when does second start?
            # offset[bench1][bench2][int(lineArr1[JOB_NUM])] = abs(start2-start1)
            offset[bench1][bench2][job_idx] = abs(start2-start1)
            # Compute some running statistics
            avg_off += abs(start2-start1)
            avg_off_samp += 1
            # Increment to the next benchmark, this is weird because of the zip()
            # This is doubly weird because our results are an upper triangular matrix
            if job_idx == numJobs - 1: #int(lineArr1[JOB_NUM]) == numJobs - 1:
                if bench2 < benchmarkCount-1:
                    bench2 = bench2 + 1
                    job_idx = 0
                else:
                    # Finished a full row: record the name<->index mapping for
                    # bench1 and move the diagonal forward.
                    name_to_idx[lineArr1[FIRST_PROG]] = bench1
                    idx_to_name[bench1] = lineArr1[FIRST_PROG]
                    bench1 = bench1 + 1
                    bench2 = bench1 # bench1 will never again appear as bench2
                    job_idx = 0
            else:
                job_idx += 1
    print("Average offset is: " + str(avg_off/avg_off_samp) + "ns")
    return time, offset, name_to_idx, idx_to_name
| 147 | |||
# Pull in the data
if not LEVEL_C_ANALYSIS:
    # Level-A/B: argv[1]/argv[2] are the paired -A/-B files, argv[3] is the baseline
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[3])
    paired_times, paired_offsets, name_to_idx, idx_to_name = load_paired(sys.argv[1], sys.argv[2], len(list(baseline_times.keys())))
    for key in baseline_times:
        print(key,max(baseline_times[key]))
else:
    # Level-C: argv[1] is the continuous-pairs file, argv[2] is the baseline
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[2])
    # Paired times use an abuse of the baseline file format
    paired_times_raw, _, _ = load_baseline(sys.argv[1])
    # The pairs file holds one entry per ordered pair, so its key count is
    # benchmarkCount squared.
    benchmarkCount = int(np.sqrt(len(list(paired_times_raw.keys()))))
    numJobs = len(next(iter(paired_times_raw.values())))
    paired_times=[[[0 for x in range(numJobs)]for y in range(benchmarkCount)]for z in range(benchmarkCount)]
    idx_to_name=[]
    name_to_idx={}
    bench1 = -1
    #Generate the indexing approach
    for pair in sorted(paired_times_raw.keys()):
        [bench1name, bench2name] = pair.split('+') # Benchmark name is pair concatenated together with a '+' delimiter
        if bench1 == -1 or bench1name != idx_to_name[-1]:
            # First time we see this first-benchmark name: assign it the next index
            idx_to_name.append(bench1name)
            name_to_idx[bench1name] = len(idx_to_name) - 1
            bench1 += 1
    # Populate the array
    for bench1 in range(len(idx_to_name)):
        for bench2 in range(len(idx_to_name)):
            paired_times[bench1][bench2] = paired_times_raw[idx_to_name[bench1]+"+"+idx_to_name[bench2]]

# We work iff the baseline was run for the same set of benchmarks as the pairs were
if sorted(baseline_times.keys()) != sorted(name_to_idx.keys()):
    print("ERROR: The baseline and paired experiments were over a different set of benchmarks!", file=sys.stderr)
    print("Baseline keys:", baseline_times.keys(), file=sys.stderr)
    print("Paired keys:", name_to_idx.keys(), file=sys.stderr)
    exit();
| 182 | |||
# Only consider benchmarks that are at least an order of magnitude longer than the timing error
reliableNames = []
for i in range(0, len(name_to_idx)):
    benchmark = idx_to_name[i]
    if min(baseline_times[benchmark]) > TIMING_ERROR * 10:
        reliableNames.append(benchmark)

# Compute SMT slowdown for each benchmark
# Output format: table, each row is one benchmark and each column is one benchmark
# each cell is base1 + base2*m = pair solved for m, aka (pair - base1) / base2
# Print table header
print("Bench ", end=" ")
for name in reliableNames:
    # Column width differs between Mij (10) and Cij (12) output modes
    if not TIMES_ONLY: print("{:<10.10}".format(name), end=" ")
    if TIMES_ONLY: print("{:<12.12}".format(name), end=" ")
print()
# Print rows
sample_f = max # Change this to np.mean to use mean values in Mij generation
M_vals = []
for b1 in reliableNames:
    if not TIMES_ONLY: print("{:<14.14}:".format(b1), end=" ")
    if TIMES_ONLY: print("{:<14.14}:".format(b1), end=" ")
    for b2 in reliableNames:
        if not LEVEL_C_ANALYSIS:
            # Ci is the longer baseline of the pair, Cj the shorter
            Ci = max(sample_f(baseline_times[b1]), sample_f(baseline_times[b2]))
            Cj = min(sample_f(baseline_times[b1]), sample_f(baseline_times[b2]))
            Cij = sample_f(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            # Dead branch kept for experimentation: coefficient of variation
            # of the paired samples instead of the Mij model.
            if False:
                M = np.std(paired_times[name_to_idx[b1]][name_to_idx[b2]]) / np.mean(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            else:
                M = (Cij - Ci) / Cj
            if Cij and Cj * 10 > Ci: # We don't pair tasks with more than a 10x difference in length
                M_vals.append(M)
                if not TIMES_ONLY: print("{:>10.3}".format(M), end=" ")
            else:
                if not TIMES_ONLY: print("{:>10}".format("N/A"), end=" ")

            if TIMES_ONLY and (not OK_PAIRS_ONLY or Cj * 10 > Ci):
                print("{:>12}".format(Cij), end=" ")
            elif OK_PAIRS_ONLY and Cj * 10 <= Ci:
                # Excluded pair: emit 0 so the table stays rectangular
                print("{:>12}".format("0"), end=" ")

        else:
            # Level-C model: simple ratio of co-scheduled time to solo time
            time_with_smt = sample_f(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            time_wout_smt = sample_f(baseline_times[b1])
            M = time_with_smt / time_wout_smt
            M_vals.append(M)
            print("{:>10.3}".format(M), end=" ")
    print("")
# Print some statistics about the distribution
print("Average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(M_vals), np.std(M_vals), sample_f.__name__))
Ms = np.asarray(M_vals, dtype=np.float32)
if not LEVEL_C_ANALYSIS:
    print(np.sum(Ms <= 0), "of", len(M_vals), "M_i:j values are at most zero -", 100*np.sum(Ms <= 0)/len(M_vals), "percent")
    print(np.sum(Ms > 1), "of", len(M_vals), "M_i:j values are greater than one -", 100*np.sum(Ms > 1)/len(M_vals), "percent")
    # Only plot the plausible range (0, 1] for Level-A/B
    M_vals_to_plot = Ms[np.logical_and(Ms > 0, Ms <= 1)]
else:
    print(np.sum(Ms <= 1), "of", len(M_vals), "M_i:j values are at most one -", 100*np.sum(Ms <= 1)/len(M_vals), "percent")
    print(np.sum(Ms > 2), "of", len(M_vals), "M_i:j values are greater than two -", 100*np.sum(Ms > 2)/len(M_vals), "percent")
    M_vals_to_plot = Ms

print("Using Sim's analysis, average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(list(M_vals_to_plot)), np.std(list(M_vals_to_plot)), sample_f.__name__))
# plt here is the plotille module, not matplotlib: hist() returns a string
print(plt.hist(M_vals_to_plot, bins=10))
| 246 | |||
##### BELOW TEXT IS OLD OFFSET CODE (patched) #####
## This still works, but is hacky and deprecated ##
## PearsonR doesn't work though ##
if not LEVEL_C_ANALYSIS:
    benchmarkNames = idx_to_name
    benchmarkCount = len(benchmarkNames)
    numJobs = len(paired_times[0][0])

    # NOTE(review): this hard-coded list shadows the reliableNames computed
    # above from TIMING_ERROR -- the two can disagree; confirm which is wanted.
    reliableNames=["ndes", "cjpeg_wrbmp", "adpcm_enc", "cjpeg_transupp", "epic", "gsm_dec", "h264_dec", "huff_enc", "rijndael_enc", "rijndael_dec", "gsm_enc", "ammunition", "mpeg2"]

    #stats.pearsonr(time[b1][b2], oList),

    # Dump per-pair offset/length summary statistics to a CSV for offline analysis
    with open("weakRelPairs_offset.csv", mode="w+") as f3:
        print("Benchmark1", "Benchmark2", "minOffset", "maxOffset", "meanOffset", "meddOffset", "stdOffset", "minLength", "maxLength", sep=",", file=f3)
        for b1 in range (0, benchmarkCount):
            for b2 in range (0, benchmarkCount):
                if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames:
                    #exclude last job due to inaccurate timing
                    oList = paired_offsets[b1][b2][:numJobs-1]
                    jList = paired_times[b1][b2][:numJobs-1]
                    # plt.scatter(oList, jList)
                    # plt.title(benchmarkNames[b1] + ", " + benchmarkNames[b2])
                    # plt.show()
                    # print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), stats.pearsonr(jList, oList), stats.spearmanr(jList, oList), sep=",", file=f3)
                    print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), min(jList), max(jList), sep=",", file=f3)
"""
#with open("reliableGraphs.csv", mode="x") as f3:
for b1 in range(0, benchmarkCount):
	for b2 in range(0, benchmarkCount):
		if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames:
			oList = offset[b1][b2][:numJobs - 1]
			jList=time[b1][b2][:numJobs-1]
			# offset, time scatterplot
			plt.scatter(oList, jList)
			plt.title(benchmarkNames[b1] + " " + benchmarkNames[b2] + " Offsets v. Time")
			plt.show()
			#time histogram
			#plt.hist(jList, bins=10)
			#plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Completion Times")
			#plt.show()
			#offset histogram
			#plt.hist(oList, bins=10)
			#plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Offsets")
			#plt.show()
"""
