diff options
author | leochanj <jbakita@cs.unc.edu> | 2020-10-23 00:13:06 -0400 |
---|---|---|
committer | leochanj <jbakita@cs.unc.edu> | 2020-10-23 00:13:06 -0400 |
commit | d427b910baffcc330b0b24d87c9b3216f306d0fb (patch) | |
tree | ef312bc5757860a03673316be421c1624a5bb6b7 /smt_analysis/computeSMTslowdown.py | |
parent | b839934c04b214c9bdab399628ee2b94a65bcd10 (diff) | |
parent | a7c3210215bd1181ae93b23c313941dfb44519fb (diff) |
merged
Diffstat (limited to 'smt_analysis/computeSMTslowdown.py')
-rwxr-xr-x | smt_analysis/computeSMTslowdown.py | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/smt_analysis/computeSMTslowdown.py b/smt_analysis/computeSMTslowdown.py new file mode 100755 index 0000000..805def1 --- /dev/null +++ b/smt_analysis/computeSMTslowdown.py | |||
@@ -0,0 +1,155 @@ | |||
#!/usr/bin/python3
from typing import List, Any
import numpy as np
from scipy import stats
import sys
# NOTE(review): package-local import path — presumably a vendored copy of
# plotille (terminal plotting); confirm against repo layout.
import plotille.plotille as plt
TIMING_ERROR = 1000 #ns -- measurement noise floor; benchmarks must be >=10x this to be "reliable"
LEVEL_C_ANALYSIS = False  # flipped to True below when invoked with the Level-C argument count
# Provides load_baseline, load_paired, load_fake_paired,
# assert_valid_input_files, assert_base_and_pair_keys_match (used below).
from libSMT import *
10 | |||
def print_usage():
    """Write a one-line description and per-mode usage strings to stderr."""
    prog = sys.argv[0]
    usage_lines = (
        "This program takes in the all-pairs and baseline SMT data and computes how much each program is slowed when SMT in enabled.",
        "Level-A/B usage: {} <file -A> <file -B> <baseline file> --cij".format(prog),
        "Level-C usage: {} <continuous pairs> <baseline file>".format(prog),
    )
    for line in usage_lines:
        print(line, file=sys.stderr)
15 | |||
# ---- Command-line handling ----
argc = len(sys.argv)

# Too few arguments: show usage and stop.
if argc < 3:
    print_usage()
    exit()

# Four or more positional arguments selects the Level-A/B path;
# exactly three selects Level-C.
if argc > 3:
    print("Analyzing results using Level-A/B methodology...")
else:
    print("Analyzing results using Level-C methodology...")
    LEVEL_C_ANALYSIS = True

assert_valid_input_files(sys.argv[1:-1], print_usage)

# Print Cij values rather than Mij.
# NOTE: these are substring tests on argv[4], so "--cij-ok" also turns on
# TIMES_ONLY — the table printer depends on that pairing.
TIMES_ONLY = argc > 4 and "--cij" in sys.argv[4]
OK_PAIRS_ONLY = argc > 4 and "--cij-ok" in sys.argv[4]
32 | |||
# Pull in the data.
# load_baseline/load_paired/load_fake_paired come from libSMT (star-imported
# above); presumably each maps benchmark name/index to a list of time
# samples — TODO confirm against libSMT.
if not LEVEL_C_ANALYSIS:
    # Level-A/B: argv[1]/argv[2] are the -A/-B all-pairs files, argv[3] the baseline.
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[3])
    paired_times, paired_offsets, name_to_idx, idx_to_name = load_paired(sys.argv[1], sys.argv[2], len(list(baseline_times.keys())))
    # Echo each benchmark's worst observed baseline time.
    for key in baseline_times:
        print(key,max(baseline_times[key]))
else:
    # Paired times use an abuse of the baseline file format
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[2])
    paired_times, name_to_idx, idx_to_name = load_fake_paired(sys.argv[1])

# We work iff the baseline was run for the same set of benchmarks as the pairs were
assert_base_and_pair_keys_match(baseline_times, name_to_idx)
46 | |||
# Only consider benchmarks that are at least an order of magnitude longer than
# the timing error; shorter ones would make the derived slowdowns
# noise-dominated. Iterating indices through idx_to_name preserves load order.
reliableNames = [
    idx_to_name[i]
    for i in range(len(name_to_idx))
    if min(baseline_times[idx_to_name[i]]) > TIMING_ERROR * 10
]
53 | |||
# Compute SMT slowdown for each benchmark
# Output format: table, each row is one benchmark and each column is one benchmark
# each cell is base1 + base2*m = pair solved for m, aka (pair - base1) / base2
# Print table header
print("Bench ", end=" ")
for name in reliableNames:
    # Mij cells are 10 characters wide; Cij cells are 12.
    if not TIMES_ONLY: print("{:<10.10}".format(name), end=" ")
    if TIMES_ONLY: print("{:<12.12}".format(name), end=" ")
print()
# Print rows
sample_f = max # Change this to np.mean to use mean values in Mij generation
M_vals = []  # All accepted M values, pooled for the summary statistics below
for b1 in reliableNames:
    # Row label (both branches print the same; kept for symmetry with the header)
    if not TIMES_ONLY: print("{:<14.14}:".format(b1), end=" ")
    if TIMES_ONLY: print("{:<14.14}:".format(b1), end=" ")
    for b2 in reliableNames:
        if not LEVEL_C_ANALYSIS:
            # Ci is the larger and Cj the smaller of the two baseline costs
            Ci = max(sample_f(baseline_times[b1]), sample_f(baseline_times[b2]))
            Cj = min(sample_f(baseline_times[b1]), sample_f(baseline_times[b2]))
            # Cost of the pair when co-scheduled with SMT enabled
            Cij = sample_f(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            if False: # Disabled experiment: coefficient of variation instead of M
                M = np.std(paired_times[name_to_idx[b1]][name_to_idx[b2]]) / np.mean(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            else:
                # Solve Ci + Cj*M = Cij for M
                M = (Cij - Ci) / Cj
            if Cij and Cj * 10 > Ci: # We don't pair tasks with more than a 10x difference in length
                M_vals.append(M)
                if not TIMES_ONLY: print("{:>10.3}".format(M), end=" ")
            else:
                if not TIMES_ONLY: print("{:>10}".format("N/A"), end=" ")

            # Cij mode: print the raw paired cost, or "0" for pairs rejected
            # by the 10x rule when --cij-ok was given
            if TIMES_ONLY and (not OK_PAIRS_ONLY or Cj * 10 > Ci):
                print("{:>12}".format(Cij), end=" ")
            elif OK_PAIRS_ONLY and Cj * 10 <= Ci:
                print("{:>12}".format("0"), end=" ")

        else:
            # Level-C: M is simply b1's paired-over-baseline slowdown ratio
            time_with_smt = sample_f(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            time_wout_smt = sample_f(baseline_times[b1])
            M = time_with_smt / time_wout_smt
            M_vals.append(M)
            print("{:>10.3}".format(M), end=" ")
    print("")
# Print some statistics about the distribution
print("Average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(M_vals), np.std(M_vals), sample_f.__name__))
Ms = np.asarray(M_vals, dtype=np.float32)
total = len(M_vals)

def _report_fraction(count, description):
    # One summary line: how many M values fall in the range, and the percentage.
    print(count, "of", total, description, 100*count/total, "percent")

if not LEVEL_C_ANALYSIS:
    _report_fraction(np.sum(Ms <= 0), "M_i:j values are at most zero -")
    _report_fraction(np.sum(Ms > 1), "M_i:j values are greater than one -")
    # Plot only the physically sensible range (0, 1]
    M_vals_to_plot = Ms[np.logical_and(Ms > 0, Ms <= 1)]
else:
    _report_fraction(np.sum(Ms <= 1), "M_i:j values are at most one -")
    _report_fraction(np.sum(Ms > 2), "M_i:j values are greater than two -")
    M_vals_to_plot = Ms

print("Using Sim's analysis, average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(list(M_vals_to_plot)), np.std(list(M_vals_to_plot)), sample_f.__name__))
print(plt.hist(M_vals_to_plot, bins=10))
110 | |||
##### BELOW TEXT IS OLD OFFSET CODE (patched) #####
## This still works, but is hacky and deprecated ##
## PearsonR doesn't work though ##
# NOTE(review): the "and False" below makes this whole branch dead code;
# kept for reference only.
if not LEVEL_C_ANALYSIS and False:
    benchmarkNames = idx_to_name
    benchmarkCount = len(benchmarkNames)
    # Jobs per pairing; assumes every pairing ran the same number of jobs
    numJobs = len(paired_times[0][0])

    # Hard-coded benchmark whitelist (shadows the computed reliableNames above)
    reliableNames=["ndes", "cjpeg_wrbmp", "adpcm_enc", "cjpeg_transupp", "epic", "gsm_dec", "h264_dec", "huff_enc", "rijndael_enc", "rijndael_dec", "gsm_enc", "ammunition", "mpeg2"]

    #stats.pearsonr(time[b1][b2], oList),

    # Dump per-pair offset/length statistics as CSV
    with open("weakRelPairs_offset.csv", mode="w+") as f3:
        print("Benchmark1", "Benchmark2", "minOffset", "maxOffset", "meanOffset", "meddOffset", "stdOffset", "minLength", "maxLength", sep=",", file=f3)
        for b1 in range (0, benchmarkCount):
            for b2 in range (0, benchmarkCount):
                if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames:
                    #exclude last job due to inaccurate timing
                    oList = paired_offsets[b1][b2][:numJobs-1]
                    jList = paired_times[b1][b2][:numJobs-1]
                    # plt.scatter(oList, jList)
                    # plt.title(benchmarkNames[b1] + ", " + benchmarkNames[b2])
                    # plt.show()
                    # print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), stats.pearsonr(jList, oList), stats.spearmanr(jList, oList), sep=",", file=f3)
                    print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), min(jList), max(jList), sep=",", file=f3)
# The block below is a top-level string literal (effectively commented-out
# plotting code), evaluated and discarded at runtime.
"""
#with open("reliableGraphs.csv", mode="x") as f3:
for b1 in range(0, benchmarkCount):
    for b2 in range(0, benchmarkCount):
        if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames:
            oList = offset[b1][b2][:numJobs - 1]
            jList=time[b1][b2][:numJobs-1]
            # offset, time scatterplot
            plt.scatter(oList, jList)
            plt.title(benchmarkNames[b1] + " " + benchmarkNames[b2] + " Offsets v. Time")
            plt.show()
            #time histogram
            #plt.hist(jList, bins=10)
            #plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Completion Times")
            #plt.show()
            #offset histogram
            #plt.hist(oList, bins=10)
            #plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Offsets")
            #plt.show()
"""