author  leochanj <jbakita@cs.unc.edu>  2020-10-23 00:13:06 -0400
committer  leochanj <jbakita@cs.unc.edu>  2020-10-23 00:13:06 -0400
commit  d427b910baffcc330b0b24d87c9b3216f306d0fb (patch)
tree  ef312bc5757860a03673316be421c1624a5bb6b7 /smt_analysis/computeSMTslowdown.py
parent  b839934c04b214c9bdab399628ee2b94a65bcd10 (diff)
parent  a7c3210215bd1181ae93b23c313941dfb44519fb (diff)
merged
Diffstat (limited to 'smt_analysis/computeSMTslowdown.py')
-rwxr-xr-x  smt_analysis/computeSMTslowdown.py  155
1 file changed, 155 insertions, 0 deletions
diff --git a/smt_analysis/computeSMTslowdown.py b/smt_analysis/computeSMTslowdown.py
new file mode 100755
index 0000000..805def1
--- /dev/null
+++ b/smt_analysis/computeSMTslowdown.py
@@ -0,0 +1,155 @@
#!/usr/bin/python3
from typing import List, Any
import numpy as np
from scipy import stats
import sys
import plotille.plotille as plt
TIMING_ERROR = 1000  # ns
LEVEL_C_ANALYSIS = False
from libSMT import *

def print_usage():
    print("This program takes in the all-pairs and baseline SMT data and computes how much each program is slowed when SMT is enabled.", file=sys.stderr)
    print("Level-A/B usage: {} <file -A> <file -B> <baseline file> --cij".format(sys.argv[0]), file=sys.stderr)
    print("Level-C usage: {} <continuous pairs> <baseline file>".format(sys.argv[0]), file=sys.stderr)

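# Example invocations (file names below are illustrative only; substitute the
# actual output files of the SMT benchmarking runs):
#   Level-A/B:  ./computeSMTslowdown.py pairs_A.txt pairs_B.txt baseline.txt --cij
#   Level-C:    ./computeSMTslowdown.py continuous_pairs.txt baseline.txt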
# Check that we got the right number of parameters
if len(sys.argv) < 3:
    print_usage()
    exit()

if len(sys.argv) > 3:
    print("Analyzing results using Level-A/B methodology...")
else:
    print("Analyzing results using Level-C methodology...")
    LEVEL_C_ANALYSIS = True

assert_valid_input_files(sys.argv[1:-1], print_usage)

# Print Cij values rather than Mij
TIMES_ONLY = len(sys.argv) > 4 and "--cij" in sys.argv[4]
OK_PAIRS_ONLY = len(sys.argv) > 4 and "--cij-ok" in sys.argv[4]
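# With "--cij" the table cells below are raw paired execution times (Cij) rather
# than slowdown factors; "--cij-ok" does the same but reports 0 for pairs whose
# baseline lengths differ by more than 10x (see the pairing rule further down).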

# Pull in the data
if not LEVEL_C_ANALYSIS:
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[3])
    paired_times, paired_offsets, name_to_idx, idx_to_name = load_paired(sys.argv[1], sys.argv[2], len(list(baseline_times.keys())))
    for key in baseline_times:
        print(key, max(baseline_times[key]))
else:
    # Paired times use an abuse of the baseline file format
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[2])
    paired_times, name_to_idx, idx_to_name = load_fake_paired(sys.argv[1])

# We work iff the baseline was run for the same set of benchmarks as the pairs were
assert_base_and_pair_keys_match(baseline_times, name_to_idx)

# Only consider benchmarks that are at least an order of magnitude longer than the timing error
reliableNames = []
for i in range(0, len(name_to_idx)):
    benchmark = idx_to_name[i]
    if min(baseline_times[benchmark]) > TIMING_ERROR * 10:
        reliableNames.append(benchmark)
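# For example, with TIMING_ERROR = 1000 ns a benchmark is only kept if its
# shortest baseline sample exceeds 10,000 ns.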

# Compute SMT slowdown for each benchmark
# Output format: table, each row is one benchmark and each column is one benchmark
# each cell is base1 + base2*m = pair solved for m, aka (pair - base1) / base2
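# Worked example with made-up numbers: if the longer task takes 900us alone (base1),
# the shorter takes 800us alone (base2), and the pair takes 1300us together, then
# m = (1300 - 900) / 800 = 0.5, i.e. the shorter task made half its normal progress
# while sharing the core.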
# Print table header
print("Bench ", end=" ")
for name in reliableNames:
    if not TIMES_ONLY: print("{:<10.10}".format(name), end=" ")
    if TIMES_ONLY: print("{:<12.12}".format(name), end=" ")
print()
# Print rows
sample_f = max # Change this to np.mean to use mean values in Mij generation
M_vals = []
for b1 in reliableNames:
    if not TIMES_ONLY: print("{:<14.14}:".format(b1), end=" ")
    if TIMES_ONLY: print("{:<14.14}:".format(b1), end=" ")
    for b2 in reliableNames:
        if not LEVEL_C_ANALYSIS:
            Ci = max(sample_f(baseline_times[b1]), sample_f(baseline_times[b2]))
            Cj = min(sample_f(baseline_times[b1]), sample_f(baseline_times[b2]))
            Cij = sample_f(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            if False:
                M = np.std(paired_times[name_to_idx[b1]][name_to_idx[b2]]) / np.mean(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            else:
                M = (Cij - Ci) / Cj
            if Cij and Cj * 10 > Ci: # We don't pair tasks with more than a 10x difference in length
                M_vals.append(M)
                if not TIMES_ONLY: print("{:>10.3}".format(M), end=" ")
            else:
                if not TIMES_ONLY: print("{:>10}".format("N/A"), end=" ")

            if TIMES_ONLY and (not OK_PAIRS_ONLY or Cj * 10 > Ci):
                print("{:>12}".format(Cij), end=" ")
            elif OK_PAIRS_ONLY and Cj * 10 <= Ci:
                print("{:>12}".format("0"), end=" ")

        else:
            time_with_smt = sample_f(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            time_wout_smt = sample_f(baseline_times[b1])
            M = time_with_smt / time_wout_smt
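            # In the Level-C case M is the ratio of paired runtime to solo runtime,
            # so M near 2 means the task made roughly half its normal progress under SMT.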
            M_vals.append(M)
            print("{:>10.3}".format(M), end=" ")
    print("")
# Print some statistics about the distribution
print("Average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(M_vals), np.std(M_vals), sample_f.__name__))
Ms = np.asarray(M_vals, dtype=np.float32)
if not LEVEL_C_ANALYSIS:
    print(np.sum(Ms <= 0), "of", len(M_vals), "M_i:j values are at most zero -", 100*np.sum(Ms <= 0)/len(M_vals), "percent")
    print(np.sum(Ms > 1), "of", len(M_vals), "M_i:j values are greater than one -", 100*np.sum(Ms > 1)/len(M_vals), "percent")
    M_vals_to_plot = Ms[np.logical_and(Ms > 0, Ms <= 1)]
else:
    print(np.sum(Ms <= 1), "of", len(M_vals), "M_i:j values are at most one -", 100*np.sum(Ms <= 1)/len(M_vals), "percent")
    print(np.sum(Ms > 2), "of", len(M_vals), "M_i:j values are greater than two -", 100*np.sum(Ms > 2)/len(M_vals), "percent")
    M_vals_to_plot = Ms
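# The reporting thresholds differ because the A/B formula is expected to yield
# M in (0, 1], while the Level-C ratio is expected to land in (1, 2]; values
# outside those ranges likely reflect measurement noise.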

print("Using Sim's analysis, average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(list(M_vals_to_plot)), np.std(list(M_vals_to_plot)), sample_f.__name__))
print(plt.hist(M_vals_to_plot, bins=10))

##### BELOW TEXT IS OLD OFFSET CODE (patched) #####
## This still works, but is hacky and deprecated ##
## PearsonR doesn't work though ##
if not LEVEL_C_ANALYSIS and False:
    benchmarkNames = idx_to_name
    benchmarkCount = len(benchmarkNames)
    numJobs = len(paired_times[0][0])

    reliableNames = ["ndes", "cjpeg_wrbmp", "adpcm_enc", "cjpeg_transupp", "epic", "gsm_dec", "h264_dec", "huff_enc", "rijndael_enc", "rijndael_dec", "gsm_enc", "ammunition", "mpeg2"]

    #stats.pearsonr(time[b1][b2], oList),

    with open("weakRelPairs_offset.csv", mode="w+") as f3:
        print("Benchmark1", "Benchmark2", "minOffset", "maxOffset", "meanOffset", "medianOffset", "stdOffset", "minLength", "maxLength", sep=",", file=f3)
        for b1 in range(0, benchmarkCount):
            for b2 in range(0, benchmarkCount):
                if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames:
                    # exclude last job due to inaccurate timing
                    oList = paired_offsets[b1][b2][:numJobs-1]
                    jList = paired_times[b1][b2][:numJobs-1]
#                    plt.scatter(oList, jList)
#                    plt.title(benchmarkNames[b1] + ", " + benchmarkNames[b2])
#                    plt.show()
#                    print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), stats.pearsonr(jList, oList), stats.spearmanr(jList, oList), sep=",", file=f3)
                    print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), min(jList), max(jList), sep=",", file=f3)
136"""
137#with open("reliableGraphs.csv", mode="x") as f3:
138 for b1 in range(0, benchmarkCount):
139 for b2 in range(0, benchmarkCount):
140 if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames:
141 oList = offset[b1][b2][:numJobs - 1]
142 jList=time[b1][b2][:numJobs-1]
143 # offset, time scatterplot
144 plt.scatter(oList, jList)
145 plt.title(benchmarkNames[b1] + " " + benchmarkNames[b2] + " Offsets v. Time")
146 plt.show()
147 #time histogram
148 #plt.hist(jList, bins=10)
149 #plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Completion Times")
150 #plt.show()
151 #offset histogram
152 #plt.hist(oList, bins=10)
153 #plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Offsets")
154 #plt.show()
155"""