author  leochanj <jbakita@cs.unc.edu>  2020-10-23 00:13:06 -0400
committer  leochanj <jbakita@cs.unc.edu>  2020-10-23 00:13:06 -0400
commit  d427b910baffcc330b0b24d87c9b3216f306d0fb (patch)
tree  ef312bc5757860a03673316be421c1624a5bb6b7 /smt_analysis/computeSMTslowdown.py
parent  b839934c04b214c9bdab399628ee2b94a65bcd10 (diff)
parent  a7c3210215bd1181ae93b23c313941dfb44519fb (diff)
merged
Diffstat (limited to 'smt_analysis/computeSMTslowdown.py')
-rwxr-xr-x  smt_analysis/computeSMTslowdown.py  155
1 file changed, 155 insertions, 0 deletions
diff --git a/smt_analysis/computeSMTslowdown.py b/smt_analysis/computeSMTslowdown.py
new file mode 100755
index 0000000..805def1
--- /dev/null
+++ b/smt_analysis/computeSMTslowdown.py
@@ -0,0 +1,155 @@
#!/usr/bin/python3
from typing import List, Any
import numpy as np
from scipy import stats
import sys
import plotille.plotille as plt
TIMING_ERROR = 1000  # ns
LEVEL_C_ANALYSIS = False
from libSMT import *

def print_usage():
    print("This program takes in the all-pairs and baseline SMT data and computes how much each program is slowed when SMT is enabled.", file=sys.stderr)
    print("Level-A/B usage: {} <file -A> <file -B> <baseline file> --cij".format(sys.argv[0]), file=sys.stderr)
    print("Level-C usage: {} <continuous pairs> <baseline file>".format(sys.argv[0]), file=sys.stderr)

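# Example invocations (file names below are illustrative only; substitute the
# actual output files of the SMT benchmarking runs):
#   Level-A/B:  ./computeSMTslowdown.py pairs_A.txt pairs_B.txt baseline.txt --cij
#   Level-C:    ./computeSMTslowdown.py continuous_pairs.txt baseline.txt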
# Check that we got the right number of parameters
if len(sys.argv) < 3:
    print_usage()
    exit()

if len(sys.argv) > 3:
    print("Analyzing results using Level-A/B methodology...")
else:
    print("Analyzing results using Level-C methodology...")
    LEVEL_C_ANALYSIS = True

assert_valid_input_files(sys.argv[1:-1], print_usage)

# Print Cij values rather than Mij
TIMES_ONLY = len(sys.argv) > 4 and "--cij" in sys.argv[4]
OK_PAIRS_ONLY = len(sys.argv) > 4 and "--cij-ok" in sys.argv[4]
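# With "--cij" the table cells below are raw paired execution times (Cij) rather
# than slowdown factors; "--cij-ok" does the same but reports 0 for pairs whose
# baseline lengths differ by more than 10x (see the pairing rule further down).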

# Pull in the data
if not LEVEL_C_ANALYSIS:
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[3])
    paired_times, paired_offsets, name_to_idx, idx_to_name = load_paired(sys.argv[1], sys.argv[2], len(list(baseline_times.keys())))
    for key in baseline_times:
        print(key, max(baseline_times[key]))
else:
    # Paired times use an abuse of the baseline file format
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[2])
    paired_times, name_to_idx, idx_to_name = load_fake_paired(sys.argv[1])

# We work iff the baseline was run for the same set of benchmarks as the pairs were
assert_base_and_pair_keys_match(baseline_times, name_to_idx)

# Only consider benchmarks that are at least an order of magnitude longer than the timing error
reliableNames = []
for i in range(0, len(name_to_idx)):
    benchmark = idx_to_name[i]
    if min(baseline_times[benchmark]) > TIMING_ERROR * 10:
        reliableNames.append(benchmark)
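# For example, with TIMING_ERROR = 1000 ns a benchmark is only kept if its
# shortest baseline sample exceeds 10,000 ns.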

# Compute SMT slowdown for each benchmark
# Output format: table, each row is one benchmark and each column is one benchmark
# each cell is base1 + base2*m = pair solved for m, aka (pair - base1) / base2
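# Worked example with made-up numbers: if the longer task takes 900us alone (base1),
# the shorter takes 800us alone (base2), and the pair takes 1300us together, then
# m = (1300 - 900) / 800 = 0.5, i.e. the shorter task made half its normal progress
# while sharing the core.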
# Print table header
print("Bench ", end=" ")
for name in reliableNames:
    if not TIMES_ONLY: print("{:<10.10}".format(name), end=" ")
    if TIMES_ONLY: print("{:<12.12}".format(name), end=" ")
print()
# Print rows
sample_f = max # Change this to np.mean to use mean values in Mij generation
M_vals = []
for b1 in reliableNames:
    if not TIMES_ONLY: print("{:<14.14}:".format(b1), end=" ")
    if TIMES_ONLY: print("{:<14.14}:".format(b1), end=" ")
    for b2 in reliableNames:
        if not LEVEL_C_ANALYSIS:
            Ci = max(sample_f(baseline_times[b1]), sample_f(baseline_times[b2]))
            Cj = min(sample_f(baseline_times[b1]), sample_f(baseline_times[b2]))
            Cij = sample_f(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            if False:
                M = np.std(paired_times[name_to_idx[b1]][name_to_idx[b2]]) / np.mean(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            else:
                M = (Cij - Ci) / Cj
            if Cij and Cj * 10 > Ci: # We don't pair tasks with more than a 10x difference in length
                M_vals.append(M)
                if not TIMES_ONLY: print("{:>10.3}".format(M), end=" ")
            else:
                if not TIMES_ONLY: print("{:>10}".format("N/A"), end=" ")

            if TIMES_ONLY and (not OK_PAIRS_ONLY or Cj * 10 > Ci):
                print("{:>12}".format(Cij), end=" ")
            elif OK_PAIRS_ONLY and Cj * 10 <= Ci:
                print("{:>12}".format("0"), end=" ")

        else:
            time_with_smt = sample_f(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            time_wout_smt = sample_f(baseline_times[b1])
            M = time_with_smt / time_wout_smt
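            # In the Level-C case M is the ratio of paired runtime to solo runtime,
            # so M near 2 means the task made roughly half its normal progress under SMT.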
            M_vals.append(M)
            print("{:>10.3}".format(M), end=" ")
    print("")
# Print some statistics about the distribution
print("Average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(M_vals), np.std(M_vals), sample_f.__name__))
Ms = np.asarray(M_vals, dtype=np.float32)
if not LEVEL_C_ANALYSIS:
    print(np.sum(Ms <= 0), "of", len(M_vals), "M_i:j values are at most zero -", 100*np.sum(Ms <= 0)/len(M_vals), "percent")
    print(np.sum(Ms > 1), "of", len(M_vals), "M_i:j values are greater than one -", 100*np.sum(Ms > 1)/len(M_vals), "percent")
    M_vals_to_plot = Ms[np.logical_and(Ms > 0, Ms <= 1)]
else:
    print(np.sum(Ms <= 1), "of", len(M_vals), "M_i:j values are at most one -", 100*np.sum(Ms <= 1)/len(M_vals), "percent")
    print(np.sum(Ms > 2), "of", len(M_vals), "M_i:j values are greater than two -", 100*np.sum(Ms > 2)/len(M_vals), "percent")
    M_vals_to_plot = Ms
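# The reporting thresholds differ because the A/B formula is expected to yield
# M in (0, 1], while the Level-C ratio is expected to land in (1, 2]; values
# outside those ranges likely reflect measurement noise.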

print("Using Sim's analysis, average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(list(M_vals_to_plot)), np.std(list(M_vals_to_plot)), sample_f.__name__))
print(plt.hist(M_vals_to_plot, bins=10))

##### BELOW TEXT IS OLD OFFSET CODE (patched) #####
## This still works, but is hacky and deprecated ##
## PearsonR doesn't work though ##
if not LEVEL_C_ANALYSIS and False:
    benchmarkNames = idx_to_name
    benchmarkCount = len(benchmarkNames)
    numJobs = len(paired_times[0][0])

    reliableNames = ["ndes", "cjpeg_wrbmp", "adpcm_enc", "cjpeg_transupp", "epic", "gsm_dec", "h264_dec", "huff_enc", "rijndael_enc", "rijndael_dec", "gsm_enc", "ammunition", "mpeg2"]

    #stats.pearsonr(time[b1][b2], oList),

    with open("weakRelPairs_offset.csv", mode="w+") as f3:
        print("Benchmark1", "Benchmark2", "minOffset", "maxOffset", "meanOffset", "medianOffset", "stdOffset", "minLength", "maxLength", sep=",", file=f3)
        for b1 in range(0, benchmarkCount):
            for b2 in range(0, benchmarkCount):
                if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames:
                    # exclude last job due to inaccurate timing
                    oList = paired_offsets[b1][b2][:numJobs-1]
                    jList = paired_times[b1][b2][:numJobs-1]
#                    plt.scatter(oList, jList)
#                    plt.title(benchmarkNames[b1] + ", " + benchmarkNames[b2])
#                    plt.show()
#                    print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), stats.pearsonr(jList, oList), stats.spearmanr(jList, oList), sep=",", file=f3)
                    print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), min(jList), max(jList), sep=",", file=f3)
136"""
137#with open("reliableGraphs.csv", mode="x") as f3:
138 for b1 in range(0, benchmarkCount):
139 for b2 in range(0, benchmarkCount):
140 if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames:
141 oList = offset[b1][b2][:numJobs - 1]
142 jList=time[b1][b2][:numJobs-1]
143 # offset, time scatterplot
144 plt.scatter(oList, jList)
145 plt.title(benchmarkNames[b1] + " " + benchmarkNames[b2] + " Offsets v. Time")
146 plt.show()
147 #time histogram
148 #plt.hist(jList, bins=10)
149 #plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Completion Times")
150 #plt.show()
151 #offset histogram
152 #plt.hist(oList, bins=10)
153 #plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Offsets")
154 #plt.show()
155"""