#!/usr/bin/python3
"""Compute how much each benchmark is slowed down when SMT is enabled.

Reads baseline (solo) execution times and paired (co-scheduled) execution
times, prints a benchmark-by-benchmark table, and then prints summary
statistics plus a text histogram of the resulting M values.

The mode is selected purely by argument count:

* Level-A/B (three or more arguments): ``argv[1]`` and ``argv[2]`` are handed
  to ``load_paired`` and ``argv[3]`` to ``load_baseline``. If an ``argv[4]``
  containing ``--cij`` is given, raw paired times (Cij) are printed instead of
  M values; ``--cij-ok`` additionally prints ``0`` for pairs whose baseline
  lengths differ by 10x or more.
* Level-C (exactly two arguments): ``argv[1]`` is handed to
  ``load_fake_paired`` and ``argv[2]`` to ``load_baseline``.
"""
from typing import List, Any
import sys

import numpy as np
from scipy import stats
import plotille.plotille as plt

TIMING_ERROR = 1000  # ns
LEVEL_C_ANALYSIS = False
# The wildcard import intentionally stays *after* the constants and the
# imports above (its original position) so that name-shadowing order between
# this file and libSMT is unchanged.
from libSMT import *

# Debug switch: when True, each Level-A/B cell reports the coefficient of
# variation (std / mean) of the pair's samples instead of the M value.
USE_COEFFICIENT_OF_VARIATION = False

# Switch for the deprecated offset analysis at the bottom of this file.
RUN_DEPRECATED_OFFSET_ANALYSIS = False


def print_usage():
    """Print the command-line usage summary to stderr."""
    print("This program takes in the all-pairs and baseline SMT data and computes how much each program is slowed when SMT is enabled.", file=sys.stderr)
    print("Level-A/B usage: {} <paired file 1> <paired file 2> <baseline file> [--cij|--cij-ok]".format(sys.argv[0]), file=sys.stderr)
    print("Level-C usage: {} <paired file> <baseline file>".format(sys.argv[0]), file=sys.stderr)


def _print_share(mask, description):
    """Print how many entries of the boolean array *mask* are set, and the percentage."""
    count = np.sum(mask)
    print(count, "of", len(mask), "M_i:j values are", description, "-",
          100 * count / len(mask), "percent")


# Check that we got the right number of parameters
if len(sys.argv) < 3:
    print_usage()
    # Exit with a failure status so calling scripts can detect the usage error.
    sys.exit(1)
if len(sys.argv) > 3:
    print("Analyzing results using Level-A/B methodology...")
else:
    print("Analyzing results using Level-C methodology...")
    LEVEL_C_ANALYSIS = True

# NOTE(review): this slice always drops the final argument, so the baseline
# file is only included when a trailing option such as --cij is present.
# Confirm against libSMT.assert_valid_input_files whether that is intended.
assert_valid_input_files(sys.argv[1:-1], print_usage)

# Print Cij values rather than Mij.
# "--cij-ok" contains "--cij", so OK_PAIRS_ONLY implies TIMES_ONLY.
option = sys.argv[4] if len(sys.argv) > 4 else ""
TIMES_ONLY = "--cij" in option
OK_PAIRS_ONLY = "--cij-ok" in option

# Pull in the data
if not LEVEL_C_ANALYSIS:
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[3])
    paired_times, paired_offsets, name_to_idx, idx_to_name = load_paired(
        sys.argv[1], sys.argv[2], len(baseline_times))
    for key in baseline_times:
        print(key, max(baseline_times[key]))
else:
    # Paired times use an abuse of the baseline file format
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[2])
    paired_times, name_to_idx, idx_to_name = load_fake_paired(sys.argv[1])

# We work iff the baseline was run for the same set of benchmarks as the pairs were
assert_base_and_pair_keys_match(baseline_times, name_to_idx)

# Only consider benchmarks that are at least an order of magnitude longer than the timing error
reliableNames = [
    idx_to_name[i]
    for i in range(len(name_to_idx))
    if min(baseline_times[idx_to_name[i]]) > TIMING_ERROR * 10
]

# Compute SMT slowdown for each benchmark
# Output format: table, each row is one benchmark and each column is one benchmark
# each cell is base1 + base2*m = pair solved for m, aka (pair - base1) / base2

# Print table header. The first cell is padded to the width of the
# "{:<14.14}:" row labels so the column names line up with their data.
name_format = "{:<12.12}" if TIMES_ONLY else "{:<10.10}"
print("{:<15}".format("Bench"), end=" ")
for name in reliableNames:
    print(name_format.format(name), end=" ")
print()

# Print rows
sample_f = max  # Change this to np.mean to use mean values in Mij generation

# Reduce every baseline sample list once up front instead of once per pair.
baseline_sample = {name: sample_f(baseline_times[name]) for name in reliableNames}

M_vals = []
for b1 in reliableNames:
    print("{:<14.14}:".format(b1), end=" ")
    for b2 in reliableNames:
        pair_samples = paired_times[name_to_idx[b1]][name_to_idx[b2]]

        if LEVEL_C_ANALYSIS:
            # Level-C: plain ratio of the time with SMT to the time without.
            M = sample_f(pair_samples) / baseline_sample[b1]
            M_vals.append(M)
            print("{:>10.3}".format(M), end=" ")
            continue

        # Level-A/B: Ci is the longer of the two baselines, Cj the shorter.
        Ci = max(baseline_sample[b1], baseline_sample[b2])
        Cj = min(baseline_sample[b1], baseline_sample[b2])
        Cij = sample_f(pair_samples)
        if USE_COEFFICIENT_OF_VARIATION:
            M = np.std(pair_samples) / np.mean(pair_samples)
        else:
            M = (Cij - Ci) / Cj

        # We don't pair tasks with more than a 10x difference in length
        lengths_compatible = Cj * 10 > Ci
        if Cij and lengths_compatible:
            M_vals.append(M)
            if not TIMES_ONLY:
                print("{:>10.3}".format(M), end=" ")
        elif not TIMES_ONLY:
            print("{:>10}".format("N/A"), end=" ")

        if TIMES_ONLY and (not OK_PAIRS_ONLY or lengths_compatible):
            print("{:>12}".format(Cij), end=" ")
        elif OK_PAIRS_ONLY and not lengths_compatible:
            print("{:>12}".format("0"), end=" ")
    print("")

# Print some statistics about the distribution
print("Average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(
    np.mean(M_vals), np.std(M_vals), sample_f.__name__))
Ms = np.asarray(M_vals, dtype=np.float32)
if not LEVEL_C_ANALYSIS:
    _print_share(Ms <= 0, "at most zero")
    _print_share(Ms > 1, "greater than one")
    # Only values in (0, 1] are kept for the second summary and the histogram.
    M_vals_to_plot = Ms[np.logical_and(Ms > 0, Ms <= 1)]
else:
    _print_share(Ms <= 1, "at most one")
    _print_share(Ms > 2, "greater than two")
    M_vals_to_plot = Ms

print("Using Sim's analysis, average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(
    np.mean(M_vals_to_plot), np.std(M_vals_to_plot), sample_f.__name__))
print(plt.hist(M_vals_to_plot, bins=10))

##### BELOW TEXT IS OLD OFFSET CODE (patched) #####
## This still works, but is hacky and deprecated ##
## PearsonR doesn't work though ##
if not LEVEL_C_ANALYSIS and RUN_DEPRECATED_OFFSET_ANALYSIS:
    benchmarkNames = idx_to_name
    benchmarkCount = len(benchmarkNames)
    numJobs = len(paired_times[0][0])

    reliableNames = ["ndes", "cjpeg_wrbmp", "adpcm_enc", "cjpeg_transupp", "epic", "gsm_dec", "h264_dec", "huff_enc", "rijndael_enc", "rijndael_dec", "gsm_enc", "ammunition", "mpeg2"]

    # stats.pearsonr(time[b1][b2], oList),
    with open("weakRelPairs_offset.csv", mode="w+") as f3:
        print("Benchmark1", "Benchmark2", "minOffset", "maxOffset", "meanOffset", "meddOffset", "stdOffset", "minLength", "maxLength", sep=",", file=f3)
        for b1 in range(benchmarkCount):
            for b2 in range(benchmarkCount):
                if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames:
                    # Exclude last job due to inaccurate timing
                    oList = paired_offsets[b1][b2][:numJobs - 1]
                    jList = paired_times[b1][b2][:numJobs - 1]
                    # plt.scatter(oList, jList)
                    # plt.title(benchmarkNames[b1] + ", " + benchmarkNames[b2])
                    # plt.show()
                    # print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), stats.pearsonr(jList, oList), stats.spearmanr(jList, oList), sep=",", file=f3)
                    print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), min(jList), max(jList), sep=",", file=f3)

# Older graphing code, kept for reference only. It was previously stored in a
# bare string literal and has never been executed by this script.
#
# #with open("reliableGraphs.csv", mode="x") as f3:
# for b1 in range(0, benchmarkCount):
#     for b2 in range(0, benchmarkCount):
#         if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames:
#             oList = offset[b1][b2][:numJobs - 1]
#             jList=time[b1][b2][:numJobs-1]
#             # offset, time scatterplot
#             plt.scatter(oList, jList)
#             plt.title(benchmarkNames[b1] + " " + benchmarkNames[b2] + " Offsets v. Time")
#             plt.show()
#             #time histogram
#             #plt.hist(jList, bins=10)
#             #plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Completion Times")
#             #plt.show()
#             #offset histogram
#             #plt.hist(oList, bins=10)
#             #plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Offsets")
#             #plt.show()