#!/usr/bin/python3
from typing import List, Any
import numpy as np
from scipy import stats
import sys
import os
import plotille.plotille as plt

TIMING_ERROR = 1000  # ns
LEVEL_C_ANALYSIS = False

def print_usage(argv):
    print("This program takes in the all-pairs and baseline SMT data and computes how much each program is slowed when SMT is enabled.", file=sys.stderr)
    print("Level-A/B usage: {} <paired -A results> <paired -B results> <baseline results> [--cij | --cij-ok]".format(argv[0]), file=sys.stderr)
    print("Level-C usage: {} <paired results> <baseline results>".format(argv[0]), file=sys.stderr)

# Check that we got the right number of parameters
if len(sys.argv) < 3:
    print_usage(sys.argv)
    exit()

if len(sys.argv) > 3:
    print("Analyzing results using Level-A/B methodology...")
else:
    print("Analyzing results using Level-C methodology...")
    LEVEL_C_ANALYSIS = True

# Check that all input files are valid
for f in sys.argv[1:-1]:
    if not os.path.exists(f) or os.path.getsize(f) == 0:
        print("ERROR: File '{}' does not exist or is empty".format(f), file=sys.stderr)
        print_usage(sys.argv)
        exit()

# Print Cij values rather than Mij
TIMES_ONLY = len(sys.argv) > 4 and "--cij" in sys.argv[4]
OK_PAIRS_ONLY = len(sys.argv) > 4 and "--cij-ok" in sys.argv[4]

# This parses the result data from unthreaded timing experiments
# @param f File name to load
# @returns res Map of benchmark name to list of execution time samples
# @returns samples Map of benchmark name to sample count
# @returns max_res Map of benchmark name to maximum execution time among all samples for that benchmark
def load_baseline(f):
    # constants for columns of baseline data files
    TOTAL_NS = 5
    BENCH_NAME = 0
    SAMPLES = 4

    # Load baseline data. This logic is based off the summarize programs
    res = {}      # Map of benchmark name to list of all execution time samples
    samples = {}  # Map of benchmark name to sample count
    max_res = {}  # Map of benchmark name to maximum execution time
    with open(f) as fp:
        for line in fp:
            s = line.split()
            if s[BENCH_NAME] not in res:
                res[s[BENCH_NAME]] = [int(s[TOTAL_NS])]
                samples[s[BENCH_NAME]] = int(s[SAMPLES])
                max_res[s[BENCH_NAME]] = int(s[TOTAL_NS])
            else:
                res[s[BENCH_NAME]].append(int(s[TOTAL_NS]))
                max_res[s[BENCH_NAME]] = max(int(s[TOTAL_NS]), max_res[s[BENCH_NAME]])
    return res, samples, max_res

# This parses the result data from paired, threaded timing experiments
# @param file1 The -A file name
# @param file2 The -B file name
# @returns time 2D array of benchmark IDs to list of total container execution times
# @returns offset 2D array of benchmark IDs to list of differences between the start
#          of the first and the start of the second benchmark
# @returns name_to_idx Map of benchmark names to benchmark IDs
# @returns idx_to_name List which when indexed with benchmark ID will yield the benchmark name
def load_paired(file1, file2, benchmarkCount):
    # constants for columns of paired data files (see the sample record sketch below)
    FIRST_PROG = 0
    SECOND_PROG = 1
    FIRST_CORE = 2
    SECOND_CORE = 3
    TRIALS = 4
    START_S = 5  # Start seconds
    START_N = 6  # Start nanoseconds
    END_S = 7    # End seconds
    END_N = 8    # End nanoseconds
    RUN_ID = 9
    JOB_NUM = 10

    with open(file1) as f1:
        numJobs = int(f1.readline().split()[TRIALS])
    assert numJobs > 0
    assert benchmarkCount > 0

    # Total times of each container
    time = [[[0 for x in range(numJobs)] for y in range(benchmarkCount)] for z in range(benchmarkCount)]
    # Difference in time between when the first and the second task start in the container
    offset = [[[0 for x in range(numJobs)] for y in range(benchmarkCount)] for z in range(benchmarkCount)]

    # Some aggregate counters that we update as we go along
    avg_off = 0
    avg_off_samp = 0
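
    # A hypothetical record from the paired (-A/-B) results files, following the
    # whitespace-separated column layout indexed by the constants above (the
    # values below are illustrative, not taken from any real run):
    #
    #   ndes epic 0 1 100 1600000000 123456789 1600000001 223456789 0 0
    #
    # i.e. first program, second program, first core, second core, trial count,
    # start (s, ns), end (s, ns), run ID, and job number.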

    # Load paired data
    bench1 = 0  # Index of the current first benchmark being examined
    bench2 = 0  # Index of the current second benchmark being examined
    name_to_idx = {}
    idx_to_name = [0 for x in range(benchmarkCount)]
    job_idx = 0
    with open(file1) as f1, open(file2) as f2:
        for line1, line2 in zip(f1, f2):
            lineArr1 = line1.split()
            lineArr2 = line2.split()
            start1 = int(lineArr1[START_S]) * 10**9 + int(lineArr1[START_N])
            start2 = int(lineArr2[START_S]) * 10**9 + int(lineArr2[START_N])
            minStart = min(start1, start2)
            end1 = int(lineArr1[END_S]) * 10**9 + int(lineArr1[END_N])
            end2 = int(lineArr2[END_S]) * 10**9 + int(lineArr2[END_N])
            maxEnd = max(end1, end2)
            # Time actually co-scheduled is minEnd - maxStart, but Sims uses a different model
            # time[bench1][bench2][int(lineArr1[JOB_NUM])] = maxEnd - minStart
            time[bench1][bench2][job_idx] = maxEnd - minStart
            if lineArr1[SECOND_PROG] == "h264_dec" and lineArr2[JOB_NUM] == 0:
                print(maxEnd - minStart)
            # Compute offset: if the first job starts at t=0, when does the second start?
            # offset[bench1][bench2][int(lineArr1[JOB_NUM])] = abs(start2-start1)
            offset[bench1][bench2][job_idx] = abs(start2 - start1)
            # Compute some running statistics
            avg_off += abs(start2 - start1)
            avg_off_samp += 1
            # Increment to the next benchmark; this is awkward because of the zip().
            # It is doubly awkward because our results form an upper triangular matrix.
            if job_idx == numJobs - 1:  # int(lineArr1[JOB_NUM]) == numJobs - 1:
                if bench2 < benchmarkCount - 1:
                    bench2 = bench2 + 1
                    job_idx = 0
                else:
                    name_to_idx[lineArr1[FIRST_PROG]] = bench1
                    idx_to_name[bench1] = lineArr1[FIRST_PROG]
                    bench1 = bench1 + 1
                    bench2 = bench1  # bench1 will never again appear as bench2
                    job_idx = 0
            else:
                job_idx += 1
    print("Average offset is: " + str(avg_off / avg_off_samp) + "ns")
    return time, offset, name_to_idx, idx_to_name

# Pull in the data
if not LEVEL_C_ANALYSIS:
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[3])
    paired_times, paired_offsets, name_to_idx, idx_to_name = load_paired(sys.argv[1], sys.argv[2], len(list(baseline_times.keys())))
    for key in baseline_times:
        print(key, max(baseline_times[key]))
else:
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[2])
    # Paired times use an abuse of the baseline file format
    paired_times_raw, _, _ = load_baseline(sys.argv[1])
    benchmarkCount = int(np.sqrt(len(list(paired_times_raw.keys()))))
    numJobs = len(next(iter(paired_times_raw.values())))
    paired_times = [[[0 for x in range(numJobs)] for y in range(benchmarkCount)] for z in range(benchmarkCount)]
    idx_to_name = []
    name_to_idx = {}
    bench1 = -1
    # Generate the indexing approach
    for pair in sorted(paired_times_raw.keys()):
        # Each key is the pair's benchmark names concatenated with a '+' delimiter
        # (see the illustrative key-layout sketch after the consistency check below)
        [bench1name, bench2name] = pair.split('+')
        if bench1 == -1 or bench1name != idx_to_name[-1]:
            idx_to_name.append(bench1name)
            name_to_idx[bench1name] = len(idx_to_name) - 1
            bench1 += 1
    # Populate the array
    for bench1 in range(len(idx_to_name)):
        for bench2 in range(len(idx_to_name)):
            paired_times[bench1][bench2] = paired_times_raw[idx_to_name[bench1] + "+" + idx_to_name[bench2]]

# We work iff the baseline was run for the same set of benchmarks as the pairs were
if sorted(baseline_times.keys()) != sorted(name_to_idx.keys()):
    print("ERROR: The baseline and paired experiments were over a different set of benchmarks!", file=sys.stderr)
    print("Baseline keys:", baseline_times.keys(), file=sys.stderr)
    print("Paired keys:", name_to_idx.keys(), file=sys.stderr)
    exit()
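
# Illustrative sketch of the Level-C key layout assumed above (the benchmark
# names "a" and "b" are hypothetical): a run over two benchmarks produces the
# keys "a+a", "a+b", "b+a", and "b+b" in paired_times_raw. The indexing loop
# above then yields idx_to_name == ["a", "b"], and the population loop copies
# each key's sample list into paired_times[0][0], [0][1], [1][0], and [1][1]
# respectively.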

# Only consider benchmarks that are at least an order of magnitude longer than the timing error
reliableNames = []
for i in range(0, len(name_to_idx)):
    benchmark = idx_to_name[i]
    if min(baseline_times[benchmark]) > TIMING_ERROR * 10:
        reliableNames.append(benchmark)

# Compute the SMT slowdown for each benchmark pair
# Output format: a table where each row and each column is one benchmark;
# each cell solves base1 + base2*m = pair for m, i.e. m = (pair - base1) / base2
# (a worked example follows the statistics printout below)
# Print table header
print("Bench ", end=" ")
for name in reliableNames:
    if not TIMES_ONLY:
        print("{:<10.10}".format(name), end=" ")
    if TIMES_ONLY:
        print("{:<12.12}".format(name), end=" ")
print()

# Print rows
sample_f = max  # Change this to np.mean to use mean values in Mij generation
M_vals = []
for b1 in reliableNames:
    if not TIMES_ONLY:
        print("{:<14.14}:".format(b1), end=" ")
    if TIMES_ONLY:
        print("{:<14.14}:".format(b1), end=" ")
    for b2 in reliableNames:
        if not LEVEL_C_ANALYSIS:
            Ci = max(sample_f(baseline_times[b1]), sample_f(baseline_times[b2]))
            Cj = min(sample_f(baseline_times[b1]), sample_f(baseline_times[b2]))
            Cij = sample_f(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            if False:
                # Alternate metric: coefficient of variation of the paired samples
                M = np.std(paired_times[name_to_idx[b1]][name_to_idx[b2]]) / np.mean(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            else:
                M = (Cij - Ci) / Cj
            if Cij and Cj * 10 > Ci:  # We don't pair tasks with more than a 10x difference in length
                M_vals.append(M)
                if not TIMES_ONLY:
                    print("{:>10.3}".format(M), end=" ")
            else:
                if not TIMES_ONLY:
                    print("{:>10}".format("N/A"), end=" ")
            if TIMES_ONLY and (not OK_PAIRS_ONLY or Cj * 10 > Ci):
                print("{:>12}".format(Cij), end=" ")
            elif OK_PAIRS_ONLY and Cj * 10 <= Ci:
                print("{:>12}".format("0"), end=" ")
        else:
            time_with_smt = sample_f(paired_times[name_to_idx[b1]][name_to_idx[b2]])
            time_wout_smt = sample_f(baseline_times[b1])
            M = time_with_smt / time_wout_smt
            M_vals.append(M)
            print("{:>10.3}".format(M), end=" ")
    print("")

# Print some statistics about the distribution
print("Average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(M_vals), np.std(M_vals), sample_f.__name__))
Ms = np.asarray(M_vals, dtype=np.float32)
if not LEVEL_C_ANALYSIS:
    print(np.sum(Ms <= 0), "of", len(M_vals), "M_i:j values are at most zero -", 100*np.sum(Ms <= 0)/len(M_vals), "percent")
    print(np.sum(Ms > 1), "of", len(M_vals), "M_i:j values are greater than one -", 100*np.sum(Ms > 1)/len(M_vals), "percent")
    M_vals_to_plot = Ms[np.logical_and(Ms > 0, Ms <= 1)]
else:
    print(np.sum(Ms <= 1), "of", len(M_vals), "M_i:j values are at most one -", 100*np.sum(Ms <= 1)/len(M_vals), "percent")
    print(np.sum(Ms > 2), "of", len(M_vals), "M_i:j values are greater than two -", 100*np.sum(Ms > 2)/len(M_vals), "percent")
    M_vals_to_plot = Ms
print("Using Sim's analysis, average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(list(M_vals_to_plot)), np.std(list(M_vals_to_plot)), sample_f.__name__))
print(plt.hist(M_vals_to_plot, bins=10))
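
# Worked example of the Mij formula above, using made-up numbers (not from any
# real measurement): if the longer task runs alone in Ci = 1,000,000 ns, the
# shorter in Cj = 800,000 ns, and their shared container takes Cij = 1,500,000 ns,
# then M = (Cij - Ci) / Cj = 500,000 / 800,000 = 0.625: the time the pair needs
# beyond the longer task's standalone time, expressed as a fraction of the
# shorter task's standalone time. This pair also passes the 10x length-ratio
# check, since Cj * 10 = 8,000,000 > Ci.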
sep=",", file=f3) for b1 in range (0, benchmarkCount): for b2 in range (0, benchmarkCount): if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames: #exclude last job due to inccurate timing oList = paired_offsets[b1][b2][:numJobs-1] jList = paired_times[b1][b2][:numJobs-1] # plt.scatter(oList, jList) # plt.title(benchmarkNames[b1] + ", " + benchmarkNames[b2]) # plt.show() # print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), stats.pearsonr(jList, oList), stats.spearmanr(jList, oList), sep=",", file=f3) print(benchmarkNames[b1], benchmarkNames[b2], min(oList), max(oList), np.mean(oList), np.median(oList), np.std(oList), min(jList), max(jList), sep=",", file=f3) """ #with open("reliableGraphs.csv", mode="x") as f3: for b1 in range(0, benchmarkCount): for b2 in range(0, benchmarkCount): if benchmarkNames[b1] in reliableNames and benchmarkNames[b2] in reliableNames: oList = offset[b1][b2][:numJobs - 1] jList=time[b1][b2][:numJobs-1] # offset, time scatterplot plt.scatter(oList, jList) plt.title(benchmarkNames[b1] + " " + benchmarkNames[b2] + " Offsets v. Time") plt.show() #time histogram #plt.hist(jList, bins=10) #plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Completion Times") #plt.show() #offset histogram #plt.hist(oList, bins=10) #plt.title(benchmarkNames[b1] + benchmarkNames[b2] + "Offsets") #plt.show() """