From 4f0a2f94f90c1d0cd5f3408a55e68b68a8c74693 Mon Sep 17 00:00:00 2001
From: Joshua Bakita
Date: Thu, 22 Oct 2020 18:57:47 -0400
Subject: Add script for computing M_i values

Remember, M_i values are the worst-case slowdown of the average-
-case execution time.

Also factor out functions shared with M_ij computation code to a
shared library.
---
 smt_analysis/libSMT.py | 151 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)
 create mode 100755 smt_analysis/libSMT.py
(limited to 'smt_analysis/libSMT.py')

diff --git a/smt_analysis/libSMT.py b/smt_analysis/libSMT.py
new file mode 100755
index 0000000..cca2fce
--- /dev/null
+++ b/smt_analysis/libSMT.py
@@ -0,0 +1,151 @@
+import numpy as np
+import sys
+import os
+
+def assert_valid_input_files(names, on_fail):
+    # Check that all input files are valid
+    for f in names:
+        if not os.path.exists(f) or os.path.getsize(f) == 0:
+            print("ERROR: File '{}' does not exist or is empty".format(f), file=sys.stderr);
+            on_fail()
+            exit() # NOTE(review): sys.exit() is preferred over the site-provided exit()
+
+# This parses the result data from unthreaded timing experiments
+# @param f File name to load
+# @returns res Map of benchmark name to list of execution time samples
+# @returns samples Map of benchmark name to sample count
+# @returns max_res Map of benchmark to maximum execution time among all samples for that benchmark
+def load_baseline(f):
+    # constants for columns of baseline data files
+    TOTAL_NS = 5
+    BENCH_NAME = 0
+    SAMPLES = 4
+
+    # Load baseline data. This logic is based off the summarize programs
+    res = {} # Map of benchmark to list of all execution time samples
+    samples = {} # Map of benchmark name to sample count
+    max_res = {} # Map of benchmark name to maximum execution time
+
+    with open(f) as fp:
+        for line in fp:
+            s = line.split()
+            if s[BENCH_NAME] not in res:
+                res[s[BENCH_NAME]] = list([int(s[TOTAL_NS])])
+                samples[s[BENCH_NAME]] = int(s[SAMPLES])
+                max_res[s[BENCH_NAME]] = int(s[TOTAL_NS])
+            else:
+                res[s[BENCH_NAME]].append(int(s[TOTAL_NS]))
+                max_res[s[BENCH_NAME]] = max(int(s[TOTAL_NS]), max_res[s[BENCH_NAME]])
+    return res, samples, max_res
+
+# This parses the result data from paired, threaded timing experiments
+# @param file1 The -A file name
+# @param file2 The -B file name
+# @returns time 2D array of benchmark IDs to list of total container execution times
+# @returns offset 2D array of benchmark IDs to list of differences between the start
+#                 of the first and the start of the second benchmark
+# @returns name_to_idx Map of benchmark names to benchmark IDs
+# @returns idx_to_name List which when indexed with benchmark ID will yield the benchmark name
+def load_paired(file1, file2, benchmarkCount):
+    # constants for columns of paired data files
+    FIRST_PROG = 0
+    SECOND_PROG = 1
+    FIRST_CORE = 2
+    SECOND_CORE = 3
+    TRIALS = 4
+    START_S = 5 # Start seconds
+    START_N = 6 # Start nanoseconds
+    END_S = 7 # End seconds
+    END_N = 8 # End nanoseconds
+    RUN_ID = 9
+    JOB_NUM = 10
+
+    with open(file1) as f1:
+        numJobs = int(f1.readline().split()[TRIALS])
+    assert numJobs > 0
+    assert benchmarkCount > 0
+
+    # Total times of each container
+    time=[[[0 for x in range(numJobs)]for y in range(benchmarkCount)]for z in range(benchmarkCount)]
+    # Difference in time between when the first and the second task start in the container
+    offset=[[[0 for x in range(numJobs)]for y in range(benchmarkCount)]for z in range(benchmarkCount)]
+
+    # Some aggregate counters that we update as we go along
+    avg_off = 0
+    avg_off_samp = 0
+
+    # Load paired data
+    bench1 = 0 # Index to what's the current first benchmark being examined
+    bench2 = 0 # Index to what's the current second benchmark being examined
+
+    name_to_idx = {}
+    idx_to_name = [0 for x in range(benchmarkCount)]
+
+    job_idx = 0
+    with open(file1) as f1, open(file2) as f2:
+        for line1, line2 in zip(f1, f2):
+            lineArr1 = line1.split()
+            lineArr2 = line2.split()
+            start1 = int(lineArr1[START_S]) * 10**9 + int(lineArr1[START_N])
+            start2 = int(lineArr2[START_S]) * 10**9 + int(lineArr2[START_N])
+            minStart = min(start1, start2)
+            end1 = int(lineArr1[END_S]) * 10**9 + int(lineArr1[END_N])
+            end2 = int(lineArr2[END_S]) * 10**9 + int(lineArr2[END_N])
+            maxEnd = max(end1, end2)
+            # Time actually co-scheduled is minEnd - maxStart, but Sims uses a different model
+#            time[bench1][bench2][int(lineArr1[JOB_NUM])] = maxEnd - minStart
+            time[bench1][bench2][job_idx] = maxEnd - minStart
+            if lineArr1[SECOND_PROG] == "h264_dec" and lineArr2[JOB_NUM] == 0: # NOTE(review): lineArr2[JOB_NUM] is a str from split(), so "== 0" is always False — leftover debug guard that never fires; confirm intent
+                print(maxEnd - minStart)
+            # Compute offset: if first job starts at t=0, when does second start?
+#            offset[bench1][bench2][int(lineArr1[JOB_NUM])] = abs(start2-start1)
+            offset[bench1][bench2][job_idx] = abs(start2-start1)
+            # Compute some running statistics
+            avg_off += abs(start2-start1)
+            avg_off_samp += 1
+            # Increment to the next benchmark, this is weird because of the zip()
+            # This is doubly weird because our results are an upper triangular matrix
+            if job_idx == numJobs - 1: #int(lineArr1[JOB_NUM]) == numJobs - 1:
+                if bench2 < benchmarkCount-1:
+                    bench2 = bench2 + 1
+                    job_idx = 0
+                else:
+                    name_to_idx[lineArr1[FIRST_PROG]] = bench1
+                    idx_to_name[bench1] = lineArr1[FIRST_PROG]
+                    bench1 = bench1 + 1
+                    bench2 = bench1 # bench1 will never again appear as bench2
+                    job_idx = 0
+            else:
+                job_idx += 1
+    print("Average offset is: " + str(avg_off/avg_off_samp) + "ns")
+    return time, offset, name_to_idx, idx_to_name
+
+# Paired times use an abuse of the baseline file format
+def load_fake_paired(fake_paired_filename):
+    paired_times_raw, _, _ = load_baseline(fake_paired_filename)
+    benchmarkCount = int(np.sqrt(len(list(paired_times_raw.keys()))))
+    numJobs = len(next(iter(paired_times_raw.values())))
+    paired_times=[[[0 for x in range(numJobs)]for y in range(benchmarkCount)]for z in range(benchmarkCount)]
+    idx_to_name=[]
+    name_to_idx={}
+    bench1 = -1
+    # Generate the indexing approach
+    for pair in sorted(paired_times_raw.keys()):
+        [bench1name, bench2name] = pair.split('+') # Benchmark name is pair concatenated together with a '+' delimiter
+        if bench1 == -1 or bench1name != idx_to_name[-1]:
+            idx_to_name.append(bench1name)
+            name_to_idx[bench1name] = len(idx_to_name) - 1
+            bench1 += 1
+    # Populate the array
+    for bench1 in range(len(idx_to_name)):
+        for bench2 in range(len(idx_to_name)):
+            paired_times[bench1][bench2] = paired_times_raw[idx_to_name[bench1]+"+"+idx_to_name[bench2]]
+    return paired_times, name_to_idx, idx_to_name
+
+def assert_base_and_pair_keys_match(baseline_times, name_to_idx):
+    if sorted(baseline_times.keys()) != sorted(name_to_idx.keys()):
+        print("ERROR: The baseline and paired experiments were over a different set of benchmarks!", file=sys.stderr)
+        print("Baseline keys:", baseline_times.keys(), file=sys.stderr)
+        print("Paired keys:", name_to_idx.keys(), file=sys.stderr)
+        exit();
+
-- 
cgit v1.2.2