import os
import sys

import numpy as np


def assert_valid_input_files(names, on_fail):
    """Exit the program if any file in *names* is missing or empty.

    On the first offending file an error is printed to stderr, ``on_fail`` is
    invoked, and the process exits with a non-zero status.

    Args:
        names: Iterable of file paths to check.
        on_fail: Zero-argument callable run after the error message is
            printed and before exiting.

    Raises:
        SystemExit: With exit status 1 if a file does not exist or has size 0.
    """
    for f in names:
        if not os.path.exists(f) or os.path.getsize(f) == 0:
            print(f"ERROR: File '{f}' does not exist or is empty", file=sys.stderr)
            on_fail()
            # Non-zero status so that calling scripts can detect the failure.
            sys.exit(1)


def load_baseline(f):
    """Parse the result data from unthreaded timing experiments.

    Each non-blank line is whitespace-separated; column 0 is the benchmark
    name, column 4 the sample count and column 5 the total execution time in
    nanoseconds. This logic is based off the summarize programs.

    Args:
        f: File name to load.

    Returns:
        A tuple ``(res, samples, max_res)``:
            res: Map of benchmark name to list of all execution time samples,
                in file order.
            samples: Map of benchmark name to sample count (taken from the
                first line seen for that benchmark).
            max_res: Map of benchmark name to the maximum execution time
                among all samples for that benchmark.
    """
    # Columns of baseline data files
    BENCH_NAME = 0
    SAMPLES = 4
    TOTAL_NS = 5

    res = {}
    samples = {}
    max_res = {}
    with open(f) as fp:
        for line in fp:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            name = fields[BENCH_NAME]
            total_ns = int(fields[TOTAL_NS])
            if name not in res:
                res[name] = [total_ns]
                samples[name] = int(fields[SAMPLES])
                max_res[name] = total_ns
            else:
                res[name].append(total_ns)
                max_res[name] = max(total_ns, max_res[name])
    return res, samples, max_res


def _timestamp_ns(fields, sec_col, nsec_col):
    """Combine a (seconds, nanoseconds) column pair into integer nanoseconds."""
    return int(fields[sec_col]) * 10**9 + int(fields[nsec_col])


def load_paired(file1, file2, benchmarkCount):
    """Parse the result data from paired, threaded timing experiments.

    The two files are read in lockstep, one line from each per job. Lines are
    whitespace-separated with the column layout::

        0 first program    1 second program   2 first core   3 second core
        4 trials           5 start seconds    6 start nanoseconds
        7 end seconds      8 end nanoseconds  9 run id       10 job number

    The number of jobs per benchmark pair is read from the trials column of
    the first line of ``file1``. Results are expected as an upper triangular
    matrix: for first-benchmark index ``i`` only pairs ``(i, j)`` with
    ``j >= i`` appear, so entries below the diagonal are left at 0.

    Args:
        file1: The -A file name.
        file2: The -B file name.
        benchmarkCount: Number of distinct benchmarks in the experiment.

    Returns:
        A tuple ``(time, offset, name_to_idx, idx_to_name)``:
            time: ``benchmarkCount x benchmarkCount x numJobs`` nested list of
                total container execution times (latest end minus earliest
                start), in nanoseconds.
            offset: Same shape; absolute difference between the start of the
                first and the start of the second benchmark, in nanoseconds.
            name_to_idx: Map of benchmark names to benchmark IDs.
            idx_to_name: List which when indexed with a benchmark ID yields
                the benchmark name.

    Side effects:
        Prints the average start offset to stdout.
    """
    # Columns of paired data files that this loader reads
    FIRST_PROG = 0
    TRIALS = 4
    START_S = 5  # Start seconds
    START_N = 6  # Start nanoseconds
    END_S = 7  # End seconds
    END_N = 8  # End nanoseconds

    with open(file1) as f1:
        numJobs = int(f1.readline().split()[TRIALS])
    assert numJobs > 0
    assert benchmarkCount > 0

    # Total times of each container
    times = [
        [[0] * numJobs for _ in range(benchmarkCount)]
        for _ in range(benchmarkCount)
    ]
    # Difference in time between when the first and the second task start
    offsets = [
        [[0] * numJobs for _ in range(benchmarkCount)]
        for _ in range(benchmarkCount)
    ]

    # Aggregate counters for the average-offset report
    offset_total = 0
    offset_count = 0

    bench1 = 0  # Index of the current first benchmark being examined
    bench2 = 0  # Index of the current second benchmark being examined
    job_idx = 0  # Running job counter within the current pair
    name_to_idx = {}
    idx_to_name = [0] * benchmarkCount

    with open(file1) as f1, open(file2) as f2:
        for line1, line2 in zip(f1, f2):
            fields1 = line1.split()
            fields2 = line2.split()

            start1 = _timestamp_ns(fields1, START_S, START_N)
            start2 = _timestamp_ns(fields2, START_S, START_N)
            end1 = _timestamp_ns(fields1, END_S, END_N)
            end2 = _timestamp_ns(fields2, END_S, END_N)

            # Time actually co-scheduled is minEnd - maxStart, but Sims uses a
            # different model: the whole span from first start to last end.
            times[bench1][bench2][job_idx] = max(end1, end2) - min(start1, start2)

            # Offset: if the first job starts at t=0, when does the second start?
            start_gap = abs(start2 - start1)
            offsets[bench1][bench2][job_idx] = start_gap
            offset_total += start_gap
            offset_count += 1

            # Advance to the next job / pair. Jobs are counted with job_idx
            # rather than the file's job-number column.
            if job_idx != numJobs - 1:
                job_idx += 1
                continue
            job_idx = 0
            if bench2 < benchmarkCount - 1:
                bench2 += 1
            else:
                # Row finished: record its name and move down the diagonal,
                # because the results form an upper triangular matrix.
                name_to_idx[fields1[FIRST_PROG]] = bench1
                idx_to_name[bench1] = fields1[FIRST_PROG]
                bench1 += 1
                bench2 = bench1  # bench1 will never again appear as bench2

    print("Average offset is: " + str(offset_total / offset_count) + "ns")
    return times, offsets, name_to_idx, idx_to_name


def load_fake_paired(fake_paired_filename):
    """Load paired times stored in (an abuse of) the baseline file format.

    The benchmark-name column of each line holds the pair as the two benchmark
    names concatenated with a ``'+'`` delimiter. Benchmark IDs are assigned in
    sorted order of the pair keys, by first benchmark name.

    Args:
        fake_paired_filename: File name to load; parsed with ``load_baseline``.

    Returns:
        A tuple ``(paired_times, name_to_idx, idx_to_name)``:
            paired_times: Nested list where ``paired_times[i][j]`` is the list
                of execution time samples for pair ``name_i+name_j``.
            name_to_idx: Map of benchmark names to benchmark IDs.
            idx_to_name: List which when indexed with a benchmark ID yields
                the benchmark name.

    Raises:
        ValueError: If a key does not split into exactly two names on ``'+'``.
        KeyError: If some combination of two first-benchmark names has no
            entry in the file.
    """
    paired_times_raw, _, _ = load_baseline(fake_paired_filename)

    # Generate the indexing approach
    idx_to_name = []
    name_to_idx = {}
    for pair in sorted(paired_times_raw):
        # Unpacking enforces that the key is exactly "<first>+<second>"
        first_name, _second_name = pair.split('+')
        # Membership test instead of comparing with the previous name, so the
        # result does not depend on equal names being adjacent in sort order.
        if first_name not in name_to_idx:
            name_to_idx[first_name] = len(idx_to_name)
            idx_to_name.append(first_name)

    # Populate the array
    paired_times = [
        [paired_times_raw[first + "+" + second] for second in idx_to_name]
        for first in idx_to_name
    ]
    return paired_times, name_to_idx, idx_to_name


def assert_base_and_pair_keys_match(baseline_times, name_to_idx):
    """Exit the program unless both experiments cover the same benchmarks.

    Args:
        baseline_times: Mapping keyed by benchmark name from the baseline
            experiment.
        name_to_idx: Mapping keyed by benchmark name from the paired
            experiment.

    Raises:
        SystemExit: With exit status 1 if the two key sets differ; both key
            sets are printed to stderr first.
    """
    if sorted(baseline_times.keys()) != sorted(name_to_idx.keys()):
        print("ERROR: The baseline and paired experiments were over a different set of benchmarks!", file=sys.stderr)
        print("Baseline keys:", baseline_times.keys(), file=sys.stderr)
        print("Paired keys:", name_to_idx.keys(), file=sys.stderr)
        # Non-zero status so that calling scripts can detect the failure.
        sys.exit(1)