1 files changed, 151 insertions, 0 deletions
diff --git a/smt_analysis/libSMT.py b/smt_analysis/libSMT.py
new file mode 100755
index 0000000..cca2fce
--- /dev/null
+++ b/smt_analysis/libSMT.py
@@ -0,0 +1,151 @@
+import numpy as np
+import sys
+import os
def assert_valid_input_files(names, on_fail):
    """Terminate the program unless every path in *names* is a non-empty file.

    For the first path that does not exist or has size zero, an error is
    printed to stderr, ``on_fail()`` is called, and the process exits.

    Args:
        names: Iterable of file paths to check.
        on_fail: Zero-argument callable invoked just before exiting.

    Raises:
        SystemExit: With exit status 1 when a path is missing or empty.
    """
    for f in names:
        if not os.path.exists(f) or os.path.getsize(f) == 0:
            print("ERROR: File '{}' does not exist or is empty".format(f), file=sys.stderr)
            on_fail()
            # sys.exit(1) rather than the site-provided exit(): the latter is
            # not guaranteed to exist and reports success (status 0).
            sys.exit(1)
def load_baseline(f):
    """Parse the result data from unthreaded timing experiments.

    Each non-blank line is split on whitespace; column 0 is the benchmark
    name, column 4 the sample count and column 5 the total execution time.

    Args:
        f: Name of the file to load.

    Returns:
        A tuple ``(res, samples, max_res)``:
        res: Map of benchmark name to the list of all execution time
            samples, in file order.
        samples: Map of benchmark name to the sample count taken from the
            first line seen for that benchmark.
        max_res: Map of benchmark name to the maximum execution time among
            all samples for that benchmark.

    Raises:
        IndexError: If a non-blank line has fewer than six columns.
        ValueError: If a numeric column cannot be parsed as an integer.
    """
    # Columns of a baseline data file
    BENCH_NAME = 0
    SAMPLES = 4
    TOTAL_NS = 5

    # Load baseline data. This logic is based off the summarize programs
    res = {}
    samples = {}
    max_res = {}
    with open(f) as fp:
        for line in fp:
            s = line.split()
            if not s:
                # Tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            name = s[BENCH_NAME]
            total_ns = int(s[TOTAL_NS])
            if name not in res:
                res[name] = [total_ns]
                samples[name] = int(s[SAMPLES])
                max_res[name] = total_ns
            else:
                res[name].append(total_ns)
                max_res[name] = max(total_ns, max_res[name])
    return res, samples, max_res
def load_paired(file1, file2, benchmarkCount):
    """Parse the result data from paired, threaded timing experiments.

    The two files are read in lockstep, one line from each per sample. The
    number of jobs per benchmark pair is taken from the trials column of the
    first line of *file1*. Rows are assigned to benchmark pairs purely by
    position: the pair index advances after every ``numJobs`` lines, walking
    an upper triangular matrix (bench2 >= bench1).

    Args:
        file1: The -A file name.
        file2: The -B file name.
        benchmarkCount: Number of distinct benchmarks (matrix dimension).

    Returns:
        A tuple ``(time, offset, name_to_idx, idx_to_name)``:
        time: Nested list indexed ``[bench1][bench2][job]`` holding the total
            container execution time in nanoseconds. Entries never reached by
            the input stay 0.
        offset: Nested list with the same shape holding the absolute
            difference between the two start times in nanoseconds.
        name_to_idx: Map of benchmark names to benchmark IDs.
        idx_to_name: List which when indexed with a benchmark ID yields the
            benchmark name (0 for IDs whose row was never completed).

    Raises:
        AssertionError: If the job count or *benchmarkCount* is not positive.
        IndexError: If a line has too few columns, or the files contain more
            samples than the upper triangular matrix can hold.
    """
    # Columns of a paired data file. The full layout is:
    #   0 first program, 1 second program, 2 first core, 3 second core,
    #   4 trials, 5 start seconds, 6 start nanoseconds, 7 end seconds,
    #   8 end nanoseconds, 9 run ID, 10 job number
    FIRST_PROG = 0
    TRIALS = 4
    START_S = 5
    START_N = 6
    END_S = 7
    END_N = 8
    NS_PER_S = 10**9

    def to_ns(fields, sec_col, nsec_col):
        # Combine a (seconds, nanoseconds) column pair into nanoseconds
        return int(fields[sec_col]) * NS_PER_S + int(fields[nsec_col])

    with open(file1) as f1:
        numJobs = int(f1.readline().split()[TRIALS])
    assert numJobs > 0, "trial count in the first line must be positive"
    assert benchmarkCount > 0, "benchmarkCount must be positive"

    # Total times of each container
    times = [[[0] * numJobs for _ in range(benchmarkCount)] for _ in range(benchmarkCount)]
    # Difference in time between when the first and the second task start in the container
    offset = [[[0] * numJobs for _ in range(benchmarkCount)] for _ in range(benchmarkCount)]

    # Aggregate counters for the average start offset
    total_offset = 0
    offset_samples = 0

    bench1 = 0  # Index of the current first benchmark being examined
    bench2 = 0  # Index of the current second benchmark being examined
    job_idx = 0
    name_to_idx = {}
    idx_to_name = [0] * benchmarkCount

    with open(file1) as f1, open(file2) as f2:
        for line1, line2 in zip(f1, f2):
            lineArr1 = line1.split()
            lineArr2 = line2.split()
            if not lineArr1 and not lineArr2:
                # Blank in both files (e.g. trailing newline): nothing to record
                continue
            start1 = to_ns(lineArr1, START_S, START_N)
            start2 = to_ns(lineArr2, START_S, START_N)
            end1 = to_ns(lineArr1, END_S, END_N)
            end2 = to_ns(lineArr2, END_S, END_N)
            # Time actually co-scheduled is minEnd - maxStart, but Sims uses a different model
            times[bench1][bench2][job_idx] = max(end1, end2) - min(start1, start2)
            # Compute offset: if first job starts at t=0, when does second start?
            start_gap = abs(start2 - start1)
            offset[bench1][bench2][job_idx] = start_gap
            total_offset += start_gap
            offset_samples += 1

            if job_idx < numJobs - 1:
                job_idx += 1
                continue
            # Last job of this pair: advance to the next pair. The results
            # form an upper triangular matrix, so after finishing a row the
            # next row starts on the diagonal.
            job_idx = 0
            if bench2 < benchmarkCount - 1:
                bench2 += 1
            else:
                name_to_idx[lineArr1[FIRST_PROG]] = bench1
                idx_to_name[bench1] = lineArr1[FIRST_PROG]
                bench1 += 1
                bench2 = bench1  # bench1 will never again appear as bench2

    # Guard against dividing by zero when no sample pairs were read
    if offset_samples:
        print("Average offset is: " + str(total_offset / offset_samples) + "ns")
    return times, offset, name_to_idx, idx_to_name
def load_fake_paired(fake_paired_filename):
    """Load paired times stored in (an abuse of) the baseline file format.

    The file is read with ``load_baseline``; each benchmark name in it is a
    pair of benchmark names joined with a '+' delimiter. Benchmark IDs are
    assigned to the first name of each pair in sorted key order.

    Args:
        fake_paired_filename: Name of the file to load.

    Returns:
        A tuple ``(paired_times, name_to_idx, idx_to_name)``:
        paired_times: Nested list indexed ``[bench1][bench2]`` yielding the
            list of execution time samples for that pair.
        name_to_idx: Map of benchmark names to benchmark IDs.
        idx_to_name: List which when indexed with a benchmark ID yields the
            benchmark name.
        All three are empty when the file holds no data.

    Raises:
        ValueError: If a key does not consist of exactly two '+'-joined names.
        KeyError: If some ordered pair of benchmark names has no entry.
    """
    paired_times_raw, _, _ = load_baseline(fake_paired_filename)
    if not paired_times_raw:
        return [], {}, []

    # Generate the indexing approach. Sorting keeps all keys sharing a first
    # name adjacent, so IDs follow the sorted order of first names.
    idx_to_name = []
    name_to_idx = {}
    for pair in sorted(paired_times_raw):
        first_name, _second_name = pair.split('+')
        if first_name not in name_to_idx:
            name_to_idx[first_name] = len(idx_to_name)
            idx_to_name.append(first_name)

    # Populate the matrix. It is sized from the distinct names rather than
    # from the square root of the key count, so the two can never disagree.
    paired_times = [
        [paired_times_raw[first + "+" + second] for second in idx_to_name]
        for first in idx_to_name
    ]
    return paired_times, name_to_idx, idx_to_name
def assert_base_and_pair_keys_match(baseline_times, name_to_idx):
    """Terminate the program unless both maps cover the same benchmark names.

    Args:
        baseline_times: Map keyed by benchmark name (baseline experiment).
        name_to_idx: Map keyed by benchmark name (paired experiment).

    Raises:
        SystemExit: With exit status 1 when the sorted key lists differ.
    """
    if sorted(baseline_times.keys()) != sorted(name_to_idx.keys()):
        print("ERROR: The baseline and paired experiments were over a different set of benchmarks!", file=sys.stderr)
        print("Baseline keys:", baseline_times.keys(), file=sys.stderr)
        print("Paired keys:", name_to_idx.keys(), file=sys.stderr)
        # sys.exit(1) rather than the site-provided exit(): the latter is not
        # guaranteed to exist and reports success (status 0).
        sys.exit(1)

diff --git a/smt_analysis/libSMT.py b/smt_analysis/libSMT.py new file mode 100755 index 0000000..cca2fce --- /dev/null +++ b/smt_analysis/libSMT.py
@@ -0,0 +1,151 @@
	1	import numpy as np
	2	import sys
	3	import os
	4
	5	def assert_valid_input_files(names, on_fail):
	6	# Check that all input files are valid
	7	for f in names:
	8	if not os.path.exists(f) or os.path.getsize(f) == 0:
	9	print("ERROR: File '{}' does not exist or is empty".format(f), file=sys.stderr);
	10	on_fail()
	11	exit()
	12
	13	# This parses the result data from unthreaded timing experiments
	14	# @param f File name to load
	15	# @returns res Map of benchmark name to sample count
	16	# @returns samples Map of benchmark name to list of execution time samples
	17	# @returns max_res May of benchmark to maximum execution time among all samples for that benchmark
	18	def load_baseline(f):
	19	# constants for columns of baseline data files
	20	TOTAL_NS = 5
	21	BENCH_NAME = 0
	22	SAMPLES = 4
	23
	24	# Load baseline data. This logic is based off the summarize programs
	25	res = {} # Map of benchmark to list of all execution time samples
	26	samples = {} # Map of benchmark name to sample count
	27	max_res = {} # Map of benchmark name to maximum execution time
	28
	29	with open(f) as fp:
	30	for line in fp:
	31	s = line.split()
	32	if s[BENCH_NAME] not in res:
	33	res[s[BENCH_NAME]] = list([int(s[TOTAL_NS])])
	34	samples[s[BENCH_NAME]] = int(s[SAMPLES])
	35	max_res[s[BENCH_NAME]] = int(s[TOTAL_NS])
	36	else:
	37	res[s[BENCH_NAME]].append(int(s[TOTAL_NS]))
	38	max_res[s[BENCH_NAME]] = max(int(s[TOTAL_NS]), max_res[s[BENCH_NAME]])
	39	return res, samples, max_res
	40
	41	# This parses the result data from paired, threaded timing experiements
	42	# @param file1 The -A file name
	43	# @param file2 The -B file name
	44	# @returns time 2D array of benchmark IDs to list of total container execution times
	45	# @returns offset 2D array of benchmark IDs to list of differences between the start
	46	# of the first and the start of the second benchmark
	47	# @returns name_to_idx Map of benchmark names to benchmark IDs
	48	# @returns idx_to_name List which when indexed with benchmark ID will yield the benchmark name
	49	def load_paired(file1, file2, benchmarkCount):
	50	# constants for columns of paired data files
	51	FIRST_PROG = 0
	52	SECOND_PROG = 1
	53	FIRST_CORE = 2
	54	SECOND_CORE = 3
	55	TRIALS = 4
	56	START_S = 5 # Start seconds
	57	START_N = 6 # Start nanoseconds
	58	END_S = 7 # End seconds
	59	END_N = 8 # End nanoseconds
	60	RUN_ID = 9
	61	JOB_NUM = 10
	62
	63	with open(file1) as f1:
	64	numJobs = int(f1.readline().split()[TRIALS])
	65	assert numJobs > 0
	66	assert benchmarkCount > 0
	67
	68	# Total times of each container
	69	time=[[[0 for x in range(numJobs)]for y in range(benchmarkCount)]for z in range(benchmarkCount)]
	70	# Difference in time between when the first and the second task start in the container
	71	offset=[[[0 for x in range(numJobs)]for y in range(benchmarkCount)]for z in range(benchmarkCount)]
	72
	73	# Some aggregate counters that we update as we go along
	74	avg_off = 0
	75	avg_off_samp = 0
	76
	77	# Load paired data
	78	bench1 = 0 # Index to what's the current first benchmark being examined
	79	bench2 = 0 # Index to what's the current second benchmark being examined
	80
	81	name_to_idx = {}
	82	idx_to_name = [0 for x in range(benchmarkCount)]
	83
	84	job_idx = 0
	85	with open(file1) as f1, open(file2) as f2:
	86	for line1, line2 in zip(f1, f2):
	87	lineArr1 = line1.split()
	88	lineArr2 = line2.split()
	89	start1 = int(lineArr1[START_S]) * 10**9 + int(lineArr1[START_N])
	90	start2 = int(lineArr2[START_S]) * 10**9 + int(lineArr2[START_N])
	91	minStart = min(start1, start2)
	92	end1 = int(lineArr1[END_S]) * 10**9 + int(lineArr1[END_N])
	93	end2 = int(lineArr2[END_S]) * 10**9 + int(lineArr2[END_N])
	94	maxEnd = max(end1, end2)
	95	# Time actually co-scheduled is minEnd - maxStart, but Sims uses a different model
	96	# time[bench1][bench2][int(lineArr1[JOB_NUM])] = maxEnd - minStart
	97	time[bench1][bench2][job_idx] = maxEnd - minStart
	98	if lineArr1[SECOND_PROG] == "h264_dec" and lineArr2[JOB_NUM] == 0:
	99	print(maxEnd - minStart)
	100	# Compute offset: if first job starts at t=0, when does second start?
	101	# offset[bench1][bench2][int(lineArr1[JOB_NUM])] = abs(start2-start1)
	102	offset[bench1][bench2][job_idx] = abs(start2-start1)
	103	# Compute some running statistics
	104	avg_off += abs(start2-start1)
	105	avg_off_samp += 1
	106	# Increment to the next benchmark, this is weird because of the zip()
	107	# This is doubly weird because our results are an upper trianguler matrix
	108	if job_idx == numJobs - 1: #int(lineArr1[JOB_NUM]) == numJobs - 1:
	109	if bench2 < benchmarkCount-1:
	110	bench2 = bench2 + 1
	111	job_idx = 0
	112	else:
	113	name_to_idx[lineArr1[FIRST_PROG]] = bench1
	114	idx_to_name[bench1] = lineArr1[FIRST_PROG]
	115	bench1 = bench1 + 1
	116	bench2 = bench1 # bench1 will never again appear as bench2
	117	job_idx = 0
	118	else:
	119	job_idx += 1
	120	print("Average offset is: " + str(avg_off/avg_off_samp) + "ns")
	121	return time, offset, name_to_idx, idx_to_name
	122
	123	# Paired times use an abuse of the baseline file format
	124	def load_fake_paired(fake_paired_filename):
	125	paired_times_raw, _, _ = load_baseline(fake_paired_filename)
	126	benchmarkCount = int(np.sqrt(len(list(paired_times_raw.keys()))))
	127	numJobs = len(next(iter(paired_times_raw.values())))
	128	paired_times=[[[0 for x in range(numJobs)]for y in range(benchmarkCount)]for z in range(benchmarkCount)]
	129	idx_to_name=[]
	130	name_to_idx={}
	131	bench1 = -1
	132	#Generate the indexing approach
	133	for pair in sorted(paired_times_raw.keys()):
	134	[bench1name, bench2name] = pair.split('+') # Benchmark name is pair concatenated together with a '+' delimiter
	135	if bench1 == -1 or bench1name != idx_to_name[-1]:
	136	idx_to_name.append(bench1name)
	137	name_to_idx[bench1name] = len(idx_to_name) - 1
	138	bench1 += 1
	139	# Populate the array
	140	for bench1 in range(len(idx_to_name)):
	141	for bench2 in range(len(idx_to_name)):
	142	paired_times[bench1][bench2] = paired_times_raw[idx_to_name[bench1]+"+"+idx_to_name[bench2]]
	143	return paired_times, name_to_idx, idx_to_name
	144
	145	def assert_base_and_pair_keys_match(baseline_times, name_to_idx):
	146	if sorted(baseline_times.keys()) != sorted(name_to_idx.keys()):
	147	print("ERROR: The baseline and paired experiments were over a different set of benchmarks!", file=sys.stderr)
	148	print("Baseline keys:", baseline_times.keys(), file=sys.stderr)
	149	print("Paired keys:", name_to_idx.keys(), file=sys.stderr)
	150	exit();
	151