summaryrefslogtreecommitdiffstats
path: root/smt_analysis/computeLCslowdown.py
blob: bcd22da955eb3514d15dcf333e3069d8ff6c7d31 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/python3
import numpy as np
import sys
import plotille.plotille as plt
from libSMT import *
TIMING_ERROR = 1000 #ns -- assumed measurement noise floor; benchmarks must exceed 10x this to be considered reliable (see filter below)
ASYNC_FORMAT = False # True => continuous/asynchronous (Level-C) pair format; set during argument parsing below

def print_usage():
    """Write a short description and both invocation forms to stderr."""
    usage_lines = (
        "This program takes in the all-pairs and baseline SMT data and computes the worst-case slowdown against any other task when SMT is enabled.",
        "Level-A/B usage: {} <file -A> <file -B> <baseline file>".format(sys.argv[0]),
        "Level-C usage: {} <continuous pairs> <baseline file>".format(sys.argv[0]),
    )
    for line in usage_lines:
        print(line, file=sys.stderr)

# Bail out early when too few arguments were supplied
if len(sys.argv) < 3:
    print_usage()
    exit()

# Exactly three args -> asynchronous (Level-C) input; more -> synchronous (Level-A/B) pairs
if len(sys.argv) == 3:
    print("Reading file using asynchronous pair format...")
    ASYNC_FORMAT = True
else:
    print("Reading file using synchronous pair format...")
    print("Are you sure you want to do this? For the RTAS'21 paper, L-A/-B pairs should use the other script.")
    # Deliberate speed bump: this path is usually the wrong script for L-A/-B data
    input("Press enter to continue, Ctrl+C to exit...")

# Validate every input file except the trailing baseline file (libSMT helper)
assert_valid_input_files(sys.argv[1:-1], print_usage)

# Load the baseline and paired measurements in whichever layout was detected
if ASYNC_FORMAT:
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[2])
    paired_times, name_to_idx, idx_to_name = load_fake_paired(sys.argv[1])
else:
    baseline_times, baseline_sample_cnt, baseline_max_times = load_baseline(sys.argv[3])
    paired_times, paired_offsets, name_to_idx, idx_to_name = load_paired(sys.argv[1], sys.argv[2], len(baseline_times))
    # Debug dump: worst observed solo time per benchmark
    for key in baseline_times:
        print(key,max(baseline_times[key]))

# We work iff the baseline was run for the same set of benchmarks as the pairs were
assert_base_and_pair_keys_match(baseline_times, name_to_idx)

# Keep only benchmarks whose shortest solo run is at least an order of
# magnitude above the timing noise floor; others are too short to trust.
reliableNames = [
    idx_to_name[idx]
    for idx in range(len(name_to_idx))
    if min(baseline_times[idx_to_name[idx]]) > TIMING_ERROR * 10
]

# Compute the worst-case SMT slowdown factor (M_i) for each reliable benchmark
print("Bench           Mi")
# Statistic applied to each sample set when forming M_i (e.g. swap in np.max
# to compare worst observed times instead of means)
sample_f = np.mean
M_vals = []
for bench in reliableNames:
    # Baseline statistic is invariant across co-runners, so compute it once
    solo_time = sample_f(baseline_times[bench])
    # Largest slowdown of `bench` over every co-runner it was paired with
    worst_m = max(
        sample_f(paired_times[name_to_idx[bench]][name_to_idx[other]]) / solo_time
        for other in reliableNames
    )
    M_vals.append(worst_m)
    print("{:<14.14}: {:>10.3} ".format(bench, worst_m))
# Summarize the distribution of the M_i values
print("Average: {:>5.3} with standard deviation {:>5.3} using `{}`".format(np.mean(M_vals), np.std(M_vals), sample_f.__name__))
Ms = np.asarray(M_vals, dtype=np.float32)
# Count each threshold crossing once and reuse it for both the tally and the percentage
n_at_most_one = np.sum(Ms <= 1)
n_above_two = np.sum(Ms > 2)
print(n_at_most_one, "of", len(M_vals), "M_i values are at most one -", 100*n_at_most_one/len(M_vals), "percent")
print(n_above_two, "of", len(M_vals), "M_i values are greater than two -", 100*n_above_two/len(M_vals), "percent")
M_vals_to_plot = Ms

# Terminal histogram of the M_i distribution
print(plt.hist(M_vals_to_plot, bins=10))