Diffstat (limited to 'parse')
-rw-r--r--  parse/__init__.py    |   0
-rw-r--r--  parse/dir_map.py     | 104
-rw-r--r--  parse/enum.py        |   7
-rw-r--r--  parse/ft.py          |  60
-rw-r--r--  parse/point.py       | 135
-rw-r--r--  parse/sched.py       |  89
-rw-r--r--  parse/tuple_table.py |  76
7 files changed, 471 insertions, 0 deletions
diff --git a/parse/__init__.py b/parse/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/parse/__init__.py
diff --git a/parse/dir_map.py b/parse/dir_map.py
new file mode 100644
index 0000000..6e959f2
--- /dev/null
+++ b/parse/dir_map.py
@@ -0,0 +1,104 @@
import os

from collections import defaultdict
from point import Type

class TreeNode(object):
    def __init__(self, parent = None):
        self.parent = parent
        self.children = defaultdict(lambda : TreeNode(self))
        self.values = []

class DirMap(object):
    def to_csv(self, vals):
        val_strs = []
        for key in sorted(vals.keys()):
            val_strs += ["%s=%s" % (key, vals[key])]
        return "%s.csv" % ("_".join(val_strs))

    def __init__(self, out_dir):
        self.root = TreeNode(None)
        self.out_dir = out_dir
        self.values = []

    def debug_update_node(self, path, keys, value):
        self.__update_node(path, keys, value)

    def __update_node(self, path, keys, value):
        node = self.root

        path += [ self.to_csv(keys) ]
        for p in path:
            node = node.children[p]

        node.values += [value]

    def add_point(self, vary, vary_value, keys, point):
        for stat in point.get_stats():
            summary = point[stat]

            for summary_type in Type:
                measurement = summary[summary_type]

                for base_type in Type:
                    if not base_type in measurement:
                        continue
                    # Ex: wcet/avg/max/vary-type/other-stuff.csv
                    path = [ stat, summary_type, base_type, "vary-%s" % vary ]
                    result = measurement[base_type]

                    self.__update_node(path, keys, (vary_value, result))

    def reduce(self):
        def reduce2(node):
            for key in node.children.keys():
                child = node.children[key]
                reduce2(child)
                if not (child.children or child.values):
                    node.children.pop(key)

            if len(node.values) == 1:
                node.values = []

        reduce2(self.root)

    def write(self):
        def write2(path, node):
            out_path = "/".join(path)
            if node.values:
                # Leaf
                with open("/".join(path), "w") as f:
                    arr = [",".join([str(b) for b in n]) for n in node.values]
                    f.write("\n".join(arr) + "\n")
            elif not os.path.isdir(out_path):
                os.mkdir(out_path)

            for (key, child) in node.children.iteritems():
                path.append(key)
                write2(path, child)
                path.pop()

        write2([self.out_dir], self.root)

    def __str__(self):
        def str2(node, level):
            header = " " * level
            ret = ""
            if not node.children:
                return "%s%s\n" % (header, str(node.values) if node.values else "")
            for key,child in node.children.iteritems():
                ret += "%s/%s\n" % (header, key)
                ret += str2(child, level + 1)
            return ret

        return "%s\n%s" % (self.out_dir, str2(self.root, 1))
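For orientation, a minimal sketch of how the DirMap tree is meant to be driven (Python 2, matching the iteritems usage above); the stat name, path, and values are invented for illustration:

    from dir_map import DirMap

    dm = DirMap("out")

    # Two (vary_value, result) pairs filed under the same path; the
    # remaining keys are flattened into the csv filename by to_csv().
    dm.debug_update_node(["wcet", "Avg", "Max", "vary-tasks"],
                         {"cpus": 4}, (8, 12.5))
    dm.debug_update_node(["wcet", "Avg", "Max", "vary-tasks"],
                         {"cpus": 4}, (16, 14.1))

    dm.reduce()   # prunes empty subtrees and singleton value lists
    print(dm)     # tree view rooted at "out"
    dm.write()    # creates out/wcet/Avg/Max/vary-tasks/cpus=4.csv

Note that reduce() discards a leaf's values when there is only one of them, so a lone data point would vanish before write().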
diff --git a/parse/enum.py b/parse/enum.py
new file mode 100644
index 0000000..bf35d01
--- /dev/null
+++ b/parse/enum.py
@@ -0,0 +1,7 @@
class Enum(frozenset):
    def __getattr__(self, name):
        if name in self:
            return name
        raise AttributeError
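This Enum is just a frozenset of strings whose member names echo back through attribute access; a quick sketch of the behaviour point.py relies on:

    from enum import Enum

    Type = Enum(['Min', 'Max', 'Avg', 'Var'])

    print(Type.Max)        # "Max" -- members are plain strings
    print('Avg' in Type)   # True
    for t in Type:         # set iteration: order is unspecified
        print(t)
    # Type.Median would raise AttributeError

Because members are ordinary strings, they work directly as dict keys and path components, which is exactly how dir_map.py uses them.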
diff --git a/parse/ft.py b/parse/ft.py
new file mode 100644
index 0000000..9837898
--- /dev/null
+++ b/parse/ft.py
@@ -0,0 +1,60 @@
import config.config as conf
import os
import re
import shutil as sh
import subprocess

from point import Measurement,Type

def get_ft_output(data_dir, out_dir):
    bin_file = conf.FILES['ft_data'] + "$"
    bins = [f for f in os.listdir(data_dir) if re.match(bin_file, f)]

    FT_DATA_NAME = "scheduler=x-ft"
    output_file = "{}/out-ft".format(out_dir)

    if os.path.isfile(output_file):
        print("ft-output already exists for %s" % data_dir)
        return output_file

    if len(bins) != 0:
        err_file = open("%s/err-ft" % out_dir, 'w')
        # Need to make a copy of the original data file so scripts can change it
        sh.copyfile("{}/{}".format(data_dir, bins[0]),
                    "{}/{}".format(out_dir, FT_DATA_NAME))

        subprocess.call([conf.BINS['sort'], FT_DATA_NAME],
                        cwd=out_dir, stderr=err_file, stdout=err_file)
        subprocess.call([conf.BINS['split'], FT_DATA_NAME],
                        cwd=out_dir, stderr=err_file, stdout=err_file)

        # Previous subprocesses just spit out all these intermediate files
        bins = [f for f in os.listdir(out_dir) if re.match(".*overhead=.*bin", f)]
        bins = [f for f in bins if os.stat("%s/%s"%(out_dir, f)).st_size]

        # Analyze will summarize those
        cmd_arr = [conf.BINS['analyze']]
        cmd_arr.extend(bins)
        with open(output_file, "w") as f:
            subprocess.call(cmd_arr, cwd=out_dir, stdout=f, stderr=err_file)
    else:
        return None
    return output_file

def get_ft_data(data_file, result, overheads):
    rstr = r",(?:\s+[^\s]+){3}.*?([\d\.]+).*?([\d\.]+),(?:\s+[^\s]+){3}.*?([\d\.]+)"

    with open(data_file) as f:
        data = f.read()

    for ovh in overheads:
        measure = Measurement("%s-%s" % (data_file, ovh))
        vals = re.findall(".*{}".format(ovh) + rstr, data);
        if len(vals) != 0:
            vals = vals[0]
            measure[Type.Max] = float(vals[0])
            measure[Type.Avg] = float(vals[1])
            measure[Type.Var] = float(vals[2])
            result[ovh] = measure

    return result
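A hedged sketch of how these two entry points chain together; the directory names and overhead labels here are assumptions for illustration, and conf.FILES/conf.BINS must resolve, so this only runs inside the surrounding repo:

    from ft import get_ft_output, get_ft_data
    from point import ExpPoint

    # Sort/split/analyze the raw feather-trace binaries, if any.
    out = get_ft_output("run-data", "run-output")
    if out:
        point = ExpPoint("run-output")
        # Each overhead found in the analyze output becomes a
        # Measurement with Max/Avg/Var filled in.
        get_ft_data(out, point, ["CXS", "SCHED"])
        print(point)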
diff --git a/parse/point.py b/parse/point.py
new file mode 100644
index 0000000..4343d03
--- /dev/null
+++ b/parse/point.py
@@ -0,0 +1,135 @@
1 | """ | ||
2 | Too much duplicate code in this file | ||
3 | """ | ||
4 | |||
5 | import copy | ||
6 | import numpy as np | ||
7 | from enum import Enum | ||
8 | from collections import defaultdict | ||
9 | |||
10 | Type = Enum(['Min','Max','Avg','Var']) | ||
11 | default_typemap = {Type.Max : {Type.Max : 1, Type.Min : 0, Type.Avg : 1, Type.Var : 1}, | ||
12 | Type.Min : {Type.Max : 1, Type.Min : 0, Type.Avg : 1, Type.Var : 1}, | ||
13 | Type.Avg : {Type.Max : 1, Type.Min : 0, Type.Avg : 1, Type.Var : 1}} | ||
14 | |||
15 | def make_typemap(): | ||
16 | return copy.deepcopy(default_typemap) | ||
17 | |||
18 | def dict_str(adict, sep = "\n"): | ||
19 | return sep.join(["%s: %s" % (k, str(v)) for (k,v) in adict.iteritems()]) | ||
20 | |||
21 | class Measurement(object): | ||
22 | def __init__(self, id = None, kv = {}): | ||
23 | self.id = id | ||
24 | self.stats = {} | ||
25 | for k, v in kv.iteritems(): | ||
26 | self[k] = v | ||
27 | |||
28 | def from_array(self,array): | ||
29 | array = np.array(array) | ||
30 | self[Type.Max] = array.max() | ||
31 | self[Type.Avg] = array.mean() | ||
32 | self[Type.Var] = array.var() | ||
33 | return self | ||
34 | |||
35 | def __check_type(self, type): | ||
36 | if not type in Type: | ||
37 | raise AttributeError("Not a valid type '%s'" % type) | ||
38 | |||
39 | def __getitem__(self, type): | ||
40 | self.__check_type(type) | ||
41 | return self.stats[type] | ||
42 | |||
43 | def __iter__(self): | ||
44 | return self.stats.iteritems() | ||
45 | |||
46 | def __contains__(self, type): | ||
47 | self.__check_type(type) | ||
48 | return type in self.stats | ||
49 | |||
50 | def __setitem__(self, type, value): | ||
51 | self.__check_type(type) | ||
52 | self.stats[type] = value | ||
53 | |||
54 | def __str__(self): | ||
55 | return "<Measurement-%s> %s" % (self.id, dict_str(self.stats, " ")) | ||
56 | |||
57 | |||
58 | class Summary(Measurement): | ||
59 | def __init__(self, id, measures, typemap = default_typemap): | ||
60 | super(Summary, self).__init__("Summary-%s" % id) | ||
61 | |||
62 | self.__check_types(measures, typemap) | ||
63 | self.__summarize(measures, typemap) | ||
64 | |||
65 | def __check_types(self, measures, typemap): | ||
66 | required_types = self.__get_required(typemap) | ||
67 | for m in measures: | ||
68 | for type in required_types: | ||
69 | if type not in m: | ||
70 | raise ValueError("measurement '%s' missing type '%s'" % | ||
71 | (self.id, type)) | ||
72 | |||
73 | def __summarize(self, measures, typemap): | ||
74 | for sum_type in Type: | ||
75 | self[sum_type] = Measurement(self.id) | ||
76 | |||
77 | def avg(vals): | ||
78 | return sum(vals) / len(vals) | ||
79 | |||
80 | for base_type in Type: | ||
81 | for sum_type, func in (Type.Min,min),(Type.Max,max),(Type.Avg, avg): | ||
82 | if typemap[sum_type][base_type]: | ||
83 | val = func([m[base_type] for m in measures]) | ||
84 | self[sum_type][base_type] = val | ||
85 | |||
86 | def __get_required(self, typemap): | ||
87 | required = [] | ||
88 | for base_type in Type: | ||
89 | matches = [t[base_type] for t in typemap.itervalues()] | ||
90 | if bool(sum(matches)): | ||
91 | required += [base_type] | ||
92 | return required | ||
93 | |||
94 | class ExpPoint(object): | ||
95 | def __init__(self, id = "", init = {}): | ||
96 | self.stats = {} | ||
97 | for type, value in init.iteritems(): | ||
98 | self[type] = value | ||
99 | self.id = id | ||
100 | |||
101 | def __check_val(self, obj): | ||
102 | if not isinstance(obj, Measurement): | ||
103 | raise AttributeError("Not a valid measurement '%s'" % obj) | ||
104 | |||
105 | def __getitem__(self, type): | ||
106 | return self.stats[type] | ||
107 | |||
108 | def __iter__(self): | ||
109 | return self.stats.iteritems() | ||
110 | |||
111 | def __contains__(self, type): | ||
112 | return type in self.stats | ||
113 | |||
114 | def __setitem__(self, type, value): | ||
115 | self.__check_val(value) | ||
116 | self.stats[type] = value | ||
117 | |||
118 | def __str__(self): | ||
119 | return "<ExpPoint-%s>\n%s" % (self.id, dict_str(self.stats)) | ||
120 | |||
121 | def get_stats(self): | ||
122 | return self.stats.keys() | ||
123 | |||
124 | class SummaryPoint(ExpPoint): | ||
125 | def __init__(self, id, points, typemap = default_typemap): | ||
126 | super(SummaryPoint,self).__init__("Summary-%s" % id) | ||
127 | |||
128 | grouped = defaultdict(lambda : []) | ||
129 | |||
130 | for exp in points: | ||
131 | for name,measure in exp.stats.iteritems(): | ||
132 | grouped[name] += [measure] | ||
133 | |||
134 | for key in grouped.iterkeys(): | ||
135 | self[key] = Summary(key, grouped[key], typemap) | ||
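A small end-to-end sketch of the hierarchy above: two ExpPoints carrying one Measurement each are collapsed into a SummaryPoint, giving per-statistic summaries of summaries (values invented):

    from point import Measurement, ExpPoint, SummaryPoint, Type

    p1 = ExpPoint("run1", {"tard": Measurement("run1").from_array([1.0, 3.0])})
    p2 = ExpPoint("run2", {"tard": Measurement("run2").from_array([2.0, 6.0])})

    summary = SummaryPoint("tard", [p1, p2])

    # summary["tard"] is a Summary: a Measurement of Measurements.
    print(summary["tard"][Type.Max][Type.Max])  # max of maxes: 6.0
    print(summary["tard"][Type.Avg][Type.Avg])  # avg of avgs: (2.0 + 4.0) / 2 = 3.0

Note that from_array() fills in Max/Avg/Var but not Min, which is consistent with default_typemap marking Type.Min as unrequired (its column is all zeros).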
diff --git a/parse/sched.py b/parse/sched.py
new file mode 100644
index 0000000..ec4d917
--- /dev/null
+++ b/parse/sched.py
@@ -0,0 +1,89 @@
import config.config as conf
import os
import re
import numpy as np
import subprocess

from collections import namedtuple
from point import Measurement

Task = namedtuple('Task', ['pid', 'period'])

def get_st_output(data_dir, out_dir):
    bin_files = conf.FILES['sched_data'].format(".*")
    bins = [f for f in os.listdir(data_dir) if re.match(bin_files, f)]

    output_file = "%s/out-st" % out_dir

    if os.path.isfile(output_file):
        return output_file

    if len(bins) != 0:
        cmd_arr = [conf.BINS['st_show']]
        cmd_arr.extend(bins)
        with open(output_file, "w") as f:
            subprocess.call(cmd_arr, cwd=data_dir, stdout=f)
    else:
        return None
    return output_file

def get_tasks(data):
    reg = r"PARAM.*?(\d+).*?cost:\s+[\d\.]+ms.*?period.*?([\d.]+)"
    return [Task(x[0], x[1]) for x in re.findall(reg, data)]

def extract_tardy_vals(data, exp_point):
    ratios = []
    tards = []

    for t in get_tasks(data):
        reg = r"TARDY.*?" + t.pid + "/(\d+).*?Tot.*?([\d.]+).*?ms.*([\d.]+).*?ms.*?([\d.]+)"
        matches = re.findall(reg, data)
        if len(matches) != 0:
            jobs = float(matches[0][0])
            total_tard = float(matches[0][1])
            # max_tard = float(matches[0][2])
            misses = float(matches[0][3])
            rel_tard = (total_tard / jobs) / float(t.period)
            if misses != 0:
                miss_ratio = (misses / jobs)
            else:
                miss_ratio = 0

            ratios.append(miss_ratio)
            tards.append(rel_tard)

    for (array, name) in ((tards, "rel-tard"), (ratios, "miss-ratio")):
        exp_point[name] = Measurement().from_array(array)

def extract_variance(data, exp_point):
    varz = []
    for t in get_tasks(data):
        reg = r"COMPLETION.*?" + t.pid + r".*?([\d\.]+)ms"
        matches = re.findall(reg, data)

        if len(matches) == 0:
            return 0

        job_times = np.array(filter(lambda x: float(x) != 0, matches), dtype=np.float)

        # Coefficient of variation
        cv = job_times.std() / job_times.mean()
        # Correction, assuming normal distributions
        corrected = (1 + 1/(4 * len(job_times))) * cv

        varz.append(corrected)

    exp_point['var'] = Measurement().from_array(varz)

def get_sched_data(data_file, result):
    with open(data_file, 'r') as f:
        data = f.read()

    # if conf != BASE:
    #     (our_values, their_values) = extract_exec_vals(our_data, their_data)
    #     conf_result = get_stats(our_values, their_values)
    #     for key in conf_result.keys():
    #         result[key][conf] = conf_result[key]

    extract_tardy_vals(data, result)
    extract_variance(data, result)
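To see the parsing in action, one can feed extract_tardy_vals a synthetic trace; the two lines below are invented purely to satisfy the PARAM/TARDY regexes above and may not match real st_show output (importing sched also still requires the repo's config package):

    from point import ExpPoint
    from sched import extract_tardy_vals

    data = ("PARAM 1234: cost: 10.0ms period 100.0\n"
            "TARDY 1234/25 Tot 12.5 ms 3.1 ms 2\n")

    point = ExpPoint("demo")
    extract_tardy_vals(data, point)
    print(point)  # miss-ratio = 2/25 = 0.08, rel-tard = (12.5/25)/100.0 = 0.005

One quirk worth knowing: extract_variance returns early if any task has no COMPLETION records, leaving 'var' unset on the point.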
diff --git a/parse/tuple_table.py b/parse/tuple_table.py
new file mode 100644
index 0000000..df80b37
--- /dev/null
+++ b/parse/tuple_table.py
@@ -0,0 +1,76 @@
from collections import defaultdict
from point import SummaryPoint
from dir_map import DirMap

class ColMap(object):
    def __init__(self):
        self.rev_map = {}
        self.col_list = []

    def columns(self):
        return self.col_list

    def get_key(self, kv):
        key = ()
        added = 0

        for col in self.col_list:
            if col not in kv:
                key += (None,)
            else:
                added += 1
                key += (kv[col],)

        if added != len(kv):
            raise Exception("column map '%s' missed field in map\n%s" %
                            (self.col_list, kv))

        return key

    def get_map(self, tuple):
        map = {}
        for i in range(0, len(tuple)):
            map[self.col_list[i]] = tuple[i]
        return map

    def try_add(self, column):
        if column not in self.rev_map:
            self.rev_map[column] = len(self.col_list)
            self.col_list += [column]

    def __str__(self):
        return "<ColMap>%s" % (self.rev_map)

class TupleTable(object):
    def __init__(self, col_map):
        self.col_map = col_map
        self.table = defaultdict(lambda: [])
        self.reduced = False

    def add_exp(self, kv, point):
        key = self.col_map.get_key(kv)
        self.table[key] += [point]

    def __reduce(self):
        if self.reduced:
            raise Exception("cannot reduce twice!")
        self.reduced = True
        for key, values in self.table.iteritems():
            self.table[key] = SummaryPoint(key, values)

    def write_result(self, out_dir):
        dir_map = DirMap(out_dir)
        self.__reduce()
        for key, point in self.table.iteritems():
            kv = self.col_map.get_map(key)

            for col in self.col_map.columns():
                val = kv[col]
                kv.pop(col)

                dir_map.add_point(col, val, kv, point)

                kv[col] = val

        dir_map.reduce()
        dir_map.write()
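Finally, a sketch tying the pieces together: a ColMap fixes the column order, a TupleTable groups ExpPoints by their key tuple, and write_result() emits the DirMap csv tree. Column names and values are invented:

    from point import ExpPoint, Measurement
    from tuple_table import ColMap, TupleTable

    cmap = ColMap()
    for col in ["scheduler", "tasks"]:
        cmap.try_add(col)

    table = TupleTable(cmap)
    for tasks in (8, 16):
        point = ExpPoint("run", {"tard": Measurement("run").from_array([0.1, 0.3])})
        table.add_exp({"scheduler": "GSN-EDF", "tasks": tasks}, point)

    # Summarizes each key's points, then writes csv files such as
    # parse-out/tard/Avg/Max/vary-tasks/scheduler=GSN-EDF.csv,
    # one "tasks" value per row.
    table.write_result("parse-out")

Since write_result() pops each column in turn and re-adds it, every column gets its own vary-<column> subtree; DirMap.reduce() then drops any subtree that ended up with only a single point.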