diff options
author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-02-08 16:20:01 -0500 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-02-08 16:20:01 -0500 |
commit | 7c647198fc40e72ef6ca23c2484bf49eba2079ee (patch) | |
tree | 270de30a1cd896382a0954b2d8f7994ca84f88b7 | |
parent | d312e270ed5c2926c8651291a4026062213876f8 (diff) |
Added translation from DirMap to ReducedTupleTable.
-rw-r--r-- | parse/dir_map.py | 43 | ||||
-rw-r--r-- | parse/enum.py | 2 | ||||
-rw-r--r-- | parse/point.py | 37 | ||||
-rw-r--r-- | parse/tuple_table.py | 128 | ||||
-rwxr-xr-x | parse_exps.py | 42 |
5 files changed, 166 insertions, 86 deletions
diff --git a/parse/dir_map.py b/parse/dir_map.py index 51a1390..e4d83e6 100644 --- a/parse/dir_map.py +++ b/parse/dir_map.py | |||
@@ -3,18 +3,16 @@ import numpy as np | |||
3 | 3 | ||
4 | from collections import defaultdict | 4 | from collections import defaultdict |
5 | 5 | ||
6 | class TreeNode(object): | ||
7 | def __init__(self, parent = None): | ||
8 | self.parent = parent | ||
9 | self.children = defaultdict(lambda : TreeNode(self)) | ||
10 | self.values = [] | ||
11 | |||
12 | class DirMap(object): | 6 | class DirMap(object): |
13 | def __init__(self, in_dir = None): | 7 | class Node(object): |
14 | self.root = TreeNode(None) | 8 | def __init__(self, parent = None): |
9 | self.parent = parent | ||
10 | self.children = defaultdict(lambda : DirMap.Node(self)) | ||
11 | self.values = [] | ||
12 | |||
13 | def __init__(self): | ||
14 | self.root = DirMap.Node(None) | ||
15 | self.values = [] | 15 | self.values = [] |
16 | if in_dir: | ||
17 | self.__read(in_dir) | ||
18 | 16 | ||
19 | def add_values(self, path, values): | 17 | def add_values(self, path, values): |
20 | node = self.root | 18 | node = self.root |
@@ -22,18 +20,18 @@ class DirMap(object): | |||
22 | node = node.children[p] | 20 | node = node.children[p] |
23 | node.values += values | 21 | node.values += values |
24 | 22 | ||
25 | def reduce(self): | 23 | def remove_childless(self): |
26 | def reduce2(node): | 24 | def remove_childless2(node): |
27 | for key in node.children.keys(): | 25 | for key in node.children.keys(): |
28 | child = node.children[key] | 26 | child = node.children[key] |
29 | reduce2(child) | 27 | remove_childless2(child) |
30 | if not (child.children or child.values): | 28 | if not (child.children or child.values): |
31 | node.children.pop(key) | 29 | node.children.pop(key) |
32 | 30 | ||
33 | if len(node.values) == 1: | 31 | if len(node.values) == 1: |
34 | node.values = [] | 32 | node.values = [] |
35 | 33 | ||
36 | reduce2(self.root) | 34 | remove_childless2(self.root) |
37 | 35 | ||
38 | def write(self, out_dir): | 36 | def write(self, out_dir): |
39 | def write2(path, node): | 37 | def write2(path, node): |
@@ -42,6 +40,7 @@ class DirMap(object): | |||
42 | # Leaf | 40 | # Leaf |
43 | with open("/".join(path), "w") as f: | 41 | with open("/".join(path), "w") as f: |
44 | arr = [",".join([str(b) for b in n]) for n in node.values] | 42 | arr = [",".join([str(b) for b in n]) for n in node.values] |
43 | arr = sorted(arr, key=lambda x: x[0]) | ||
45 | f.write("\n".join(arr) + "\n") | 44 | f.write("\n".join(arr) + "\n") |
46 | elif not os.path.isdir(out_path): | 45 | elif not os.path.isdir(out_path): |
47 | os.mkdir(out_path) | 46 | os.mkdir(out_path) |
@@ -53,6 +52,21 @@ class DirMap(object): | |||
53 | 52 | ||
54 | write2([out_dir], self.root) | 53 | write2([out_dir], self.root) |
55 | 54 | ||
55 | |||
56 | def leafs(self): | ||
57 | def leafs2(path, node): | ||
58 | if node.children: | ||
59 | for child_name, child_node in node.children.iteritems(): | ||
60 | path += [child_name] | ||
61 | for leaf in leafs2(path, child_node): | ||
62 | yield leaf | ||
63 | path.pop() | ||
64 | elif path: | ||
65 | yield (path, node.values) | ||
66 | |||
67 | for leaf in leafs2([], self.root): | ||
68 | yield leaf | ||
69 | |||
56 | @staticmethod | 70 | @staticmethod |
57 | def read(in_dir): | 71 | def read(in_dir): |
58 | dir_map = DirMap() | 72 | dir_map = DirMap() |
@@ -72,6 +86,7 @@ class DirMap(object): | |||
72 | 86 | ||
73 | stripped = path if path.find(in_dir) else path[len(in_dir):] | 87 | stripped = path if path.find(in_dir) else path[len(in_dir):] |
74 | path_arr = stripped.split("/") | 88 | path_arr = stripped.split("/") |
89 | path_arr = filter(lambda x: x != '', path_arr) | ||
75 | 90 | ||
76 | dir_map.add_values(path_arr, values) | 91 | dir_map.add_values(path_arr, values) |
77 | 92 | ||
diff --git a/parse/enum.py b/parse/enum.py index bf35d01..53db9e1 100644 --- a/parse/enum.py +++ b/parse/enum.py | |||
@@ -3,5 +3,3 @@ class Enum(frozenset): | |||
3 | if name in self: | 3 | if name in self: |
4 | return name | 4 | return name |
5 | raise AttributeError | 5 | raise AttributeError |
6 | |||
7 | |||
diff --git a/parse/point.py b/parse/point.py index 8e27869..ce9cfb0 100644 --- a/parse/point.py +++ b/parse/point.py | |||
@@ -26,9 +26,9 @@ def dict_str(adict, sep = "\n"): | |||
26 | return sep.join([("%" + str(size) + "s: %9s") % (k, num_str(v)) for (k,v) in sorted(adict.iteritems())]) | 26 | return sep.join([("%" + str(size) + "s: %9s") % (k, num_str(v)) for (k,v) in sorted(adict.iteritems())]) |
27 | 27 | ||
28 | class Measurement(object): | 28 | class Measurement(object): |
29 | def __init__(self, id = None, kv = {}): | 29 | def __init__(self, id = None, kv = {}, default=list): |
30 | self.id = id | 30 | self.id = id |
31 | self.stats = {} | 31 | self.stats = defaultdict(default) |
32 | for k, v in kv.iteritems(): | 32 | for k, v in kv.iteritems(): |
33 | self[k] = v | 33 | self[k] = v |
34 | 34 | ||
@@ -55,20 +55,24 @@ class Measurement(object): | |||
55 | self.__check_type(type) | 55 | self.__check_type(type) |
56 | return type in self.stats | 56 | return type in self.stats |
57 | 57 | ||
58 | def __setitem__(self, type, value): | 58 | def __setitem__(self, t, value): |
59 | self.__check_type(type) | 59 | self.__check_type(t) |
60 | self.stats[type] = value | 60 | # Numpy returns single memmapped values which can't be pickled |
61 | # Convert them to floats which can be | ||
62 | if type(value) is np.memmap: | ||
63 | value = float(value) | ||
64 | self.stats[t] = value | ||
61 | 65 | ||
62 | def __str__(self): | 66 | def __str__(self): |
63 | return "%s" % dict_str(self.stats, " ") | 67 | return "%s" % dict_str(self.stats, " ") |
64 | 68 | ||
65 | |||
66 | class Summary(Measurement): | 69 | class Summary(Measurement): |
67 | def __init__(self, id, measures, typemap = default_typemap): | 70 | def __init__(self, id="", measures=[], typemap = default_typemap): |
68 | super(Summary, self).__init__(id) | 71 | super(Summary, self).__init__(id, default=Measurement) |
69 | 72 | ||
70 | self.__check_types(measures, typemap) | 73 | if measures: |
71 | self.__summarize(measures, typemap) | 74 | self.__check_types(measures, typemap) |
75 | self.__summarize(measures, typemap) | ||
72 | 76 | ||
73 | def __check_types(self, measures, typemap): | 77 | def __check_types(self, measures, typemap): |
74 | required_types = self.__get_required(typemap) | 78 | required_types = self.__get_required(typemap) |
@@ -100,8 +104,8 @@ class Summary(Measurement): | |||
100 | return required | 104 | return required |
101 | 105 | ||
102 | class ExpPoint(object): | 106 | class ExpPoint(object): |
103 | def __init__(self, id = "", init = {}): | 107 | def __init__(self, id = "", init = {}, default=Measurement): |
104 | self.stats = {} | 108 | self.stats = defaultdict(default) |
105 | for type, value in init.iteritems(): | 109 | for type, value in init.iteritems(): |
106 | self[type] = value | 110 | self[type] = value |
107 | self.id = id | 111 | self.id = id |
@@ -124,14 +128,17 @@ class ExpPoint(object): | |||
124 | self.stats[type] = value | 128 | self.stats[type] = value |
125 | 129 | ||
126 | def __str__(self): | 130 | def __str__(self): |
127 | return "<ExpPoint-%s>\n%s" % (self.id, dict_str(self.stats)) | 131 | # return "<ExpPoint-%s>\n%s" % (self.id, dict_str(self.stats)) |
132 | return "<ExpPoint-%s>" % (self.id) | ||
128 | 133 | ||
129 | def get_stats(self): | 134 | def get_stats(self): |
130 | return self.stats.keys() | 135 | return self.stats.keys() |
131 | 136 | ||
137 | |||
132 | class SummaryPoint(ExpPoint): | 138 | class SummaryPoint(ExpPoint): |
133 | def __init__(self, id, points, typemap = default_typemap): | 139 | def __init__(self, id="", points=[], typemap = default_typemap): |
134 | super(SummaryPoint,self).__init__("Summary-%s" % id) | 140 | super(SummaryPoint,self).__init__("Summary-%s" % id, |
141 | default=Summary) | ||
135 | 142 | ||
136 | grouped = defaultdict(lambda : []) | 143 | grouped = defaultdict(lambda : []) |
137 | 144 | ||
diff --git a/parse/tuple_table.py b/parse/tuple_table.py index 0b84296..469a424 100644 --- a/parse/tuple_table.py +++ b/parse/tuple_table.py | |||
@@ -1,61 +1,50 @@ | |||
1 | from collections import defaultdict | 1 | from Cheetah.Template import Template |
2 | from collections import defaultdict,namedtuple | ||
2 | from point import SummaryPoint,Type | 3 | from point import SummaryPoint,Type |
3 | from dir_map import DirMap | 4 | from dir_map import DirMap |
5 | from col_map import ColMap,ColMapBuilder | ||
6 | |||
4 | 7 | ||
5 | from pprint import pprint | 8 | from pprint import pprint |
6 | 9 | ||
7 | class TupleTable(object): | 10 | class TupleTable(object): |
8 | def __init__(self, col_map): | 11 | def __init__(self, col_map, default=lambda:[]): |
9 | self.col_map = col_map | 12 | self.col_map = col_map |
10 | self.table = defaultdict(lambda: []) | 13 | self.table = defaultdict(default) |
11 | self.reduced = False | ||
12 | |||
13 | # TODO: rename, make exp agnostic, extend for exps | ||
14 | def add_exp(self, kv, point): | ||
15 | key = self.col_map.get_key(kv) | ||
16 | self.table[key] += [point] | ||
17 | 14 | ||
18 | def col_map(self): | 15 | def col_map(self): |
19 | return self.col_map | 16 | return self.col_map |
20 | 17 | ||
21 | def get_exps(self, kv): | 18 | def __getitem__(self, kv): |
22 | key = self.col_map.get_key(kv) | 19 | key = self.col_map.get_key(kv) |
23 | return self.table[key] | 20 | return self.table[key] |
24 | 21 | ||
22 | def __setitem__(self, kv, value): | ||
23 | key = self.col_map.get_key(kv) | ||
24 | self.table[key] | ||
25 | |||
25 | def __contains__(self, kv): | 26 | def __contains__(self, kv): |
26 | key = self.col_map.get_key(kv) | 27 | key = self.col_map.get_key(kv) |
27 | return key in self.table | 28 | return key in self.table |
28 | 29 | ||
29 | def reduce(self): | 30 | def reduce(self): |
30 | if self.reduced: | 31 | reduced = ReducedTupleTable(self.col_map) |
31 | raise Exception("cannot reduce twice!") | 32 | for key, value in self.table.iteritems(): |
32 | self.reduced = True | 33 | if type(value) == type([]): |
33 | for key, values in self.table.iteritems(): | 34 | value = SummaryPoint(value[0].id, value) |
34 | self.table[key] = SummaryPoint(values[0].id, values) | 35 | reduced.table[key] = value |
35 | 36 | return reduced | |
36 | def write_map(self, out_map): | 37 | |
37 | if not self.reduced: | 38 | def __str__(self): |
38 | raise Exception("must reduce table to write map!") | 39 | s = str(Template("""ColMap: $col_map |
39 | 40 | #for $item in $table | |
40 | rows = {} | 41 | $item :$table[$item] |
41 | 42 | #end for""", searchList=vars(self))) | |
42 | for key, point in self.table.iteritems(): | 43 | return s |
43 | row = {} | 44 | |
44 | for name,measurement in point: | 45 | class ReducedTupleTable(TupleTable): |
45 | name = name.lower().replace('_','-') | 46 | def __init__(self, col_map): |
46 | row[name]={} | 47 | super(ReducedTupleTable, self).__init__(col_map, default=SummaryPoint) |
47 | for base_type in Type: | ||
48 | type_key = str(base_type).lower() | ||
49 | if base_type in measurement[Type.Avg]: | ||
50 | value = measurement[Type.Avg][base_type] | ||
51 | row[name][type_key] = value | ||
52 | rows[key] = row | ||
53 | |||
54 | result = {'columns': self.col_map.columns(), 'rows':rows} | ||
55 | |||
56 | with open(out_map, 'wc') as map_file: | ||
57 | pprint(result,stream=map_file, width=20) | ||
58 | |||
59 | 48 | ||
60 | def __add_to_dirmap(self, dir_map, variable, kv, point): | 49 | def __add_to_dirmap(self, dir_map, variable, kv, point): |
61 | value = kv.pop(variable) | 50 | value = kv.pop(variable) |
@@ -71,8 +60,7 @@ class TupleTable(object): | |||
71 | continue | 60 | continue |
72 | # Ex: release/num_tasks/measured-max/avg/x=5.csv | 61 | # Ex: release/num_tasks/measured-max/avg/x=5.csv |
73 | leaf = self.col_map.encode(kv) + ".csv" | 62 | leaf = self.col_map.encode(kv) + ".csv" |
74 | path = [ stat, variable, "taskset-" + base_type, | 63 | path = [ stat, variable, base_type, summary_type, leaf ] |
75 | summary_type, leaf ] | ||
76 | result = measurement[base_type] | 64 | result = measurement[base_type] |
77 | 65 | ||
78 | dir_map.add_values(path, [(value, result)]) | 66 | dir_map.add_values(path, [(value, result)]) |
@@ -96,5 +84,61 @@ class TupleTable(object): | |||
96 | 84 | ||
97 | self.__add_to_dirmap(dir_map, col, kv, point) | 85 | self.__add_to_dirmap(dir_map, col, kv, point) |
98 | 86 | ||
99 | dir_map.reduce() | 87 | dir_map.remove_childless() |
88 | print("wrote: %s" % self) | ||
100 | return dir_map | 89 | return dir_map |
90 | |||
91 | @staticmethod | ||
92 | def from_dir_map(dir_map): | ||
93 | Leaf = namedtuple('Leaf', ['stat', 'variable', 'base', | ||
94 | 'summary', 'config', 'values']) | ||
95 | def leafs(): | ||
96 | for path, values in dir_map.leafs(): | ||
97 | stat, variable, base_type, summary_type, leaf = path | ||
98 | |||
99 | config_str = leaf[:leaf.index('.csv')] | ||
100 | config = ColMap.decode(config_str) | ||
101 | |||
102 | yield Leaf(stat, variable, base_type, | ||
103 | summary_type, config, values) | ||
104 | |||
105 | builder = ColMapBuilder() | ||
106 | |||
107 | # Gather all possible config values for ColMap | ||
108 | for leaf_deets in leafs(): | ||
109 | for k, v in leaf_deets.config.iteritems(): | ||
110 | builder.try_add(k, v) | ||
111 | |||
112 | col_map = builder.build() | ||
113 | table = ReducedTupleTable(col_map) | ||
114 | |||
115 | # Set values at each point | ||
116 | for leaf in leafs(): | ||
117 | for (x, y) in leaf.values: | ||
118 | leaf.config[leaf.variable] = str(x) | ||
119 | summary = table[leaf.config][leaf.stat] | ||
120 | summary[leaf.summary][leaf.base] = y | ||
121 | |||
122 | print("read: %s" % table) | ||
123 | return table | ||
124 | |||
125 | def write_map(self, out_map): | ||
126 | rows = {} | ||
127 | |||
128 | for key, point in self.table.iteritems(): | ||
129 | row = {} | ||
130 | for name,measurement in point: | ||
131 | name = name.lower().replace('_','-') | ||
132 | row[name]={} | ||
133 | for base_type in Type: | ||
134 | type_key = str(base_type).lower() | ||
135 | if base_type in measurement[Type.Avg]: | ||
136 | value = measurement[Type.Avg][base_type] | ||
137 | row[name][type_key] = value | ||
138 | rows[key] = row | ||
139 | |||
140 | result = {'columns': self.col_map.columns(), 'rows':rows} | ||
141 | |||
142 | with open(out_map, 'wc') as map_file: | ||
143 | pprint(result,stream=map_file, width=20) | ||
144 | |||
diff --git a/parse_exps.py b/parse_exps.py index 1bd3d48..c2376de 100755 --- a/parse_exps.py +++ b/parse_exps.py | |||
@@ -5,14 +5,16 @@ import config.config as conf | |||
5 | import os | 5 | import os |
6 | import parse.ft as ft | 6 | import parse.ft as ft |
7 | import parse.sched as st | 7 | import parse.sched as st |
8 | import pickle | ||
8 | import shutil as sh | 9 | import shutil as sh |
9 | import sys | 10 | import sys |
10 | 11 | ||
11 | from collections import namedtuple | 12 | from collections import namedtuple |
12 | from common import load_params | 13 | from common import load_params |
13 | from optparse import OptionParser | 14 | from optparse import OptionParser |
15 | from parse.dir_map import DirMap | ||
14 | from parse.point import ExpPoint | 16 | from parse.point import ExpPoint |
15 | from parse.tuple_table import TupleTable | 17 | from parse.tuple_table import TupleTable,ReducedTupleTable |
16 | from parse.col_map import ColMapBuilder | 18 | from parse.col_map import ColMapBuilder |
17 | 19 | ||
18 | 20 | ||
@@ -83,6 +85,29 @@ def load_exps(exp_dirs, cm_builder, clean): | |||
83 | 85 | ||
84 | return exps | 86 | return exps |
85 | 87 | ||
88 | def parse_exp(exp, force): | ||
89 | result_file = exp.work_dir + "/exp_point.pkl" | ||
90 | should_load = not force and os.path.exists(result_file) | ||
91 | mode = 'r' if should_load else 'w' | ||
92 | |||
93 | with open(result_file, mode + 'b') as f: | ||
94 | if should_load: | ||
95 | # No need to go through this work twice | ||
96 | result = pickle.load(f) | ||
97 | else: | ||
98 | result = ExpPoint(exp.path) | ||
99 | cycles = exp.params[conf.PARAMS['cycles']] | ||
100 | |||
101 | # Write overheads into result | ||
102 | ft.extract_ft_data(result, exp.path, exp.work_dir, cycles) | ||
103 | |||
104 | # Write scheduling statistics into result | ||
105 | st.extract_sched_data(result, exp.path, exp.work_dir) | ||
106 | |||
107 | pickle.dump(result, f) | ||
108 | |||
109 | return result | ||
110 | |||
86 | def main(): | 111 | def main(): |
87 | opts, args = parse_args() | 112 | opts, args = parse_args() |
88 | 113 | ||
@@ -102,28 +127,19 @@ def main(): | |||
102 | 127 | ||
103 | sys.stderr.write("Parsing data...\n") | 128 | sys.stderr.write("Parsing data...\n") |
104 | for i,exp in enumerate(exps): | 129 | for i,exp in enumerate(exps): |
105 | result = ExpPoint(exp.path) | 130 | result = parse_exp(exp, opts.force) |
106 | cycles = exp.params[conf.PARAMS['cycles']] | ||
107 | |||
108 | # Write overheads into result | ||
109 | ft.extract_ft_data(result, exp.path, exp.work_dir, cycles) | ||
110 | |||
111 | # Write scheduling statistics into result | ||
112 | st.extract_sched_data(result, exp.path, exp.work_dir) | ||
113 | |||
114 | if opts.verbose: | 131 | if opts.verbose: |
115 | print(result) | 132 | print(result) |
116 | else: | 133 | else: |
117 | sys.stderr.write('\r {0:.2%}'.format(float(i)/len(exps))) | 134 | sys.stderr.write('\r {0:.2%}'.format(float(i)/len(exps))) |
118 | 135 | result_table[exp.params] += [result] | |
119 | result_table.add_exp(exp.params, result) | ||
120 | 136 | ||
121 | sys.stderr.write('\n') | 137 | sys.stderr.write('\n') |
122 | 138 | ||
123 | if opts.force and os.path.exists(opts.out): | 139 | if opts.force and os.path.exists(opts.out): |
124 | sh.rmtree(opts.out) | 140 | sh.rmtree(opts.out) |
125 | 141 | ||
126 | result_table.reduce() | 142 | result_table = result_table.reduce() |
127 | 143 | ||
128 | sys.stderr.write("Writing result...\n") | 144 | sys.stderr.write("Writing result...\n") |
129 | if opts.write_map: | 145 | if opts.write_map: |