diff options
| author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-02-08 11:54:49 -0500 |
|---|---|---|
| committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-02-08 11:54:49 -0500 |
| commit | d312e270ed5c2926c8651291a4026062213876f8 (patch) | |
| tree | 94992d8562332ae84be9f2b5aba169f0e287ea51 /parse | |
| parent | 2b416f696a50f4ae264d5aec8c78fa7686cc7927 (diff) | |
ColMaps are now created by ColMapBuilders, which order columns by their number
of distinct values.
Diffstat (limited to 'parse')
| -rw-r--r-- | parse/col_map.py | 79 | ||||
| -rw-r--r-- | parse/dir_map.py | 8 | ||||
| -rw-r--r-- | parse/tuple_table.py | 72 |
3 files changed, 89 insertions, 70 deletions
diff --git a/parse/col_map.py b/parse/col_map.py new file mode 100644 index 0000000..6f83ca1 --- /dev/null +++ b/parse/col_map.py | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | from collections import defaultdict | ||
| 2 | |||
| 3 | class ColMapBuilder(object): | ||
| 4 | def __init__(self): | ||
| 5 | self.value_map = defaultdict(set) | ||
| 6 | |||
| 7 | def build(self): | ||
| 8 | columns = sorted(self.value_map.keys(), | ||
| 9 | key=lambda c: (len(self.value_map[c]), c)) | ||
| 10 | col_list = filter(lambda c : len(self.value_map[c]) > 1, columns) | ||
| 11 | return ColMap(col_list) | ||
| 12 | |||
| 13 | def try_add(self, column, value): | ||
| 14 | self.value_map[column].add( value ) | ||
| 15 | |||
| 16 | def try_remove(self, column): | ||
| 17 | del(self.value_map[column]) | ||
| 18 | |||
| 19 | class ColMap(object): | ||
| 20 | def __init__(self, col_list): | ||
| 21 | self.col_list = col_list | ||
| 22 | self.rev_map = {} | ||
| 23 | |||
| 24 | for i, col in enumerate(col_list): | ||
| 25 | self.rev_map[col] = i | ||
| 26 | |||
| 27 | def columns(self): | ||
| 28 | return self.col_list | ||
| 29 | |||
| 30 | def get_key(self, kv): | ||
| 31 | '''Convert a key-value dict into an ordered tuple of values.''' | ||
| 32 | key = () | ||
| 33 | |||
| 34 | for col in self.col_list: | ||
| 35 | if col not in kv: | ||
| 36 | key += (None,) | ||
| 37 | else: | ||
| 38 | key += (kv[col],) | ||
| 39 | |||
| 40 | return key | ||
| 41 | |||
| 42 | def get_kv(self, key): | ||
| 43 | '''Convert an ordered tuple of values into a key-value dict.''' | ||
| 44 | kv = {} | ||
| 45 | for i in range(0, len(key)): | ||
| 46 | kv[self.col_list[i]] = key[i] | ||
| 47 | return kv | ||
| 48 | |||
| 49 | |||
| 50 | def encode(self, kv): | ||
| 51 | '''Converted a dict into a string with items sorted according to | ||
| 52 | the ColMap key order.''' | ||
| 53 | def escape(val): | ||
| 54 | return str(val).replace("_", "-").replace("=", "-") | ||
| 55 | |||
| 56 | vals = [] | ||
| 57 | |||
| 58 | for key in self.col_list: | ||
| 59 | if key not in kv: | ||
| 60 | continue | ||
| 61 | k, v = escape(key), escape(kv[key]) | ||
| 62 | vals += ["%s=%s" % (k, v)] | ||
| 63 | |||
| 64 | return "_".join(vals) | ||
| 65 | |||
| 66 | @staticmethod | ||
| 67 | def decode(string): | ||
| 68 | '''Convert a string into a key-value dict.''' | ||
| 69 | vals = {} | ||
| 70 | for assignment in string.split("_"): | ||
| 71 | k, v = assignment.split("=") | ||
| 72 | vals[k] = v | ||
| 73 | return vals | ||
| 74 | |||
| 75 | def __contains__(self, col): | ||
| 76 | return col in self.rev_map | ||
| 77 | |||
| 78 | def __str__(self): | ||
| 79 | return "<ColMap>%s" % (self.rev_map) | ||
diff --git a/parse/dir_map.py b/parse/dir_map.py index b864318..51a1390 100644 --- a/parse/dir_map.py +++ b/parse/dir_map.py | |||
| @@ -53,7 +53,9 @@ class DirMap(object): | |||
| 53 | 53 | ||
| 54 | write2([out_dir], self.root) | 54 | write2([out_dir], self.root) |
| 55 | 55 | ||
| 56 | def __read(self, in_dir): | 56 | @staticmethod |
| 57 | def read(in_dir): | ||
| 58 | dir_map = DirMap() | ||
| 57 | if not os.path.exists(in_dir): | 59 | if not os.path.exists(in_dir): |
| 58 | raise ValueError("Can't load from nonexistent path : %s" % in_dir) | 60 | raise ValueError("Can't load from nonexistent path : %s" % in_dir) |
| 59 | 61 | ||
| @@ -71,10 +73,12 @@ class DirMap(object): | |||
| 71 | stripped = path if path.find(in_dir) else path[len(in_dir):] | 73 | stripped = path if path.find(in_dir) else path[len(in_dir):] |
| 72 | path_arr = stripped.split("/") | 74 | path_arr = stripped.split("/") |
| 73 | 75 | ||
| 74 | self.add_values(path_arr, values) | 76 | dir_map.add_values(path_arr, values) |
| 75 | 77 | ||
| 76 | read2(in_dir) | 78 | read2(in_dir) |
| 77 | 79 | ||
| 80 | return dir_map | ||
| 81 | |||
| 78 | def __str__(self): | 82 | def __str__(self): |
| 79 | def str2(node, level): | 83 | def str2(node, level): |
| 80 | header = " " * level | 84 | header = " " * level |
diff --git a/parse/tuple_table.py b/parse/tuple_table.py index 45b46af..0b84296 100644 --- a/parse/tuple_table.py +++ b/parse/tuple_table.py | |||
| @@ -4,68 +4,6 @@ from dir_map import DirMap | |||
| 4 | 4 | ||
| 5 | from pprint import pprint | 5 | from pprint import pprint |
| 6 | 6 | ||
| 7 | class ColMap(object): | ||
| 8 | def __init__(self): | ||
| 9 | self.rev_map = {} | ||
| 10 | self.value_map = {} | ||
| 11 | self.col_list = [] | ||
| 12 | |||
| 13 | def columns(self): | ||
| 14 | return self.col_list | ||
| 15 | |||
| 16 | def get_key(self, kv): | ||
| 17 | key = () | ||
| 18 | |||
| 19 | for col in self.col_list: | ||
| 20 | if col not in kv: | ||
| 21 | key += (None,) | ||
| 22 | else: | ||
| 23 | key += (kv[col],) | ||
| 24 | return key | ||
| 25 | |||
| 26 | def get_encoding(self, kv): | ||
| 27 | def escape(val): | ||
| 28 | return str(val).replace("_", "-").replace("=", "-") | ||
| 29 | vals = [] | ||
| 30 | for key in self.col_list: | ||
| 31 | if key not in kv: | ||
| 32 | continue | ||
| 33 | k, v = escape(key), escape(kv[key]) | ||
| 34 | vals += ["%s=%s" % (k, v)] | ||
| 35 | return "_".join(vals) | ||
| 36 | |||
| 37 | def __contains__(self, col): | ||
| 38 | return col in self.rev_map | ||
| 39 | |||
| 40 | def get_map(self, tuple): | ||
| 41 | map = {} | ||
| 42 | for i in range(0, len(tuple)): | ||
| 43 | map[self.col_list[i]] = tuple[i] | ||
| 44 | return map | ||
| 45 | |||
| 46 | def force_add(self, column): | ||
| 47 | self.rev_map[column] = len(self.col_list) | ||
| 48 | self.col_list += [column] | ||
| 49 | |||
| 50 | def try_add(self, column, value): | ||
| 51 | if column not in self.rev_map: | ||
| 52 | if column not in self.value_map: | ||
| 53 | self.value_map[column] = value | ||
| 54 | elif value != self.value_map[column]: | ||
| 55 | self.force_add(column) | ||
| 56 | del(self.value_map[column]) | ||
| 57 | |||
| 58 | def try_remove(self, column): | ||
| 59 | if column in self.rev_map: | ||
| 60 | idx = self.rev_map[column] | ||
| 61 | for value in self.col_list[idx+1:]: | ||
| 62 | self.rev_map[value] -= 1 | ||
| 63 | del(self.col_list[self.rev_map[column]]) | ||
| 64 | del(self.rev_map[column]) | ||
| 65 | |||
| 66 | def __str__(self): | ||
| 67 | return "<ColMap>%s" % (self.rev_map) | ||
| 68 | |||
| 69 | class TupleTable(object): | 7 | class TupleTable(object): |
| 70 | def __init__(self, col_map): | 8 | def __init__(self, col_map): |
| 71 | self.col_map = col_map | 9 | self.col_map = col_map |
| @@ -132,8 +70,9 @@ class TupleTable(object): | |||
| 132 | if not base_type in measurement: | 70 | if not base_type in measurement: |
| 133 | continue | 71 | continue |
| 134 | # Ex: release/num_tasks/measured-max/avg/x=5.csv | 72 | # Ex: release/num_tasks/measured-max/avg/x=5.csv |
| 135 | leaf = self.col_map.get_encoding(kv) + ".csv" | 73 | leaf = self.col_map.encode(kv) + ".csv" |
| 136 | path = [ stat, variable, "taskset-" + base_type, summary_type, leaf ] | 74 | path = [ stat, variable, "taskset-" + base_type, |
| 75 | summary_type, leaf ] | ||
| 137 | result = measurement[base_type] | 76 | result = measurement[base_type] |
| 138 | 77 | ||
| 139 | dir_map.add_values(path, [(value, result)]) | 78 | dir_map.add_values(path, [(value, result)]) |
| @@ -144,7 +83,7 @@ class TupleTable(object): | |||
| 144 | dir_map = DirMap() | 83 | dir_map = DirMap() |
| 145 | 84 | ||
| 146 | for key, point in self.table.iteritems(): | 85 | for key, point in self.table.iteritems(): |
| 147 | kv = self.col_map.get_map(key) | 86 | kv = self.col_map.get_kv(key) |
| 148 | 87 | ||
| 149 | for col in self.col_map.columns(): | 88 | for col in self.col_map.columns(): |
| 150 | val = kv[col] | 89 | val = kv[col] |
| @@ -159,6 +98,3 @@ class TupleTable(object): | |||
| 159 | 98 | ||
| 160 | dir_map.reduce() | 99 | dir_map.reduce() |
| 161 | return dir_map | 100 | return dir_map |
| 162 | |||
| 163 | def from_dir_map(dir_map): | ||
| 164 | pass | ||
