From d312e270ed5c2926c8651291a4026062213876f8 Mon Sep 17 00:00:00 2001
From: Jonathan Herman
Date: Fri, 8 Feb 2013 11:54:49 -0500
Subject: ColMaps are now created by ColMapBuilders which order columns by their number of distinct values.

---
 parse/col_map.py     | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 parse/dir_map.py     |  8 ++++--
 parse/tuple_table.py | 72 +++--------------------------------------------
 3 files changed, 89 insertions(+), 70 deletions(-)
 create mode 100644 parse/col_map.py

(limited to 'parse')

diff --git a/parse/col_map.py b/parse/col_map.py
new file mode 100644
index 0000000..6f83ca1
--- /dev/null
+++ b/parse/col_map.py
@@ -0,0 +1,79 @@
+from collections import defaultdict
+
+class ColMapBuilder(object):
+    def __init__(self):
+        self.value_map = defaultdict(set)
+
+    def build(self):
+        columns = sorted(self.value_map.keys(),
+                         key=lambda c: (len(self.value_map[c]), c))
+        col_list = filter(lambda c : len(self.value_map[c]) > 1, columns)
+        return ColMap(col_list)
+
+    def try_add(self, column, value):
+        self.value_map[column].add( value )
+
+    def try_remove(self, column):
+        del(self.value_map[column])
+
+class ColMap(object):
+    def __init__(self, col_list):
+        self.col_list = col_list
+        self.rev_map = {}
+
+        for i, col in enumerate(col_list):
+            self.rev_map[col] = i
+
+    def columns(self):
+        return self.col_list
+
+    def get_key(self, kv):
+        '''Convert a key-value dict into an ordered tuple of values.'''
+        key = ()
+
+        for col in self.col_list:
+            if col not in kv:
+                key += (None,)
+            else:
+                key += (kv[col],)
+
+        return key
+
+    def get_kv(self, key):
+        '''Convert an ordered tuple of values into a key-value dict.'''
+        kv = {}
+        for i in range(0, len(key)):
+            kv[self.col_list[i]] = key[i]
+        return kv
+
+
+    def encode(self, kv):
+        '''Converted a dict into a string with items sorted according to
+        the ColMap key order.'''
+        def escape(val):
+            return str(val).replace("_", "-").replace("=", "-")
+
+        vals = []
+
+        for key in self.col_list:
+            if key not in kv:
+                continue
+            k, v = escape(key), escape(kv[key])
+            vals += ["%s=%s" % (k, v)]
+
+        return "_".join(vals)
+
+    @staticmethod
+    def decode(string):
+        '''Convert a string into a key-value dict.'''
+        vals = {}
+        for assignment in string.split("_"):
+            k, v = assignment.split("=")
+            vals[k] = v
+        return vals
+
+    def __contains__(self, col):
+        return col in self.rev_map
+
+    def __str__(self):
+        return "%s" % (self.rev_map)
diff --git a/parse/dir_map.py b/parse/dir_map.py
index b864318..51a1390 100644
--- a/parse/dir_map.py
+++ b/parse/dir_map.py
@@ -53,7 +53,9 @@ class DirMap(object):
 
         write2([out_dir], self.root)
 
-    def __read(self, in_dir):
+    @staticmethod
+    def read(in_dir):
+        dir_map = DirMap()
         if not os.path.exists(in_dir):
             raise ValueError("Can't load from nonexistent path : %s" % in_dir)
 
@@ -71,10 +73,12 @@ class DirMap(object):
                 stripped = path if path.find(in_dir) else path[len(in_dir):]
                 path_arr = stripped.split("/")
 
-                self.add_values(path_arr, values)
+                dir_map.add_values(path_arr, values)
 
         read2(in_dir)
 
+        return dir_map
+
     def __str__(self):
         def str2(node, level):
             header = " " * level
diff --git a/parse/tuple_table.py b/parse/tuple_table.py
index 45b46af..0b84296 100644
--- a/parse/tuple_table.py
+++ b/parse/tuple_table.py
@@ -4,68 +4,6 @@ from dir_map import DirMap
 
 from pprint import pprint
 
-class ColMap(object):
-    def __init__(self):
-        self.rev_map = {}
-        self.value_map = {}
-        self.col_list = []
-
-    def columns(self):
-        return self.col_list
-
-    def get_key(self, kv):
-        key = ()
-
-        for col in self.col_list:
-            if col not in kv:
-                key += (None,)
-            else:
-                key += (kv[col],)
-        return key
-
-    def get_encoding(self, kv):
-        def escape(val):
-            return str(val).replace("_", "-").replace("=", "-")
-        vals = []
-        for key in self.col_list:
-            if key not in kv:
-                continue
-            k, v = escape(key), escape(kv[key])
-            vals += ["%s=%s" % (k, v)]
-        return "_".join(vals)
-
-    def __contains__(self, col):
-        return col in self.rev_map
-
-    def get_map(self, tuple):
-        map = {}
-        for i in range(0, len(tuple)):
-            map[self.col_list[i]] = tuple[i]
-        return map
-
-    def force_add(self, column):
-        self.rev_map[column] = len(self.col_list)
-        self.col_list += [column]
-
-    def try_add(self, column, value):
-        if column not in self.rev_map:
-            if column not in self.value_map:
-                self.value_map[column] = value
-            elif value != self.value_map[column]:
-                self.force_add(column)
-                del(self.value_map[column])
-
-    def try_remove(self, column):
-        if column in self.rev_map:
-            idx = self.rev_map[column]
-            for value in self.col_list[idx+1:]:
-                self.rev_map[value] -= 1
-            del(self.col_list[self.rev_map[column]])
-            del(self.rev_map[column])
-
-    def __str__(self):
-        return "%s" % (self.rev_map)
-
 class TupleTable(object):
     def __init__(self, col_map):
         self.col_map = col_map
@@ -132,8 +70,9 @@ class TupleTable(object):
                 if not base_type in measurement:
                     continue
                 # Ex: release/num_tasks/measured-max/avg/x=5.csv
-                leaf = self.col_map.get_encoding(kv) + ".csv"
-                path = [ stat, variable, "taskset-" + base_type, summary_type, leaf ]
+                leaf = self.col_map.encode(kv) + ".csv"
+                path = [ stat, variable, "taskset-" + base_type,
+                         summary_type, leaf ]
                 result = measurement[base_type]
 
                 dir_map.add_values(path, [(value, result)])
@@ -144,7 +83,7 @@ class TupleTable(object):
         dir_map = DirMap()
 
         for key, point in self.table.iteritems():
-            kv = self.col_map.get_map(key)
+            kv = self.col_map.get_kv(key)
 
             for col in self.col_map.columns():
                 val = kv[col]
@@ -159,6 +98,3 @@ class TupleTable(object):
 
         dir_map.reduce()
         return dir_map
-
-    def from_dir_map(dir_map):
-        pass
-- 
cgit v1.2.2