From 7c647198fc40e72ef6ca23c2484bf49eba2079ee Mon Sep 17 00:00:00 2001 From: Jonathan Herman Date: Fri, 8 Feb 2013 16:20:01 -0500 Subject: Added translation from DirMap to ReducedTupleTable. --- parse/dir_map.py | 43 +++++++++++------ parse/enum.py | 2 - parse/point.py | 37 +++++++++------ parse/tuple_table.py | 128 ++++++++++++++++++++++++++++++++++----------------- 4 files changed, 137 insertions(+), 73 deletions(-) (limited to 'parse') diff --git a/parse/dir_map.py b/parse/dir_map.py index 51a1390..e4d83e6 100644 --- a/parse/dir_map.py +++ b/parse/dir_map.py @@ -3,18 +3,16 @@ import numpy as np from collections import defaultdict -class TreeNode(object): - def __init__(self, parent = None): - self.parent = parent - self.children = defaultdict(lambda : TreeNode(self)) - self.values = [] - class DirMap(object): - def __init__(self, in_dir = None): - self.root = TreeNode(None) + class Node(object): + def __init__(self, parent = None): + self.parent = parent + self.children = defaultdict(lambda : DirMap.Node(self)) + self.values = [] + + def __init__(self): + self.root = DirMap.Node(None) self.values = [] - if in_dir: - self.__read(in_dir) def add_values(self, path, values): node = self.root @@ -22,18 +20,18 @@ class DirMap(object): node = node.children[p] node.values += values - def reduce(self): - def reduce2(node): + def remove_childless(self): + def remove_childless2(node): for key in node.children.keys(): child = node.children[key] - reduce2(child) + remove_childless2(child) if not (child.children or child.values): node.children.pop(key) if len(node.values) == 1: node.values = [] - reduce2(self.root) + remove_childless2(self.root) def write(self, out_dir): def write2(path, node): @@ -42,6 +40,7 @@ class DirMap(object): # Leaf with open("/".join(path), "w") as f: arr = [",".join([str(b) for b in n]) for n in node.values] + arr = sorted(arr, key=lambda x: x[0]) f.write("\n".join(arr) + "\n") elif not os.path.isdir(out_path): os.mkdir(out_path) @@ -53,6 +52,21 @@ class DirMap(object): write2([out_dir], self.root) + + def leafs(self): + def leafs2(path, node): + if node.children: + for child_name, child_node in node.children.iteritems(): + path += [child_name] + for leaf in leafs2(path, child_node): + yield leaf + path.pop() + elif path: + yield (path, node.values) + + for leaf in leafs2([], self.root): + yield leaf + @staticmethod def read(in_dir): dir_map = DirMap() @@ -72,6 +86,7 @@ class DirMap(object): stripped = path if path.find(in_dir) else path[len(in_dir):] path_arr = stripped.split("/") + path_arr = filter(lambda x: x != '', path_arr) dir_map.add_values(path_arr, values) diff --git a/parse/enum.py b/parse/enum.py index bf35d01..53db9e1 100644 --- a/parse/enum.py +++ b/parse/enum.py @@ -3,5 +3,3 @@ class Enum(frozenset): if name in self: return name raise AttributeError - - diff --git a/parse/point.py b/parse/point.py index 8e27869..ce9cfb0 100644 --- a/parse/point.py +++ b/parse/point.py @@ -26,9 +26,9 @@ def dict_str(adict, sep = "\n"): return sep.join([("%" + str(size) + "s: %9s") % (k, num_str(v)) for (k,v) in sorted(adict.iteritems())]) class Measurement(object): - def __init__(self, id = None, kv = {}): + def __init__(self, id = None, kv = {}, default=list): self.id = id - self.stats = {} + self.stats = defaultdict(default) for k, v in kv.iteritems(): self[k] = v @@ -55,20 +55,24 @@ class Measurement(object): self.__check_type(type) return type in self.stats - def __setitem__(self, type, value): - self.__check_type(type) - self.stats[type] = value + def __setitem__(self, t, value): + self.__check_type(t) + # Numpy returns single memmapped values which can't be pickled + # Convert them to floats which can be + if type(value) is np.memmap: + value = float(value) + self.stats[t] = value def __str__(self): return "%s" % dict_str(self.stats, " ") - class Summary(Measurement): - def __init__(self, id, measures, typemap = default_typemap): - super(Summary, self).__init__(id) + def __init__(self, id="", measures=[], typemap = default_typemap): + super(Summary, self).__init__(id, default=Measurement) - self.__check_types(measures, typemap) - self.__summarize(measures, typemap) + if measures: + self.__check_types(measures, typemap) + self.__summarize(measures, typemap) def __check_types(self, measures, typemap): required_types = self.__get_required(typemap) @@ -100,8 +104,8 @@ class Summary(Measurement): return required class ExpPoint(object): - def __init__(self, id = "", init = {}): - self.stats = {} + def __init__(self, id = "", init = {}, default=Measurement): + self.stats = defaultdict(default) for type, value in init.iteritems(): self[type] = value self.id = id @@ -124,14 +128,17 @@ class ExpPoint(object): self.stats[type] = value def __str__(self): - return "\n%s" % (self.id, dict_str(self.stats)) + # return "\n%s" % (self.id, dict_str(self.stats)) + return "" % (self.id) def get_stats(self): return self.stats.keys() + class SummaryPoint(ExpPoint): - def __init__(self, id, points, typemap = default_typemap): - super(SummaryPoint,self).__init__("Summary-%s" % id) + def __init__(self, id="", points=[], typemap = default_typemap): + super(SummaryPoint,self).__init__("Summary-%s" % id, + default=Summary) grouped = defaultdict(lambda : []) diff --git a/parse/tuple_table.py b/parse/tuple_table.py index 0b84296..469a424 100644 --- a/parse/tuple_table.py +++ b/parse/tuple_table.py @@ -1,61 +1,50 @@ -from collections import defaultdict +from Cheetah.Template import Template +from collections import defaultdict,namedtuple from point import SummaryPoint,Type from dir_map import DirMap +from col_map import ColMap,ColMapBuilder + from pprint import pprint class TupleTable(object): - def __init__(self, col_map): + def __init__(self, col_map, default=lambda:[]): self.col_map = col_map - self.table = defaultdict(lambda: []) - self.reduced = False - - # TODO: rename, make exp agnostic, extend for exps - def add_exp(self, kv, point): - key = self.col_map.get_key(kv) - self.table[key] += [point] + self.table = defaultdict(default) def col_map(self): return self.col_map - def get_exps(self, kv): + def __getitem__(self, kv): key = self.col_map.get_key(kv) return self.table[key] + def __setitem__(self, kv, value): + key = self.col_map.get_key(kv) + self.table[key] + def __contains__(self, kv): key = self.col_map.get_key(kv) return key in self.table def reduce(self): - if self.reduced: - raise Exception("cannot reduce twice!") - self.reduced = True - for key, values in self.table.iteritems(): - self.table[key] = SummaryPoint(values[0].id, values) - - def write_map(self, out_map): - if not self.reduced: - raise Exception("must reduce table to write map!") - - rows = {} - - for key, point in self.table.iteritems(): - row = {} - for name,measurement in point: - name = name.lower().replace('_','-') - row[name]={} - for base_type in Type: - type_key = str(base_type).lower() - if base_type in measurement[Type.Avg]: - value = measurement[Type.Avg][base_type] - row[name][type_key] = value - rows[key] = row - - result = {'columns': self.col_map.columns(), 'rows':rows} - - with open(out_map, 'wc') as map_file: - pprint(result,stream=map_file, width=20) - + reduced = ReducedTupleTable(self.col_map) + for key, value in self.table.iteritems(): + if type(value) == type([]): + value = SummaryPoint(value[0].id, value) + reduced.table[key] = value + return reduced + + def __str__(self): + s = str(Template("""ColMap: $col_map + #for $item in $table + $item :$table[$item] + #end for""", searchList=vars(self))) + return s + +class ReducedTupleTable(TupleTable): + def __init__(self, col_map): + super(ReducedTupleTable, self).__init__(col_map, default=SummaryPoint) def __add_to_dirmap(self, dir_map, variable, kv, point): value = kv.pop(variable) @@ -71,8 +60,7 @@ class TupleTable(object): continue # Ex: release/num_tasks/measured-max/avg/x=5.csv leaf = self.col_map.encode(kv) + ".csv" - path = [ stat, variable, "taskset-" + base_type, - summary_type, leaf ] + path = [ stat, variable, base_type, summary_type, leaf ] result = measurement[base_type] dir_map.add_values(path, [(value, result)]) @@ -96,5 +84,61 @@ class TupleTable(object): self.__add_to_dirmap(dir_map, col, kv, point) - dir_map.reduce() + dir_map.remove_childless() + print("wrote: %s" % self) return dir_map + + @staticmethod + def from_dir_map(dir_map): + Leaf = namedtuple('Leaf', ['stat', 'variable', 'base', + 'summary', 'config', 'values']) + def leafs(): + for path, values in dir_map.leafs(): + stat, variable, base_type, summary_type, leaf = path + + config_str = leaf[:leaf.index('.csv')] + config = ColMap.decode(config_str) + + yield Leaf(stat, variable, base_type, + summary_type, config, values) + + builder = ColMapBuilder() + + # Gather all possible config values for ColMap + for leaf_deets in leafs(): + for k, v in leaf_deets.config.iteritems(): + builder.try_add(k, v) + + col_map = builder.build() + table = ReducedTupleTable(col_map) + + # Set values at each point + for leaf in leafs(): + for (x, y) in leaf.values: + leaf.config[leaf.variable] = str(x) + summary = table[leaf.config][leaf.stat] + summary[leaf.summary][leaf.base] = y + + print("read: %s" % table) + return table + + def write_map(self, out_map): + rows = {} + + for key, point in self.table.iteritems(): + row = {} + for name,measurement in point: + name = name.lower().replace('_','-') + row[name]={} + for base_type in Type: + type_key = str(base_type).lower() + if base_type in measurement[Type.Avg]: + value = measurement[Type.Avg][base_type] + row[name][type_key] = value + rows[key] = row + + result = {'columns': self.col_map.columns(), 'rows':rows} + + with open(out_map, 'wc') as map_file: + pprint(result,stream=map_file, width=20) + -- cgit v1.2.2