diff options
| author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-02-08 11:54:49 -0500 |
|---|---|---|
| committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-02-08 11:54:49 -0500 |
| commit | d312e270ed5c2926c8651291a4026062213876f8 (patch) | |
| tree | 94992d8562332ae84be9f2b5aba169f0e287ea51 | |
| parent | 2b416f696a50f4ae264d5aec8c78fa7686cc7927 (diff) | |
ColMaps are now created by ColMapBuilders, which order columns by their number
of distinct values.
| -rw-r--r-- | gen/generators.py | 7 | ||||
| -rw-r--r-- | parse/col_map.py | 79 | ||||
| -rw-r--r-- | parse/dir_map.py | 8 | ||||
| -rw-r--r-- | parse/tuple_table.py | 72 | ||||
| -rwxr-xr-x | parse_exps.py | 23 |
5 files changed, 105 insertions, 84 deletions
diff --git a/gen/generators.py b/gen/generators.py index 2fc77a7..09ae979 100644 --- a/gen/generators.py +++ b/gen/generators.py | |||
| @@ -3,7 +3,7 @@ from collections import namedtuple | |||
| 3 | from common import get_config_option | 3 | from common import get_config_option |
| 4 | from config.config import DEFAULTS | 4 | from config.config import DEFAULTS |
| 5 | from gen.dp import DesignPointGenerator | 5 | from gen.dp import DesignPointGenerator |
| 6 | from parse.tuple_table import ColMap | 6 | from parse.col_map import ColMapBuilder |
| 7 | 7 | ||
| 8 | import gen.rv as rv | 8 | import gen.rv as rv |
| 9 | import os | 9 | import os |
| @@ -185,13 +185,14 @@ class BaseGenerator(object): | |||
| 185 | def create_exps(self, out_dir, force): | 185 | def create_exps(self, out_dir, force): |
| 186 | '''Create experiments for all possible combinations of params in | 186 | '''Create experiments for all possible combinations of params in |
| 187 | @out_dir. Overwrite existing files if @force is True.''' | 187 | @out_dir. Overwrite existing files if @force is True.''' |
| 188 | col_map = ColMap() | 188 | builder = ColMapBuilder() |
| 189 | 189 | ||
| 190 | # Track changing values so only relevant parameters are included | 190 | # Track changing values so only relevant parameters are included |
| 191 | # in directory names | 191 | # in directory names |
| 192 | for dp in DesignPointGenerator(self.params): | 192 | for dp in DesignPointGenerator(self.params): |
| 193 | for k, v in dp.iteritems(): | 193 | for k, v in dp.iteritems(): |
| 194 | col_map.try_add(k, v) | 194 | builder.try_add(k, v) |
| 195 | col_map = builder.build() | ||
| 195 | 196 | ||
| 196 | for dp in DesignPointGenerator(self.params): | 197 | for dp in DesignPointGenerator(self.params): |
| 197 | dir_leaf = "sched=%s_%s" % (self.name, col_map.get_encoding(dp)) | 198 | dir_leaf = "sched=%s_%s" % (self.name, col_map.get_encoding(dp)) |
diff --git a/parse/col_map.py b/parse/col_map.py new file mode 100644 index 0000000..6f83ca1 --- /dev/null +++ b/parse/col_map.py | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | from collections import defaultdict | ||
| 2 | |||
| 3 | class ColMapBuilder(object): | ||
| 4 | def __init__(self): | ||
| 5 | self.value_map = defaultdict(set) | ||
| 6 | |||
| 7 | def build(self): | ||
| 8 | columns = sorted(self.value_map.keys(), | ||
| 9 | key=lambda c: (len(self.value_map[c]), c)) | ||
| 10 | col_list = filter(lambda c : len(self.value_map[c]) > 1, columns) | ||
| 11 | return ColMap(col_list) | ||
| 12 | |||
| 13 | def try_add(self, column, value): | ||
| 14 | self.value_map[column].add( value ) | ||
| 15 | |||
| 16 | def try_remove(self, column): | ||
| 17 | del(self.value_map[column]) | ||
| 18 | |||
| 19 | class ColMap(object): | ||
| 20 | def __init__(self, col_list): | ||
| 21 | self.col_list = col_list | ||
| 22 | self.rev_map = {} | ||
| 23 | |||
| 24 | for i, col in enumerate(col_list): | ||
| 25 | self.rev_map[col] = i | ||
| 26 | |||
| 27 | def columns(self): | ||
| 28 | return self.col_list | ||
| 29 | |||
| 30 | def get_key(self, kv): | ||
| 31 | '''Convert a key-value dict into an ordered tuple of values.''' | ||
| 32 | key = () | ||
| 33 | |||
| 34 | for col in self.col_list: | ||
| 35 | if col not in kv: | ||
| 36 | key += (None,) | ||
| 37 | else: | ||
| 38 | key += (kv[col],) | ||
| 39 | |||
| 40 | return key | ||
| 41 | |||
| 42 | def get_kv(self, key): | ||
| 43 | '''Convert an ordered tuple of values into a key-value dict.''' | ||
| 44 | kv = {} | ||
| 45 | for i in range(0, len(key)): | ||
| 46 | kv[self.col_list[i]] = key[i] | ||
| 47 | return kv | ||
| 48 | |||
| 49 | |||
| 50 | def encode(self, kv): | ||
| 51 | '''Convert a dict into a string with items sorted according to | ||
| 52 | the ColMap key order.''' | ||
| 53 | def escape(val): | ||
| 54 | return str(val).replace("_", "-").replace("=", "-") | ||
| 55 | |||
| 56 | vals = [] | ||
| 57 | |||
| 58 | for key in self.col_list: | ||
| 59 | if key not in kv: | ||
| 60 | continue | ||
| 61 | k, v = escape(key), escape(kv[key]) | ||
| 62 | vals += ["%s=%s" % (k, v)] | ||
| 63 | |||
| 64 | return "_".join(vals) | ||
| 65 | |||
| 66 | @staticmethod | ||
| 67 | def decode(string): | ||
| 68 | '''Convert a string into a key-value dict.''' | ||
| 69 | vals = {} | ||
| 70 | for assignment in string.split("_"): | ||
| 71 | k, v = assignment.split("=") | ||
| 72 | vals[k] = v | ||
| 73 | return vals | ||
| 74 | |||
| 75 | def __contains__(self, col): | ||
| 76 | return col in self.rev_map | ||
| 77 | |||
| 78 | def __str__(self): | ||
| 79 | return "<ColMap>%s" % (self.rev_map) | ||
diff --git a/parse/dir_map.py b/parse/dir_map.py index b864318..51a1390 100644 --- a/parse/dir_map.py +++ b/parse/dir_map.py | |||
| @@ -53,7 +53,9 @@ class DirMap(object): | |||
| 53 | 53 | ||
| 54 | write2([out_dir], self.root) | 54 | write2([out_dir], self.root) |
| 55 | 55 | ||
| 56 | def __read(self, in_dir): | 56 | @staticmethod |
| 57 | def read(in_dir): | ||
| 58 | dir_map = DirMap() | ||
| 57 | if not os.path.exists(in_dir): | 59 | if not os.path.exists(in_dir): |
| 58 | raise ValueError("Can't load from nonexistent path : %s" % in_dir) | 60 | raise ValueError("Can't load from nonexistent path : %s" % in_dir) |
| 59 | 61 | ||
| @@ -71,10 +73,12 @@ class DirMap(object): | |||
| 71 | stripped = path if path.find(in_dir) else path[len(in_dir):] | 73 | stripped = path if path.find(in_dir) else path[len(in_dir):] |
| 72 | path_arr = stripped.split("/") | 74 | path_arr = stripped.split("/") |
| 73 | 75 | ||
| 74 | self.add_values(path_arr, values) | 76 | dir_map.add_values(path_arr, values) |
| 75 | 77 | ||
| 76 | read2(in_dir) | 78 | read2(in_dir) |
| 77 | 79 | ||
| 80 | return dir_map | ||
| 81 | |||
| 78 | def __str__(self): | 82 | def __str__(self): |
| 79 | def str2(node, level): | 83 | def str2(node, level): |
| 80 | header = " " * level | 84 | header = " " * level |
diff --git a/parse/tuple_table.py b/parse/tuple_table.py index 45b46af..0b84296 100644 --- a/parse/tuple_table.py +++ b/parse/tuple_table.py | |||
| @@ -4,68 +4,6 @@ from dir_map import DirMap | |||
| 4 | 4 | ||
| 5 | from pprint import pprint | 5 | from pprint import pprint |
| 6 | 6 | ||
| 7 | class ColMap(object): | ||
| 8 | def __init__(self): | ||
| 9 | self.rev_map = {} | ||
| 10 | self.value_map = {} | ||
| 11 | self.col_list = [] | ||
| 12 | |||
| 13 | def columns(self): | ||
| 14 | return self.col_list | ||
| 15 | |||
| 16 | def get_key(self, kv): | ||
| 17 | key = () | ||
| 18 | |||
| 19 | for col in self.col_list: | ||
| 20 | if col not in kv: | ||
| 21 | key += (None,) | ||
| 22 | else: | ||
| 23 | key += (kv[col],) | ||
| 24 | return key | ||
| 25 | |||
| 26 | def get_encoding(self, kv): | ||
| 27 | def escape(val): | ||
| 28 | return str(val).replace("_", "-").replace("=", "-") | ||
| 29 | vals = [] | ||
| 30 | for key in self.col_list: | ||
| 31 | if key not in kv: | ||
| 32 | continue | ||
| 33 | k, v = escape(key), escape(kv[key]) | ||
| 34 | vals += ["%s=%s" % (k, v)] | ||
| 35 | return "_".join(vals) | ||
| 36 | |||
| 37 | def __contains__(self, col): | ||
| 38 | return col in self.rev_map | ||
| 39 | |||
| 40 | def get_map(self, tuple): | ||
| 41 | map = {} | ||
| 42 | for i in range(0, len(tuple)): | ||
| 43 | map[self.col_list[i]] = tuple[i] | ||
| 44 | return map | ||
| 45 | |||
| 46 | def force_add(self, column): | ||
| 47 | self.rev_map[column] = len(self.col_list) | ||
| 48 | self.col_list += [column] | ||
| 49 | |||
| 50 | def try_add(self, column, value): | ||
| 51 | if column not in self.rev_map: | ||
| 52 | if column not in self.value_map: | ||
| 53 | self.value_map[column] = value | ||
| 54 | elif value != self.value_map[column]: | ||
| 55 | self.force_add(column) | ||
| 56 | del(self.value_map[column]) | ||
| 57 | |||
| 58 | def try_remove(self, column): | ||
| 59 | if column in self.rev_map: | ||
| 60 | idx = self.rev_map[column] | ||
| 61 | for value in self.col_list[idx+1:]: | ||
| 62 | self.rev_map[value] -= 1 | ||
| 63 | del(self.col_list[self.rev_map[column]]) | ||
| 64 | del(self.rev_map[column]) | ||
| 65 | |||
| 66 | def __str__(self): | ||
| 67 | return "<ColMap>%s" % (self.rev_map) | ||
| 68 | |||
| 69 | class TupleTable(object): | 7 | class TupleTable(object): |
| 70 | def __init__(self, col_map): | 8 | def __init__(self, col_map): |
| 71 | self.col_map = col_map | 9 | self.col_map = col_map |
| @@ -132,8 +70,9 @@ class TupleTable(object): | |||
| 132 | if not base_type in measurement: | 70 | if not base_type in measurement: |
| 133 | continue | 71 | continue |
| 134 | # Ex: release/num_tasks/measured-max/avg/x=5.csv | 72 | # Ex: release/num_tasks/measured-max/avg/x=5.csv |
| 135 | leaf = self.col_map.get_encoding(kv) + ".csv" | 73 | leaf = self.col_map.encode(kv) + ".csv" |
| 136 | path = [ stat, variable, "taskset-" + base_type, summary_type, leaf ] | 74 | path = [ stat, variable, "taskset-" + base_type, |
| 75 | summary_type, leaf ] | ||
| 137 | result = measurement[base_type] | 76 | result = measurement[base_type] |
| 138 | 77 | ||
| 139 | dir_map.add_values(path, [(value, result)]) | 78 | dir_map.add_values(path, [(value, result)]) |
| @@ -144,7 +83,7 @@ class TupleTable(object): | |||
| 144 | dir_map = DirMap() | 83 | dir_map = DirMap() |
| 145 | 84 | ||
| 146 | for key, point in self.table.iteritems(): | 85 | for key, point in self.table.iteritems(): |
| 147 | kv = self.col_map.get_map(key) | 86 | kv = self.col_map.get_kv(key) |
| 148 | 87 | ||
| 149 | for col in self.col_map.columns(): | 88 | for col in self.col_map.columns(): |
| 150 | val = kv[col] | 89 | val = kv[col] |
| @@ -159,6 +98,3 @@ class TupleTable(object): | |||
| 159 | 98 | ||
| 160 | dir_map.reduce() | 99 | dir_map.reduce() |
| 161 | return dir_map | 100 | return dir_map |
| 162 | |||
| 163 | def from_dir_map(dir_map): | ||
| 164 | pass | ||
diff --git a/parse_exps.py b/parse_exps.py index 5228cbe..1bd3d48 100755 --- a/parse_exps.py +++ b/parse_exps.py | |||
| @@ -11,9 +11,9 @@ import sys | |||
| 11 | from collections import namedtuple | 11 | from collections import namedtuple |
| 12 | from common import load_params | 12 | from common import load_params |
| 13 | from optparse import OptionParser | 13 | from optparse import OptionParser |
| 14 | from parse.dir_map import DirMap | ||
| 15 | from parse.point import ExpPoint | 14 | from parse.point import ExpPoint |
| 16 | from parse.tuple_table import ColMap,TupleTable | 15 | from parse.tuple_table import TupleTable |
| 16 | from parse.col_map import ColMapBuilder | ||
| 17 | 17 | ||
| 18 | 18 | ||
| 19 | def parse_args(): | 19 | def parse_args(): |
| @@ -41,17 +41,17 @@ def parse_args(): | |||
| 41 | 41 | ||
| 42 | ExpData = namedtuple('ExpData', ['path', 'params', 'work_dir']) | 42 | ExpData = namedtuple('ExpData', ['path', 'params', 'work_dir']) |
| 43 | 43 | ||
| 44 | def get_exp_params(data_dir, col_map): | 44 | def get_exp_params(data_dir, cm_builder): |
| 45 | param_file = "%s/%s" % (data_dir, conf.DEFAULTS['params_file']) | 45 | param_file = "%s/%s" % (data_dir, conf.DEFAULTS['params_file']) |
| 46 | if not os.path.isfile: | 46 | if not os.path.isfile: |
| 47 | raise Exception("No param file '%s' exists!" % param_file) | 47 | raise Exception("No param file '%s' exists!" % param_file) |
| 48 | 48 | ||
| 49 | params = load_params(param_file) | 49 | params = load_params(param_file) |
| 50 | 50 | ||
| 51 | # Store parameters in col_map, which will track which parameters change | 51 | # Store parameters in cm_builder, which will track which parameters change |
| 52 | # across experiments | 52 | # across experiments |
| 53 | for key, value in params.iteritems(): | 53 | for key, value in params.iteritems(): |
| 54 | col_map.try_add(key, value) | 54 | cm_builder.try_add(key, value) |
| 55 | 55 | ||
| 56 | # Cycles must be present for feather-trace measurement parsing | 56 | # Cycles must be present for feather-trace measurement parsing |
| 57 | if conf.PARAMS['cycles'] not in params: | 57 | if conf.PARAMS['cycles'] not in params: |
| @@ -60,7 +60,7 @@ def get_exp_params(data_dir, col_map): | |||
| 60 | return params | 60 | return params |
| 61 | 61 | ||
| 62 | 62 | ||
| 63 | def load_exps(exp_dirs, col_map, clean): | 63 | def load_exps(exp_dirs, cm_builder, clean): |
| 64 | exps = [] | 64 | exps = [] |
| 65 | 65 | ||
| 66 | sys.stderr.write("Loading experiments...\n") | 66 | sys.stderr.write("Loading experiments...\n") |
| @@ -77,7 +77,7 @@ def load_exps(exp_dirs, col_map, clean): | |||
| 77 | if not os.path.exists(work_dir): | 77 | if not os.path.exists(work_dir): |
| 78 | os.mkdir(work_dir) | 78 | os.mkdir(work_dir) |
| 79 | 79 | ||
| 80 | params = get_exp_params(data_dir, col_map) | 80 | params = get_exp_params(data_dir, cm_builder) |
| 81 | 81 | ||
| 82 | exps += [ ExpData(data_dir, params, work_dir) ] | 82 | exps += [ ExpData(data_dir, params, work_dir) ] |
| 83 | 83 | ||
| @@ -88,15 +88,16 @@ def main(): | |||
| 88 | 88 | ||
| 89 | args = args or [os.getcwd()] | 89 | args = args or [os.getcwd()] |
| 90 | 90 | ||
| 91 | # Load exp parameters into col_map | 91 | # Load exp parameters into a ColMap |
| 92 | col_map = ColMap() | 92 | builder = ColMapBuilder() |
| 93 | exps = load_exps(args, col_map, opts.force) | 93 | exps = load_exps(args, builder, opts.force) |
| 94 | 94 | ||
| 95 | # Don't track changes in ignored parameters | 95 | # Don't track changes in ignored parameters |
| 96 | if opts.ignore: | 96 | if opts.ignore: |
| 97 | for param in opts.ignore.split(","): | 97 | for param in opts.ignore.split(","): |
| 98 | col_map.try_remove(param) | 98 | builder.try_remove(param) |
| 99 | 99 | ||
| 100 | col_map = builder.build() | ||
| 100 | result_table = TupleTable(col_map) | 101 | result_table = TupleTable(col_map) |
| 101 | 102 | ||
| 102 | sys.stderr.write("Parsing data...\n") | 103 | sys.stderr.write("Parsing data...\n") |
