diff options
author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-02-11 18:28:31 -0500 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-02-11 18:28:31 -0500 |
commit | b2fa65ecfe14bb9377fbd8afa5f457a07472b6fb (patch) | |
tree | 5bb1402027e1c56eccf38682166b1850c8b89aa9 | |
parent | 7c647198fc40e72ef6ca23c2484bf49eba2079ee (diff) |
First attempt at plot_exps.py.
-rw-r--r-- | parse/col_map.py | 38 | ||||
-rw-r--r-- | parse/dir_map.py | 47 | ||||
-rw-r--r-- | parse/tuple_table.py | 18 | ||||
-rwxr-xr-x | plot_exps.py | 236 |
4 files changed, 201 insertions, 138 deletions
diff --git a/parse/col_map.py b/parse/col_map.py index 6f83ca1..8132639 100644 --- a/parse/col_map.py +++ b/parse/col_map.py | |||
@@ -6,9 +6,9 @@ class ColMapBuilder(object): | |||
6 | 6 | ||
7 | def build(self): | 7 | def build(self): |
8 | columns = sorted(self.value_map.keys(), | 8 | columns = sorted(self.value_map.keys(), |
9 | key=lambda c: (len(self.value_map[c]), c)) | 9 | key=lambda c: (-len(self.value_map[c]), c)) |
10 | col_list = filter(lambda c : len(self.value_map[c]) > 1, columns) | 10 | col_list = filter(lambda c : len(self.value_map[c]) > 1, columns) |
11 | return ColMap(col_list) | 11 | return ColMap(col_list, self.value_map) |
12 | 12 | ||
13 | def try_add(self, column, value): | 13 | def try_add(self, column, value): |
14 | self.value_map[column].add( value ) | 14 | self.value_map[column].add( value ) |
@@ -17,9 +17,17 @@ class ColMapBuilder(object): | |||
17 | del(self.value_map[column]) | 17 | del(self.value_map[column]) |
18 | 18 | ||
19 | class ColMap(object): | 19 | class ColMap(object): |
20 | def __init__(self, col_list): | 20 | def __init__(self, col_list, values = None): |
21 | self.col_list = col_list | 21 | self.col_list = col_list |
22 | self.rev_map = {} | 22 | self.rev_map = {} |
23 | self.values = values | ||
24 | |||
25 | self.minimums = [] | ||
26 | for c in col_list: | ||
27 | end = 1 | ||
28 | while c[:end] in self.minimums: | ||
29 | end += 1 | ||
30 | self.minimums += [c[:end]] | ||
23 | 31 | ||
24 | for i, col in enumerate(col_list): | 32 | for i, col in enumerate(col_list): |
25 | self.rev_map[col] = i | 33 | self.rev_map[col] = i |
@@ -27,6 +35,9 @@ class ColMap(object): | |||
27 | def columns(self): | 35 | def columns(self): |
28 | return self.col_list | 36 | return self.col_list |
29 | 37 | ||
38 | def get_values(self): | ||
39 | return self.values | ||
40 | |||
30 | def get_key(self, kv): | 41 | def get_key(self, kv): |
31 | '''Convert a key-value dict into an ordered tuple of values.''' | 42 | '''Convert a key-value dict into an ordered tuple of values.''' |
32 | key = () | 43 | key = () |
@@ -46,8 +57,7 @@ class ColMap(object): | |||
46 | kv[self.col_list[i]] = key[i] | 57 | kv[self.col_list[i]] = key[i] |
47 | return kv | 58 | return kv |
48 | 59 | ||
49 | 60 | def encode(self, kv, minimum=False): | |
50 | def encode(self, kv): | ||
51 | '''Converted a dict into a string with items sorted according to | 61 | '''Converted a dict into a string with items sorted according to |
52 | the ColMap key order.''' | 62 | the ColMap key order.''' |
53 | def escape(val): | 63 | def escape(val): |
@@ -55,13 +65,23 @@ class ColMap(object): | |||
55 | 65 | ||
56 | vals = [] | 66 | vals = [] |
57 | 67 | ||
58 | for key in self.col_list: | 68 | if minimum: |
69 | format = "%s:%s" | ||
70 | join = ", " | ||
71 | else: | ||
72 | format = "%s=%s" | ||
73 | join = "_" | ||
74 | |||
75 | reverse = list(self.col_list) | ||
76 | reverse.reverse() | ||
77 | for key in reverse: | ||
59 | if key not in kv: | 78 | if key not in kv: |
60 | continue | 79 | continue |
61 | k, v = escape(key), escape(kv[key]) | 80 | display = key if not minimum else self.minimums[self.rev_map[key]] |
62 | vals += ["%s=%s" % (k, v)] | 81 | k, v = escape(display), escape(kv[key]) |
82 | vals += [format % (k, v)] | ||
63 | 83 | ||
64 | return "_".join(vals) | 84 | return join.join(vals) |
65 | 85 | ||
66 | @staticmethod | 86 | @staticmethod |
67 | def decode(string): | 87 | def decode(string): |
diff --git a/parse/dir_map.py b/parse/dir_map.py index e4d83e6..11c872a 100644 --- a/parse/dir_map.py +++ b/parse/dir_map.py | |||
@@ -1,5 +1,6 @@ | |||
1 | import os | ||
2 | import numpy as np | 1 | import numpy as np |
2 | import os | ||
3 | import re | ||
3 | 4 | ||
4 | from collections import defaultdict | 5 | from collections import defaultdict |
5 | 6 | ||
@@ -10,6 +11,29 @@ class DirMap(object): | |||
10 | self.children = defaultdict(lambda : DirMap.Node(self)) | 11 | self.children = defaultdict(lambda : DirMap.Node(self)) |
11 | self.values = [] | 12 | self.values = [] |
12 | 13 | ||
14 | def heir(self, generation=1): | ||
15 | def heir2(node, generation): | ||
16 | if not generation: | ||
17 | return node | ||
18 | elif not node.children: | ||
19 | return None | ||
20 | else: | ||
21 | next_heir = node.children.values()[0] | ||
22 | return next_heir.heir(generation - 1) | ||
23 | return heir2(self, generation) | ||
24 | |||
25 | def leafs(self, path=[], offset=0): | ||
26 | path = list(path) | ||
27 | check_node = self.heir(offset) | ||
28 | if check_node and check_node.children: | ||
29 | for child_name, child_node in self.children.iteritems(): | ||
30 | path += [child_name] | ||
31 | for leaf in child_node.leafs(path, offset): | ||
32 | yield leaf | ||
33 | path.pop() | ||
34 | else: | ||
35 | yield (path, self) | ||
36 | |||
13 | def __init__(self): | 37 | def __init__(self): |
14 | self.root = DirMap.Node(None) | 38 | self.root = DirMap.Node(None) |
15 | self.values = [] | 39 | self.values = [] |
@@ -22,8 +46,7 @@ class DirMap(object): | |||
22 | 46 | ||
23 | def remove_childless(self): | 47 | def remove_childless(self): |
24 | def remove_childless2(node): | 48 | def remove_childless2(node): |
25 | for key in node.children.keys(): | 49 | for key, child in node: |
26 | child = node.children[key] | ||
27 | remove_childless2(child) | 50 | remove_childless2(child) |
28 | if not (child.children or child.values): | 51 | if not (child.children or child.values): |
29 | node.children.pop(key) | 52 | node.children.pop(key) |
@@ -52,19 +75,8 @@ class DirMap(object): | |||
52 | 75 | ||
53 | write2([out_dir], self.root) | 76 | write2([out_dir], self.root) |
54 | 77 | ||
55 | 78 | def leafs(self, offset=0): | |
56 | def leafs(self): | 79 | for leaf in self.root.leafs([], offset): |
57 | def leafs2(path, node): | ||
58 | if node.children: | ||
59 | for child_name, child_node in node.children.iteritems(): | ||
60 | path += [child_name] | ||
61 | for leaf in leafs2(path, child_node): | ||
62 | yield leaf | ||
63 | path.pop() | ||
64 | elif path: | ||
65 | yield (path, node.values) | ||
66 | |||
67 | for leaf in leafs2([], self.root): | ||
68 | yield leaf | 80 | yield leaf |
69 | 81 | ||
70 | @staticmethod | 82 | @staticmethod |
@@ -77,6 +89,9 @@ class DirMap(object): | |||
77 | if os.path.isdir(path): | 89 | if os.path.isdir(path): |
78 | map(lambda x : read2(path+"/"+x), os.listdir(path)) | 90 | map(lambda x : read2(path+"/"+x), os.listdir(path)) |
79 | else: | 91 | else: |
92 | if not re.match(r'.*\.csv', path): | ||
93 | return | ||
94 | |||
80 | with open(path, 'rb') as f: | 95 | with open(path, 'rb') as f: |
81 | data = np.loadtxt(f, delimiter=",") | 96 | data = np.loadtxt(f, delimiter=",") |
82 | 97 | ||
diff --git a/parse/tuple_table.py b/parse/tuple_table.py index 469a424..105b786 100644 --- a/parse/tuple_table.py +++ b/parse/tuple_table.py | |||
@@ -12,7 +12,7 @@ class TupleTable(object): | |||
12 | self.col_map = col_map | 12 | self.col_map = col_map |
13 | self.table = defaultdict(default) | 13 | self.table = defaultdict(default) |
14 | 14 | ||
15 | def col_map(self): | 15 | def get_col_map(self): |
16 | return self.col_map | 16 | return self.col_map |
17 | 17 | ||
18 | def __getitem__(self, kv): | 18 | def __getitem__(self, kv): |
@@ -93,14 +93,21 @@ class ReducedTupleTable(TupleTable): | |||
93 | Leaf = namedtuple('Leaf', ['stat', 'variable', 'base', | 93 | Leaf = namedtuple('Leaf', ['stat', 'variable', 'base', |
94 | 'summary', 'config', 'values']) | 94 | 'summary', 'config', 'values']) |
95 | def leafs(): | 95 | def leafs(): |
96 | for path, values in dir_map.leafs(): | 96 | for path, node in dir_map.leafs(): |
97 | stat, variable, base_type, summary_type, leaf = path | 97 | # The path will be of at least size 1: the filename |
98 | leaf = path.pop() | ||
99 | |||
100 | # Set acceptable defaults for the rest of the path | ||
101 | path += ['?', '?', 'Avg', 'Avg'][len(path):] | ||
102 | |||
103 | [stat, variable, base_type, summary_type] = path | ||
98 | 104 | ||
99 | config_str = leaf[:leaf.index('.csv')] | 105 | config_str = leaf[:leaf.index('.csv')] |
100 | config = ColMap.decode(config_str) | 106 | config = ColMap.decode(config_str) |
101 | 107 | ||
102 | yield Leaf(stat, variable, base_type, | 108 | leaf = Leaf(stat, variable, base_type, |
103 | summary_type, config, values) | 109 | summary_type, config, node.values) |
110 | yield leaf | ||
104 | 111 | ||
105 | builder = ColMapBuilder() | 112 | builder = ColMapBuilder() |
106 | 113 | ||
@@ -119,7 +126,6 @@ class ReducedTupleTable(TupleTable): | |||
119 | summary = table[leaf.config][leaf.stat] | 126 | summary = table[leaf.config][leaf.stat] |
120 | summary[leaf.summary][leaf.base] = y | 127 | summary[leaf.summary][leaf.base] = y |
121 | 128 | ||
122 | print("read: %s" % table) | ||
123 | return table | 129 | return table |
124 | 130 | ||
125 | def write_map(self, out_map): | 131 | def write_map(self, out_map): |
diff --git a/plot_exps.py b/plot_exps.py index 46784bc..39529bd 100755 --- a/plot_exps.py +++ b/plot_exps.py | |||
@@ -2,76 +2,14 @@ | |||
2 | from __future__ import print_function | 2 | from __future__ import print_function |
3 | 3 | ||
4 | import os | 4 | import os |
5 | import re | ||
6 | import plot | ||
7 | import shutil as sh | 5 | import shutil as sh |
8 | 6 | import sys | |
9 | from collections import defaultdict | ||
10 | from optparse import OptionParser | 7 | from optparse import OptionParser |
11 | from gnuplot import Plot, curve | 8 | from parse.dir_map import DirMap |
12 | from random import randrange | 9 | from parse.tuple_table import ReducedTupleTable |
13 | 10 | from parse.col_map import ColMap | |
14 | class StyleMaker(object): | 11 | from collections import namedtuple,defaultdict |
15 | LINE_WIDTH = 1.5 | 12 | import matplotlib.pyplot as plot |
16 | POINT_SIZE = 0.6 | ||
17 | BEST_COLORS = [ | ||
18 | '#ff0000', # red | ||
19 | '#000001', # black | ||
20 | '#0000ff', # blue | ||
21 | '#be00c4', # purple | ||
22 | '#ffd700', # yellow | ||
23 | ] | ||
24 | |||
25 | def __init__(csvs): | ||
26 | self.main_key, self.col_map = __find_columns(csvs) | ||
27 | self.cur_style = 1 | ||
28 | |||
29 | # Use this for least-common varying attribute | ||
30 | self.main_map = {} | ||
31 | # Everything else is a color | ||
32 | self.color_map = TupleTable(self.col_map) | ||
33 | |||
34 | def __find_columns(csvs): | ||
35 | vals = defaultdict(lambda:set) | ||
36 | |||
37 | for csv in csvs: | ||
38 | to_decode = os.path.splitext(csv_file)[0] | ||
39 | params = plot.decode(to_decode) | ||
40 | for k,v in params.iteritems: | ||
41 | vals[k].add(v) | ||
42 | |||
43 | try: | ||
44 | main_key = min([(k,v) for (k,v) in thing.iteritems() if len(v) > 1], | ||
45 | key=operator.itemgetter(1))[0] | ||
46 | except ValueError: | ||
47 | main_key = None | ||
48 | |||
49 | col_map = ColMap() | ||
50 | for k,v in vals.iterkeys(): | ||
51 | if k == self.main_key: continue | ||
52 | for i in v: | ||
53 | self.col_map.try_add(k, i) | ||
54 | return (main_key, col_map) | ||
55 | |||
56 | def __rand_color(): | ||
57 | return "#%s" % "".join([hex(randrange(0, 255))[2:] for i in range(3)]) | ||
58 | |||
59 | def get_style(csv): | ||
60 | to_decode = os.path.splitext(csv_file)[0] | ||
61 | params = plot.decode(to_decode) | ||
62 | |||
63 | if kv not in self.color_map: | ||
64 | color = best.pop() if BEST_COLORS else __rand_color() | ||
65 | self.color_map.add_exp(params, color) | ||
66 | |||
67 | if self.main_key in params: | ||
68 | val = params[self.main_key] | ||
69 | if val not in self.main_map: | ||
70 | self.main_map[val] = self.cur_style | ||
71 | self.cur_style += 1 | ||
72 | style = self.main_map[val] | ||
73 | else: | ||
74 | style = 1 | ||
75 | 13 | ||
76 | def parse_args(): | 14 | def parse_args(): |
77 | parser = OptionParser("usage: %prog [options] [csv_dir]...") | 15 | parser = OptionParser("usage: %prog [options] [csv_dir]...") |
@@ -83,54 +21,138 @@ def parse_args(): | |||
83 | 21 | ||
84 | return parser.parse_args() | 22 | return parser.parse_args() |
85 | 23 | ||
86 | def get_label(kv): | ||
87 | label = [] | ||
88 | for key, value in kv.iteritems(): | ||
89 | label += ["%s=%s" % (key.capitalize(), value)] | ||
90 | return ", ".join(label) | ||
91 | |||
92 | def add_line(plot, csv_file): | ||
93 | to_decode = os.path.splitext(csv_file)[0] | ||
94 | params = plot.decode(to_decode) | ||
95 | |||
96 | def get_stat(path, name): | ||
97 | full = os.path.abspath(path) | ||
98 | rstr = r"(?P<STAT>[^/]+)/((max|min|var|avg)/)*(%s/?)?$" % name | ||
99 | regex = re.compile(rstr, re.I | re.M) | ||
100 | match = regex.search(full) | ||
101 | return match.group("STAT") | ||
102 | |||
103 | def plot_exp(name, data_dir, out_dir): | ||
104 | p = Plot() | ||
105 | p.format = 'pdf' | ||
106 | p.output = "%s/%s.pdf" % (out_dir, name) | ||
107 | p.xlabel = name.replace("vary-", "") | ||
108 | p.ylabel = get_stat(data_dir, name) | ||
109 | p.font = 'Helvetica' | ||
110 | p.dashed_lines = True | ||
111 | p.enhanced_text = True | ||
112 | p.size = ('5.0cm', '5.0cm') | ||
113 | p.font_size = '6pt' | ||
114 | p.key = 'on bmargin center horizontal' | ||
115 | |||
116 | csvs = [f for f in os.listdir(data_dir) if re.match("*.csv", f)] | ||
117 | col_map = get_col_map(csvs) | ||
118 | 24 | ||
25 | ExpDetails = namedtuple('ExpDetails', ['variable', 'value', 'title', 'out']) | ||
26 | OUT_FORMAT = 'pdf' | ||
27 | |||
28 | def get_details(path): | ||
29 | out = "_".join(path) if path else "plot" | ||
30 | |||
31 | value = path.pop() if path else None | ||
32 | variable = path.pop() if path else None | ||
33 | |||
34 | title = value.capitalize() if value else "" | ||
35 | title += " by %s" % variable if variable else "" | ||
36 | title += " (%s)" % (", ".join(path)) if path else "" | ||
37 | |||
38 | return ExpDetails(variable, value, title, out) | ||
39 | |||
40 | |||
41 | |||
42 | class StyleMap(object): | ||
43 | COLORS = list('bgrcmyk') | ||
44 | LINES = ['-', ':', '--'] | ||
45 | MARKERS = list('.,ov^<>1234sp*hH+xDd|_') | ||
46 | ORDER = [MARKERS, COLORS, LINES] | ||
47 | DEFAULT = ["k", "-", "k"] | ||
48 | |||
49 | def __init__(self, col_list, col_values): | ||
50 | self.prop_map = dict(zip(col_list, StyleMap.ORDER)) | ||
51 | |||
52 | # Store 1 style per value | ||
53 | self.value_map = defaultdict(dict) | ||
54 | for column, styles in self.prop_map.iteritems(): | ||
55 | value_styles = self.value_map[column] | ||
56 | for value in sorted(col_values[column]): | ||
57 | value_styles[value] = styles.pop(0) | ||
58 | styles += [value_styles[value]] | ||
59 | |||
60 | def get_style(self, kv): | ||
61 | style = '' | ||
62 | for k,v in kv.iteritems(): | ||
63 | if k in self.value_map: | ||
64 | style += self.value_map[k][v] | ||
65 | return style | ||
66 | |||
67 | def get_key(self): | ||
68 | key = [] | ||
69 | for column, properties in self.prop_map.iteritems(): | ||
70 | idx = StyleMap.ORDER.index(properties) | ||
71 | prop_string = StyleMap.DEFAULT[idx] + "%s" | ||
72 | for value, prop in self.value_map[column].iteritems(): | ||
73 | style = plot.plot([],[], prop_string%prop)[0] | ||
74 | key += [(style, "%s:%s" % (column, value))] | ||
75 | return sorted(key, key=lambda x:x[1]) | ||
76 | |||
77 | def plot_by_variable(dir_map, col_map, out_dir, force): | ||
78 | num_plots = 0 | ||
79 | id = 0 | ||
80 | for _,_ in dir_map.leafs(1): | ||
81 | num_plots += 1 | ||
82 | sys.stderr.write("Plotting by variable...") | ||
83 | |||
84 | for plot_path, plot_node in dir_map.leafs(1): | ||
85 | id += 1 | ||
86 | details = get_details(plot_path) | ||
87 | out_fname = "%s/%s.%s" % (out_dir, details.out, OUT_FORMAT) | ||
88 | if os.path.exists(out_fname) and not force: | ||
89 | continue | ||
90 | |||
91 | # Kinda bad... | ||
92 | first_csv = plot_node.children.keys()[0] | ||
93 | first_config = ColMap.decode(first_csv[:first_csv.index('.csv')]) | ||
94 | columns = filter(lambda c: c in first_config, col_map.columns()) | ||
95 | |||
96 | style_map = StyleMap(columns, col_map.get_values()) | ||
97 | |||
98 | figure = plot.figure() | ||
99 | axes = figure.add_subplot(111) | ||
100 | |||
101 | for line_path, line_node in plot_node.children.iteritems(): | ||
102 | encoded = line_path[:line_path.index(".csv")] | ||
103 | config = ColMap.decode(encoded) | ||
104 | style = style_map.get_style(config) | ||
105 | |||
106 | values = sorted(line_node.values, key=lambda tup: tup[0]) | ||
107 | xvalues, yvalues = zip(*values) | ||
108 | |||
109 | plot.plot(xvalues, yvalues, style) | ||
110 | |||
111 | lines, labels = zip(*style_map.get_key()) | ||
112 | |||
113 | axes.legend(tuple(lines), tuple(labels), prop={'size':10}) | ||
114 | axes.set_ylabel(details.value) | ||
115 | axes.set_xlabel(details.variable) | ||
116 | axes.set_xlim(0, axes.get_xlim()[1] + 1) | ||
117 | axes.set_ylim(0, axes.get_ylim()[1] + 1) | ||
118 | |||
119 | axes.set_title(details.title) | ||
120 | |||
121 | plot.savefig(out_fname, format=OUT_FORMAT) | ||
122 | |||
123 | sys.stderr.write('\r {0:.2%}'.format(float(id)/num_plots)) | ||
124 | sys.stderr.write('\n') | ||
125 | |||
126 | def plot_exp(data_dir, out_dir, force): | ||
127 | print("Reading data...") | ||
128 | dir_map = DirMap.read(data_dir) | ||
129 | print("Sorting configs...") | ||
130 | tuple_table = ReducedTupleTable.from_dir_map(dir_map) | ||
131 | col_map = tuple_table.get_col_map() | ||
132 | |||
133 | if not os.path.exists(out_dir): | ||
134 | os.mkdir(out_dir) | ||
135 | |||
136 | print("Plotting data...") | ||
137 | plot_by_variable(dir_map, col_map, out_dir, force) | ||
138 | # plot_by_config(tuple_table, out_dir) | ||
119 | 139 | ||
120 | def main(): | 140 | def main(): |
121 | opts, args = parse_args() | 141 | opts, args = parse_args() |
122 | args = args or [os.getcwd()] | 142 | args = args or [os.getcwd()] |
123 | 143 | ||
124 | # if opts.force and os.path.exists(opts.out_dir): | 144 | if opts.force and os.path.exists(opts.out_dir): |
125 | # sh.rmtree(opts.out_dir) | 145 | sh.rmtree(opts.out_dir) |
126 | # if not os.path.exists(opts.out_dir): | 146 | if not os.path.exists(opts.out_dir): |
127 | # os.mkdir(opts.out_dir) | 147 | os.mkdir(opts.out_dir) |
128 | 148 | ||
129 | for exp in args: | 149 | for exp in args: |
130 | name = os.path.split(exp)[1] | 150 | name = os.path.split(exp)[1] |
131 | out_dir = "%s/%s" % (opts.out_dir, exp) | 151 | if exp != os.getcwd(): |
132 | 152 | out_dir = "%s/%s" % (opts.out_dir, name) | |
133 | plot_exp(name, exp, out_dir) | 153 | else: |
154 | out_dir = os.getcwd() | ||
155 | plot_exp(exp, out_dir, opts.force) | ||
134 | 156 | ||
135 | if __name__ == '__main__': | 157 | if __name__ == '__main__': |
136 | main() | 158 | main() |