diff options
-rw-r--r-- | parse/col_map.py | 2 | ||||
-rw-r--r-- | plot/style.py | 190 | ||||
-rwxr-xr-x | plot_exps.py | 42 |
3 files changed, 193 insertions, 41 deletions
diff --git a/parse/col_map.py b/parse/col_map.py index 59484e8..f643217 100644 --- a/parse/col_map.py +++ b/parse/col_map.py | |||
@@ -48,7 +48,7 @@ class ColMap(object): | |||
48 | 48 | ||
49 | for col in self.col_list: | 49 | for col in self.col_list: |
50 | if col not in kv: | 50 | if col not in kv: |
51 | key += (None,) | 51 | key += (str(None),) |
52 | else: | 52 | else: |
53 | key += (str(kv[col]),) | 53 | key += (str(kv[col]),) |
54 | 54 | ||
diff --git a/plot/style.py b/plot/style.py index 5c2d661..ad29dd3 100644 --- a/plot/style.py +++ b/plot/style.py | |||
@@ -1,22 +1,100 @@ | |||
1 | from common import log_once | ||
1 | from collections import namedtuple | 2 | from collections import namedtuple |
3 | from parse.tuple_table import TupleTable | ||
4 | |||
5 | import itertools | ||
2 | import matplotlib.pyplot as plot | 6 | import matplotlib.pyplot as plot |
3 | 7 | ||
8 | class ExcessVarietyException(Exception): | ||
9 | '''Too many fields or field values to use field style''' | ||
10 | pass | ||
11 | |||
4 | class Style(namedtuple('SS', ['marker', 'color', 'line'])): | 12 | class Style(namedtuple('SS', ['marker', 'color', 'line'])): |
5 | def fmt(self): | 13 | def fmt(self): |
6 | return self.marker + self.line + self.color | 14 | return self.marker + self.line + self.color |
7 | 15 | ||
16 | |||
17 | def make_styler(col_map): | ||
18 | try: | ||
19 | return FieldStyle(col_map.get_values()) | ||
20 | except ExcessVarietyException: | ||
21 | # Fallback, don't style by field values, instead create | ||
22 | # a unique style for every combination of field values possible | ||
23 | # This is significantly harder to visually parse | ||
24 | log_once("Too many columns and/or column values to create pretty " | ||
25 | "and simple graphs!\nGiving each combination of properties " | ||
26 | "its own line.") | ||
27 | return CombinationStyle(col_map) | ||
28 | |||
8 | class StyleMap(object): | 29 | class StyleMap(object): |
9 | '''Maps configs (dicts) to specific line styles.''' | 30 | # The base style, a solid black line |
31 | # The values of columns are used to change this line | ||
10 | DEFAULT = Style(marker='', line= '-', color='k') | 32 | DEFAULT = Style(marker='', line= '-', color='k') |
33 | |||
34 | def __init__(self, col_values): | ||
35 | raise NotImplementedError() | ||
36 | |||
37 | def _all_styles(self): | ||
38 | '''A dict holding all possible values for each style property.''' | ||
39 | return Style(marker=list('.,ov^<>1234sp*hH+xDd|_'), | ||
40 | line=['-', ':', '--'], | ||
41 | color=list('kbgrcmy'))._asdict() | ||
42 | |||
43 | def get_style(self, kv): | ||
44 | '''Translate column values to unique line style.''' | ||
45 | raise NotImplementedError() | ||
46 | |||
47 | def get_key(self): | ||
48 | '''A visual description of this StyleMap.''' | ||
49 | raise NotImplementedError() | ||
50 | |||
51 | |||
52 | class FieldStyle(StyleMap): | ||
53 | '''Changes properties of a line style by the values of each field.''' | ||
54 | |||
11 | ORDER = [ str, bool, float, int ] | 55 | ORDER = [ str, bool, float, int ] |
12 | 56 | ||
13 | def __init__(self, col_list, col_values): | 57 | def __init__(self, col_values): |
14 | '''Assign (some) columns in @col_list to fields in @Style to vary, and | 58 | '''Assign (some) columns in @col_list to fields in @Style to vary, and |
15 | assign values for these columns to specific field values.''' | 59 | assign values for these columns to specific field values.''' |
60 | # column->map(column_value->field_value) | ||
16 | self.value_map = {} | 61 | self.value_map = {} |
62 | # column->style_field | ||
17 | self.field_map = {} | 63 | self.field_map = {} |
18 | 64 | ||
19 | # Prioritize non-numbers | 65 | if len(col_values.keys()) > len(FieldStyle.DEFAULT): |
66 | raise ExcessVarietyException("Too many columns to style!") | ||
67 | |||
68 | col_list = self.__get_sorted_columns(col_values) | ||
69 | field_list = self.__get_sorted_fields() | ||
70 | field_dict = self._all_styles() | ||
71 | |||
72 | while len(col_list) < len(field_list): | ||
73 | curr_col = col_list[-1] | ||
74 | check_field = field_list[-2] | ||
75 | if len(col_values[curr_col]) <= len(field_dict[check_field]): | ||
76 | field_list.pop() | ||
77 | elif len(col_values[curr_col]) > len(field_dict[field_list[-1]]): | ||
78 | raise ExcessVarietyException("Too many values to style!") | ||
79 | else: | ||
80 | field_list.pop(0) | ||
81 | |||
82 | # Pair each column with a style field | ||
83 | for i in xrange(len(col_list)): | ||
84 | column = col_list[i] | ||
85 | field = field_list[i] | ||
86 | field_values = field_dict[field] | ||
87 | |||
88 | # Give each unique value of column a matching unique value of field | ||
89 | value_dict = {} | ||
90 | for value in sorted(col_values[column]): | ||
91 | value_dict[value] = field_values.pop(0) | ||
92 | |||
93 | self.value_map[column] = value_dict | ||
94 | self.field_map[column] = field | ||
95 | |||
96 | def __get_sorted_columns(self, col_values): | ||
97 | # Break ties using the type of the column | ||
20 | def type_priority(column): | 98 | def type_priority(column): |
21 | value = col_values[column].pop() | 99 | value = col_values[column].pop() |
22 | col_values[column].add(value) | 100 | col_values[column].add(value) |
@@ -24,35 +102,23 @@ class StyleMap(object): | |||
24 | t = float if float(value) % 1.0 else int | 102 | t = float if float(value) % 1.0 else int |
25 | except: | 103 | except: |
26 | t = bool if value in ['True','False'] else str | 104 | t = bool if value in ['True','False'] else str |
27 | # return StyleMap.ORDER.index(t) | 105 | return StyleMap.ORDER.index(t) |
28 | return len(col_values[column]) | ||
29 | col_list = sorted(col_list, key=type_priority, reverse=True) | ||
30 | |||
31 | # TODO: undo this, switch to popping mechanism | ||
32 | for field, values in [x for x in self.__get_all()._asdict().iteritems()]: | ||
33 | if not col_list: | ||
34 | break | ||
35 | |||
36 | next_column = col_list.pop(0) | ||
37 | value_dict = {} | ||
38 | 106 | ||
39 | for value in sorted(col_values[next_column]): | 107 | def column_compare(cola, colb): |
40 | try: | 108 | lena = len(col_values[cola]) |
41 | value_dict[value] = values.pop(0) | 109 | lenb = len(col_values[colb]) |
42 | except Exception as e: | 110 | if lena == lenb: |
43 | raise e | 111 | return type_priority(cola) - type_priority(colb) |
112 | else: | ||
113 | return lena - lenb | ||
44 | 114 | ||
45 | self.value_map[next_column] = value_dict | 115 | return sorted(col_values.keys(), cmp=column_compare) |
46 | self.field_map[next_column] = field | ||
47 | 116 | ||
48 | def __get_all(self): | 117 | def __get_sorted_fields(self): |
49 | '''A Style holding all possible values for each property.''' | 118 | fields = self._all_styles() |
50 | return Style(marker=list('.,ov^<>1234sp*hH+xDd|_'), | 119 | return sorted(fields.keys(), key=lambda x: len(fields[x])) |
51 | line=['-', ':', '--', '_'], | ||
52 | color=list('bgrcmyk')) | ||
53 | 120 | ||
54 | def get_style(self, kv): | 121 | def get_style(self, kv): |
55 | '''Translate column values to unique line style.''' | ||
56 | style_fields = {} | 122 | style_fields = {} |
57 | 123 | ||
58 | for column, values in self.value_map.iteritems(): | 124 | for column, values in self.value_map.iteritems(): |
@@ -64,7 +130,6 @@ class StyleMap(object): | |||
64 | return StyleMap.DEFAULT._replace(**style_fields) | 130 | return StyleMap.DEFAULT._replace(**style_fields) |
65 | 131 | ||
66 | def get_key(self): | 132 | def get_key(self): |
67 | '''A visual description of this StyleMap.''' | ||
68 | key = [] | 133 | key = [] |
69 | 134 | ||
70 | for column, values in self.value_map.iteritems(): | 135 | for column, values in self.value_map.iteritems(): |
@@ -79,3 +144,72 @@ class StyleMap(object): | |||
79 | 144 | ||
80 | return sorted(key, key=lambda x:x[1]) | 145 | return sorted(key, key=lambda x:x[1]) |
81 | 146 | ||
147 | class CombinationStyle(StyleMap): | ||
148 | def __init__(self, col_map): | ||
149 | self.col_map = col_map | ||
150 | self.kv_styles = TupleTable(col_map) | ||
151 | self.kv_seen = TupleTable(col_map, lambda:False) | ||
152 | |||
153 | all_styles = self._all_styles() | ||
154 | styles_order = sorted(all_styles.keys(), | ||
155 | key=lambda x: len(all_styles[x]), | ||
156 | reverse = True) | ||
157 | |||
158 | # Add a 'None' option in case some lines are plotted without | ||
159 | # any value specified for this kv | ||
160 | column_values = col_map.get_values() | ||
161 | for key in column_values.keys(): | ||
162 | column_values[key].add(None) | ||
163 | |||
164 | styles_iter = self.__dict_combinations(all_styles, styles_order) | ||
165 | kv_iter = self.__dict_combinations(column_values) | ||
166 | |||
167 | # Cycle in case there are more kv combinations than styles | ||
168 | # This will be really, really ugly.. | ||
169 | styles_iter = itertools.cycle(styles_iter) | ||
170 | |||
171 | for kv, style in zip(kv_iter, styles_iter): | ||
172 | self.kv_styles[kv] = Style(**style) | ||
173 | |||
174 | for kv_tup, style in self.kv_styles: | ||
175 | kv = self.col_map.get_kv(kv_tup) | ||
176 | if not self.kv_styles[kv]: | ||
177 | raise Exception("Didn't initialize %s" % kv) | ||
178 | |||
179 | def __dict_combinations(self, list_dict, column_order = None): | ||
180 | def helper(set_columns, remaining_columns): | ||
181 | if not remaining_columns: | ||
182 | yield set_columns | ||
183 | return | ||
184 | |||
185 | next_column = remaining_columns.pop(0) | ||
186 | |||
187 | for v in list_dict[next_column]: | ||
188 | set_columns[next_column] = v | ||
189 | for vals in helper(dict(set_columns), list(remaining_columns)): | ||
190 | yield vals | ||
191 | |||
192 | if not column_order: | ||
193 | # Just use the random order returned by the dict | ||
194 | column_order = list_dict.keys() | ||
195 | |||
196 | return helper({}, column_order) | ||
197 | |||
198 | def get_style(self, kv): | ||
199 | self.kv_seen[kv] = True | ||
200 | return self.kv_styles[kv] | ||
201 | |||
202 | def get_key(self): | ||
203 | key = [] | ||
204 | |||
205 | for kv_tup, style in self.kv_styles: | ||
206 | kv = self.col_map.get_kv(kv_tup) | ||
207 | if not self.kv_seen[kv]: | ||
208 | continue | ||
209 | |||
210 | styled_line = plot.plot([], [], style.fmt())[0] | ||
211 | description = self.col_map.encode(kv, minimum=True) | ||
212 | |||
213 | key += [(styled_line, description)] | ||
214 | |||
215 | return sorted(key, key=lambda x:x[1]) | ||
diff --git a/plot_exps.py b/plot_exps.py index 2d6f06b..d49e69d 100755 --- a/plot_exps.py +++ b/plot_exps.py | |||
@@ -1,7 +1,17 @@ | |||
1 | #!/usr/bin/env python | 1 | #!/usr/bin/env python |
2 | from __future__ import print_function | 2 | from __future__ import print_function |
3 | 3 | ||
4 | # Without this trickery, matplotlib uses the current X windows session | ||
5 | # to create graphs. Problem 1 with this: it requires the user to have an X | ||
6 | # session, through ssh -X or otherwise. Problem 2: it kills the performance on the | ||
7 | # computer running the X session, even if that computer isn't the one | ||
8 | # running plot_exps.py! | ||
9 | import matplotlib | ||
10 | matplotlib.use('Agg') | ||
4 | import matplotlib.pyplot as plot | 11 | import matplotlib.pyplot as plot |
12 | |||
13 | import common as com | ||
14 | import multiprocessing | ||
5 | import os | 15 | import os |
6 | import shutil as sh | 16 | import shutil as sh |
7 | import sys | 17 | import sys |
@@ -9,11 +19,11 @@ import traceback | |||
9 | 19 | ||
10 | from collections import namedtuple | 20 | from collections import namedtuple |
11 | from config.config import DEFAULTS | 21 | from config.config import DEFAULTS |
12 | from multiprocessing import Pool, cpu_count | 22 | |
13 | from optparse import OptionParser | 23 | from optparse import OptionParser |
14 | from parse.col_map import ColMap,ColMapBuilder | 24 | from parse.col_map import ColMap,ColMapBuilder |
15 | from parse.dir_map import DirMap | 25 | from parse.dir_map import DirMap |
16 | from plot.style import StyleMap | 26 | from plot.style import make_styler |
17 | 27 | ||
18 | def parse_args(): | 28 | def parse_args(): |
19 | parser = OptionParser("usage: %prog [options] [csv_dir]...") | 29 | parser = OptionParser("usage: %prog [options] [csv_dir]...") |
@@ -23,7 +33,8 @@ def parse_args(): | |||
23 | default=DEFAULTS['out-plot']) | 33 | default=DEFAULTS['out-plot']) |
24 | parser.add_option('-f', '--force', action='store_true', default=False, | 34 | parser.add_option('-f', '--force', action='store_true', default=False, |
25 | dest='force', help='overwrite existing data') | 35 | dest='force', help='overwrite existing data') |
26 | parser.add_option('-p', '--processors', default=max(cpu_count() - 1, 1), | 36 | parser.add_option('-p', '--processors', |
37 | default=max(multiprocessing.cpu_count() - 1, 1), | ||
27 | type='int', dest='processors', | 38 | type='int', dest='processors', |
28 | help='number of threads for processing') | 39 | help='number of threads for processing') |
29 | 40 | ||
@@ -53,8 +64,7 @@ def plot_by_variable(details): | |||
53 | builder = ColMapBuilder() | 64 | builder = ColMapBuilder() |
54 | config_nodes = [] | 65 | config_nodes = [] |
55 | 66 | ||
56 | # Generate mapping of (column)=>(line property to vary) for consistently | 67 | # Decode file names into configuration dicts |
57 | # formatted plots | ||
58 | for line_path, line_node in details.node.children.iteritems(): | 68 | for line_path, line_node in details.node.children.iteritems(): |
59 | encoded = line_path[:line_path.index(".csv")] | 69 | encoded = line_path[:line_path.index(".csv")] |
60 | 70 | ||
@@ -68,14 +78,13 @@ def plot_by_variable(details): | |||
68 | config_nodes += [(line_config, line_node)] | 78 | config_nodes += [(line_config, line_node)] |
69 | 79 | ||
70 | col_map = builder.build() | 80 | col_map = builder.build() |
71 | style_map = StyleMap(col_map.columns(), col_map.get_values()) | 81 | style_map = make_styler(col_map) |
72 | 82 | ||
73 | figure = plot.figure() | 83 | figure = plot.figure() |
74 | axes = figure.add_subplot(111) | 84 | axes = figure.add_subplot(111) |
75 | 85 | ||
76 | # Create a line for each file node and its configuration | 86 | # Create a line for each file node and its configuration |
77 | for line_config, line_node in config_nodes: | 87 | for line_config, line_node in config_nodes: |
78 | # Create line style to match this configuration | ||
79 | style = style_map.get_style(line_config) | 88 | style = style_map.get_style(line_config) |
80 | values = sorted(line_node.values, key=lambda tup: tup[0]) | 89 | values = sorted(line_node.values, key=lambda tup: tup[0]) |
81 | xvalues, yvalues = zip(*values) | 90 | xvalues, yvalues = zip(*values) |
@@ -85,14 +94,19 @@ def plot_by_variable(details): | |||
85 | axes.set_title(details.title) | 94 | axes.set_title(details.title) |
86 | 95 | ||
87 | lines, labels = zip(*style_map.get_key()) | 96 | lines, labels = zip(*style_map.get_key()) |
88 | axes.legend(tuple(lines), tuple(labels), prop={'size':10}, loc=2) | 97 | axes.legend(tuple(lines), tuple(labels), prop={'size':10}, |
98 | # This code places the legend slightly to the right of the plot | ||
99 | bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0) | ||
89 | 100 | ||
90 | axes.set_ylabel(details.value) | 101 | axes.set_ylabel(details.value) |
91 | axes.set_xlabel(details.variable) | 102 | axes.set_xlabel(details.variable) |
92 | axes.set_xlim(0, axes.get_xlim()[1]) | 103 | axes.set_xlim(0, axes.get_xlim()[1]) |
93 | axes.set_ylim(0, axes.get_ylim()[1]) | 104 | axes.set_ylim(0, axes.get_ylim()[1]) |
94 | 105 | ||
95 | plot.savefig(details.out, format=OUT_FORMAT) | 106 | plot.savefig(details.out, format=OUT_FORMAT, |
107 | # Using 'tight' causes savefig to rescale the image for non-plot | ||
108 | # artists, which in our case is just the legend | ||
109 | bbox_inches='tight') | ||
96 | 110 | ||
97 | return True | 111 | return True |
98 | 112 | ||
@@ -125,8 +139,12 @@ def plot_dir(data_dir, out_dir, max_procs, force): | |||
125 | if not plot_details: | 139 | if not plot_details: |
126 | return | 140 | return |
127 | 141 | ||
128 | procs = min(len(plot_details), max_procs) | 142 | procs = min(len(plot_details), max_procs) |
129 | pool = Pool(processes=procs) | 143 | logged = multiprocessing.Manager().list() |
144 | |||
145 | pool = multiprocessing.Pool(processes=procs, | ||
146 | initializer=com.set_logged_list, initargs=(logged,)) | ||
147 | |||
130 | enum = pool.imap_unordered(plot_wrapper, plot_details) | 148 | enum = pool.imap_unordered(plot_wrapper, plot_details) |
131 | 149 | ||
132 | try: | 150 | try: |