about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jonathan Herman <hermanjl@cs.unc.edu> 2013-05-03 16:30:10 -0400
committer: Jonathan Herman <hermanjl@cs.unc.edu> 2013-05-03 16:30:10 -0400
commit: ec4c208196a190a8b771bef7e1533b642b59f8df (patch)
tree: 1b7146871a8c93dc1c425acbd6b770fefddbd945
parent: a530b618fb32321e61b2a90001caa4d6b1b2b192 (diff)
Improved flexibility of plot_exps.py script. [wip-color-mc]
* No longer needs an X connection to render. This also vastly increases performance.
* If too many configuration values are plotted, a key of the form color=column1, line=column2, marker=column3, etc. is not created. Instead, each combination of values is given its own line/color/marker style and plotted, and each line has an entry in the key. Ugly, but better than nothing.
-rw-r--r-- parse/col_map.py 2
-rw-r--r-- plot/style.py 190
-rwxr-xr-x plot_exps.py 42
3 files changed, 193 insertions, 41 deletions
diff --git a/parse/col_map.py b/parse/col_map.py
index 59484e8..f643217 100644
--- a/parse/col_map.py
+++ b/parse/col_map.py
@@ -48,7 +48,7 @@ class ColMap(object):
48 48
49 for col in self.col_list: 49 for col in self.col_list:
50 if col not in kv: 50 if col not in kv:
51 key += (None,) 51 key += (str(None),)
52 else: 52 else:
53 key += (str(kv[col]),) 53 key += (str(kv[col]),)
54 54
diff --git a/plot/style.py b/plot/style.py
index 5c2d661..ad29dd3 100644
--- a/plot/style.py
+++ b/plot/style.py
@@ -1,22 +1,100 @@
1from common import log_once
1from collections import namedtuple 2from collections import namedtuple
3from parse.tuple_table import TupleTable
4
5import itertools
2import matplotlib.pyplot as plot 6import matplotlib.pyplot as plot
3 7
8class ExcessVarietyException(Exception):
9 '''Too many fields or field values to use field style'''
10 pass
11
4class Style(namedtuple('SS', ['marker', 'color', 'line'])): 12class Style(namedtuple('SS', ['marker', 'color', 'line'])):
5 def fmt(self): 13 def fmt(self):
6 return self.marker + self.line + self.color 14 return self.marker + self.line + self.color
7 15
16
17def make_styler(col_map):
18 try:
19 return FieldStyle(col_map.get_values())
20 except ExcessVarietyException:
21 # Fallback, don't style by field values, instead create
22 # a unique style for every combination of field values possible
23 # This is significantly harder to visually parse
24 log_once("Too many columns and/or column values to create pretty "
25 "and simple graphs!\nGiving each combination of properties "
26 "its own line.")
27 return CombinationStyle(col_map)
28
8class StyleMap(object): 29class StyleMap(object):
9 '''Maps configs (dicts) to specific line styles.''' 30 # The base style, a solid black line
31 # The values of columns are used to change this line
10 DEFAULT = Style(marker='', line= '-', color='k') 32 DEFAULT = Style(marker='', line= '-', color='k')
33
34 def __init__(self, col_values):
35 raise NotImplementedError()
36
37 def _all_styles(self):
38 '''A dict holding all possible values for style each property.'''
39 return Style(marker=list('.,ov^<>1234sp*hH+xDd|_'),
40 line=['-', ':', '--'],
41 color=list('kbgrcmy'))._asdict()
42
43 def get_style(self, kv):
44 '''Translate column values to unique line style.'''
45 raise NotImplementedError()
46
47 def get_key(self):
48 '''A visual description of this StyleMap.'''
49 raise NotImplementedError()
50
51
52class FieldStyle(StyleMap):
53 '''Changes properties of a line style by the values of each field.'''
54
11 ORDER = [ str, bool, float, int ] 55 ORDER = [ str, bool, float, int ]
12 56
13 def __init__(self, col_list, col_values): 57 def __init__(self, col_values):
14 '''Assign (some) columns in @col_list to fields in @Style to vary, and 58 '''Assign (some) columns in @col_list to fields in @Style to vary, and
15 assign values for these columns to specific field values.''' 59 assign values for these columns to specific field values.'''
60 # column->map(column_value->field_value)
16 self.value_map = {} 61 self.value_map = {}
62 # column->style_field
17 self.field_map = {} 63 self.field_map = {}
18 64
19 # Prioritize non-numbers 65 if len(col_values.keys()) > len(FieldStyle.DEFAULT):
66 raise ExcessVarietyException("Too many columns to style!")
67
68 col_list = self.__get_sorted_columns(col_values)
69 field_list = self.__get_sorted_fields()
70 field_dict = self._all_styles()
71
72 while len(col_list) < len(field_list):
73 curr_col = col_list[-1]
74 check_field = field_list[-2]
75 if len(col_values[curr_col]) <= len(field_dict[check_field]):
76 field_list.pop()
77 elif len(col_values[curr_col]) > len(field_dict[field_list[-1]]):
78 raise ExcessVarietyException("Too many values to style!")
79 else:
80 field_list.pop(0)
81
82 # Pair each column with a style field
83 for i in xrange(len(col_list)):
84 column = col_list[i]
85 field = field_list[i]
86 field_values = field_dict[field]
87
88 # Give each unique value of column a matching unique value of field
89 value_dict = {}
90 for value in sorted(col_values[column]):
91 value_dict[value] = field_values.pop(0)
92
93 self.value_map[column] = value_dict
94 self.field_map[column] = field
95
96 def __get_sorted_columns(self, col_values):
97 # Break ties using the type of the column
20 def type_priority(column): 98 def type_priority(column):
21 value = col_values[column].pop() 99 value = col_values[column].pop()
22 col_values[column].add(value) 100 col_values[column].add(value)
@@ -24,35 +102,23 @@ class StyleMap(object):
24 t = float if float(value) % 1.0 else int 102 t = float if float(value) % 1.0 else int
25 except: 103 except:
26 t = bool if value in ['True','False'] else str 104 t = bool if value in ['True','False'] else str
27 # return StyleMap.ORDER.index(t) 105 return StyleMap.ORDER.index(t)
28 return len(col_values[column])
29 col_list = sorted(col_list, key=type_priority, reverse=True)
30
31 # TODO: undo this, switch to popping mechanism
32 for field, values in [x for x in self.__get_all()._asdict().iteritems()]:
33 if not col_list:
34 break
35
36 next_column = col_list.pop(0)
37 value_dict = {}
38 106
39 for value in sorted(col_values[next_column]): 107 def column_compare(cola, colb):
40 try: 108 lena = len(col_values[cola])
41 value_dict[value] = values.pop(0) 109 lenb = len(col_values[colb])
42 except Exception as e: 110 if lena == lenb:
43 raise e 111 return type_priority(cola) - type_priority(colb)
112 else:
113 return lena - lenb
44 114
45 self.value_map[next_column] = value_dict 115 return sorted(col_values.keys(), cmp=column_compare)
46 self.field_map[next_column] = field
47 116
48 def __get_all(self): 117 def __get_sorted_fields(self):
49 '''A Style holding all possible values for each property.''' 118 fields = self._all_styles()
50 return Style(marker=list('.,ov^<>1234sp*hH+xDd|_'), 119 return sorted(fields.keys(), key=lambda x: len(fields[x]))
51 line=['-', ':', '--', '_'],
52 color=list('bgrcmyk'))
53 120
54 def get_style(self, kv): 121 def get_style(self, kv):
55 '''Translate column values to unique line style.'''
56 style_fields = {} 122 style_fields = {}
57 123
58 for column, values in self.value_map.iteritems(): 124 for column, values in self.value_map.iteritems():
@@ -64,7 +130,6 @@ class StyleMap(object):
64 return StyleMap.DEFAULT._replace(**style_fields) 130 return StyleMap.DEFAULT._replace(**style_fields)
65 131
66 def get_key(self): 132 def get_key(self):
67 '''A visual description of this StyleMap.'''
68 key = [] 133 key = []
69 134
70 for column, values in self.value_map.iteritems(): 135 for column, values in self.value_map.iteritems():
@@ -79,3 +144,72 @@ class StyleMap(object):
79 144
80 return sorted(key, key=lambda x:x[1]) 145 return sorted(key, key=lambda x:x[1])
81 146
147class CombinationStyle(StyleMap):
148 def __init__(self, col_map):
149 self.col_map = col_map
150 self.kv_styles = TupleTable(col_map)
151 self.kv_seen = TupleTable(col_map, lambda:False)
152
153 all_styles = self._all_styles()
154 styles_order = sorted(all_styles.keys(),
155 key=lambda x: len(all_styles[x]),
156 reverse = True)
157
158 # Add a 'None' option in case some lines are plotted without
159 # any value specified for this kv
160 column_values = col_map.get_values()
161 for key in column_values.keys():
162 column_values[key].add(None)
163
164 styles_iter = self.__dict_combinations(all_styles, styles_order)
165 kv_iter = self.__dict_combinations(column_values)
166
167 # Cycle in case there are more kv combinations than styles
168 # This will be really, really ugly..
169 styles_iter = itertools.cycle(styles_iter)
170
171 for kv, style in zip(kv_iter, styles_iter):
172 self.kv_styles[kv] = Style(**style)
173
174 for kv_tup, style in self.kv_styles:
175 kv = self.col_map.get_kv(kv_tup)
176 if not self.kv_styles[kv]:
177 raise Exception("Didn't initialize %s" % kv)
178
179 def __dict_combinations(self, list_dict, column_order = None):
180 def helper(set_columns, remaining_columns):
181 if not remaining_columns:
182 yield set_columns
183 return
184
185 next_column = remaining_columns.pop(0)
186
187 for v in list_dict[next_column]:
188 set_columns[next_column] = v
189 for vals in helper(dict(set_columns), list(remaining_columns)):
190 yield vals
191
192 if not column_order:
193 # Just use the random order returned by the dict
194 column_order = list_dict.keys()
195
196 return helper({}, column_order)
197
198 def get_style(self, kv):
199 self.kv_seen[kv] = True
200 return self.kv_styles[kv]
201
202 def get_key(self):
203 key = []
204
205 for kv_tup, style in self.kv_styles:
206 kv = self.col_map.get_kv(kv_tup)
207 if not self.kv_seen[kv]:
208 continue
209
210 styled_line = plot.plot([], [], style.fmt())[0]
211 description = self.col_map.encode(kv, minimum=True)
212
213 key += [(styled_line, description)]
214
215 return sorted(key, key=lambda x:x[1])
diff --git a/plot_exps.py b/plot_exps.py
index 2d6f06b..d49e69d 100755
--- a/plot_exps.py
+++ b/plot_exps.py
@@ -1,7 +1,17 @@
1#!/usr/bin/env python 1#!/usr/bin/env python
2from __future__ import print_function 2from __future__ import print_function
3 3
4# Without this trickery, matplotlib uses the current X windows session
5# to create graphs. Problem 1 with this: requires user has an X windows,
6# through ssh -X or otherws. Problem 2: it kills the performance on the
7# computer running the X session, even if that computer isn't the one
8# running plot_exps.py!
9import matplotlib
10matplotlib.use('Agg')
4import matplotlib.pyplot as plot 11import matplotlib.pyplot as plot
12
13import common as com
14import multiprocessing
5import os 15import os
6import shutil as sh 16import shutil as sh
7import sys 17import sys
@@ -9,11 +19,11 @@ import traceback
9 19
10from collections import namedtuple 20from collections import namedtuple
11from config.config import DEFAULTS 21from config.config import DEFAULTS
12from multiprocessing import Pool, cpu_count 22
13from optparse import OptionParser 23from optparse import OptionParser
14from parse.col_map import ColMap,ColMapBuilder 24from parse.col_map import ColMap,ColMapBuilder
15from parse.dir_map import DirMap 25from parse.dir_map import DirMap
16from plot.style import StyleMap 26from plot.style import make_styler
17 27
18def parse_args(): 28def parse_args():
19 parser = OptionParser("usage: %prog [options] [csv_dir]...") 29 parser = OptionParser("usage: %prog [options] [csv_dir]...")
@@ -23,7 +33,8 @@ def parse_args():
23 default=DEFAULTS['out-plot']) 33 default=DEFAULTS['out-plot'])
24 parser.add_option('-f', '--force', action='store_true', default=False, 34 parser.add_option('-f', '--force', action='store_true', default=False,
25 dest='force', help='overwrite existing data') 35 dest='force', help='overwrite existing data')
26 parser.add_option('-p', '--processors', default=max(cpu_count() - 1, 1), 36 parser.add_option('-p', '--processors',
37 default=max(multiprocessing.cpu_count() - 1, 1),
27 type='int', dest='processors', 38 type='int', dest='processors',
28 help='number of threads for processing') 39 help='number of threads for processing')
29 40
@@ -53,8 +64,7 @@ def plot_by_variable(details):
53 builder = ColMapBuilder() 64 builder = ColMapBuilder()
54 config_nodes = [] 65 config_nodes = []
55 66
56 # Generate mapping of (column)=>(line property to vary) for consistently 67 # Decode file names into configuration dicts
57 # formatted plots
58 for line_path, line_node in details.node.children.iteritems(): 68 for line_path, line_node in details.node.children.iteritems():
59 encoded = line_path[:line_path.index(".csv")] 69 encoded = line_path[:line_path.index(".csv")]
60 70
@@ -68,14 +78,13 @@ def plot_by_variable(details):
68 config_nodes += [(line_config, line_node)] 78 config_nodes += [(line_config, line_node)]
69 79
70 col_map = builder.build() 80 col_map = builder.build()
71 style_map = StyleMap(col_map.columns(), col_map.get_values()) 81 style_map = make_styler(col_map)
72 82
73 figure = plot.figure() 83 figure = plot.figure()
74 axes = figure.add_subplot(111) 84 axes = figure.add_subplot(111)
75 85
76 # Create a line for each file node and its configuration 86 # Create a line for each file node and its configuration
77 for line_config, line_node in config_nodes: 87 for line_config, line_node in config_nodes:
78 # Create line style to match this configuration
79 style = style_map.get_style(line_config) 88 style = style_map.get_style(line_config)
80 values = sorted(line_node.values, key=lambda tup: tup[0]) 89 values = sorted(line_node.values, key=lambda tup: tup[0])
81 xvalues, yvalues = zip(*values) 90 xvalues, yvalues = zip(*values)
@@ -85,14 +94,19 @@ def plot_by_variable(details):
85 axes.set_title(details.title) 94 axes.set_title(details.title)
86 95
87 lines, labels = zip(*style_map.get_key()) 96 lines, labels = zip(*style_map.get_key())
88 axes.legend(tuple(lines), tuple(labels), prop={'size':10}, loc=2) 97 axes.legend(tuple(lines), tuple(labels), prop={'size':10},
98 # This code places the legend slightly to the right of the plot
99 bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
89 100
90 axes.set_ylabel(details.value) 101 axes.set_ylabel(details.value)
91 axes.set_xlabel(details.variable) 102 axes.set_xlabel(details.variable)
92 axes.set_xlim(0, axes.get_xlim()[1]) 103 axes.set_xlim(0, axes.get_xlim()[1])
93 axes.set_ylim(0, axes.get_ylim()[1]) 104 axes.set_ylim(0, axes.get_ylim()[1])
94 105
95 plot.savefig(details.out, format=OUT_FORMAT) 106 plot.savefig(details.out, format=OUT_FORMAT,
107 # Using 'tight' causes savefig to rescale the image for non-plot
108 # artists, which in our case is just the legend
109 bbox_inches='tight')
96 110
97 return True 111 return True
98 112
@@ -125,8 +139,12 @@ def plot_dir(data_dir, out_dir, max_procs, force):
125 if not plot_details: 139 if not plot_details:
126 return 140 return
127 141
128 procs = min(len(plot_details), max_procs) 142 procs = min(len(plot_details), max_procs)
129 pool = Pool(processes=procs) 143 logged = multiprocessing.Manager().list()
144
145 pool = multiprocessing.Pool(processes=procs,
146 initializer=com.set_logged_list, initargs=(logged,))
147
130 enum = pool.imap_unordered(plot_wrapper, plot_details) 148 enum = pool.imap_unordered(plot_wrapper, plot_details)
131 149
132 try: 150 try: