about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jonathan Herman <hermanjl@cs.unc.edu> 2013-05-03 16:30:10 -0400
committer Jonathan Herman <hermanjl@cs.unc.edu> 2013-05-03 16:35:47 -0400
commit e15736509ab36e33bc71a0fe1120f2974e389725 (patch)
tree d3d405d12df469d601b01e247d7301651629b547
parent 38e4029048bb693525369ea7eb0e639f73844ed2 (diff)
Improved flexibility of plot_exps.py script.
* No longer needs an X connection to render. This also vastly increases performance.
* If too many configuration values are plotted, a key of the form color=column1, line=column2, marker=column3, etc. is not created. Instead, each combination of values is given its own line/color/marker style and plotted, and each line has an entry in the key. Ugly, but better than nothing.
-rw-r--r-- parse/col_map.py 2
-rw-r--r-- plot/style.py 185
-rwxr-xr-x plot_exps.py 42
3 files changed, 192 insertions, 37 deletions
diff --git a/parse/col_map.py b/parse/col_map.py
index ccce865..f1f9e52 100644
--- a/parse/col_map.py
+++ b/parse/col_map.py
@@ -45,7 +45,7 @@ class ColMap(object):
45 45
46 for col in self.col_list: 46 for col in self.col_list:
47 if col not in kv: 47 if col not in kv:
48 key += (None,) 48 key += (str(None),)
49 else: 49 else:
50 key += (str(kv[col]),) 50 key += (str(kv[col]),)
51 51
diff --git a/plot/style.py b/plot/style.py
index 4e2057f..f7b3a35 100644
--- a/plot/style.py
+++ b/plot/style.py
@@ -1,22 +1,99 @@
1from common import log_once
1from collections import namedtuple 2from collections import namedtuple
3from parse.tuple_table import TupleTable
4
5import itertools
2import matplotlib.pyplot as plot 6import matplotlib.pyplot as plot
3 7
4class Style(namedtuple('SS', ['marker', 'line', 'color'])): 8class Style(namedtuple('SS', ['marker', 'color', 'line'])):
5 def fmt(self): 9 def fmt(self):
6 return self.marker + self.line + self.color 10 return self.marker + self.line + self.color
7 11
12class ExcessVarietyException(Exception):
13 '''Too many fields or field values to use field style'''
14 pass
15
16def make_styler(col_map):
17 try:
18 return FieldStyle(col_map.get_values())
19 except ExcessVarietyException:
20 # Fallback, don't style by field values, instead create
21 # a unique style for every combination of field values possible
22 # This is significantly harder to visually parse
23 log_once("Too many columns and/or column values to create pretty "
24 "and simple graphs!\nGiving each combination of properties "
25 "its own line.")
26 return CombinationStyle(col_map)
27
8class StyleMap(object): 28class StyleMap(object):
9 '''Maps configs (dicts) to specific line styles.''' 29 # The base style, a solid black line
30 # The values of columns are used to change this line
10 DEFAULT = Style(marker='', line= '-', color='k') 31 DEFAULT = Style(marker='', line= '-', color='k')
32
33 def __init__(self, col_values):
34 raise NotImplementedError()
35
36 def _all_styles(self):
37 '''A dict holding all possible values for each style property.'''
38 return Style(marker=list('.,ov^<>1234sp*hH+xDd|_'),
39 line=['-', ':', '--'],
40 color=list('kbgrcmy'))._asdict()
41
42 def get_style(self, kv):
43 '''Translate column values to unique line style.'''
44 raise NotImplementedError()
45
46 def get_key(self):
47 '''A visual description of this StyleMap.'''
48 raise NotImplementedError()
49
50
51class FieldStyle(StyleMap):
52 '''Changes properties of a line style by the values of each field.'''
53
11 ORDER = [ str, bool, float, int ] 54 ORDER = [ str, bool, float, int ]
12 55
13 def __init__(self, col_list, col_values): 56 def __init__(self, col_values):
14 '''Assign (some) columns in @col_list to fields in @Style to vary, and 57 '''Assign (some) columns in @col_list to fields in @Style to vary, and
15 assign values for these columns to specific field values.''' 58 assign values for these columns to specific field values.'''
59 # column->map(column_value->field_value)
16 self.value_map = {} 60 self.value_map = {}
61 # column->style_field
17 self.field_map = {} 62 self.field_map = {}
18 63
19 # Prioritize non-numbers 64 if len(col_values.keys()) > len(FieldStyle.DEFAULT):
65 raise ExcessVarietyException("Too many columns to style!")
66
67 col_list = self.__get_sorted_columns(col_values)
68 field_list = self.__get_sorted_fields()
69 field_dict = self._all_styles()
70
71 while len(col_list) < len(field_list):
72 curr_col = col_list[-1]
73 check_field = field_list[-2]
74 if len(col_values[curr_col]) <= len(field_dict[check_field]):
75 field_list.pop()
76 elif len(col_values[curr_col]) > len(field_dict[field_list[-1]]):
77 raise ExcessVarietyException("Too many values to style!")
78 else:
79 field_list.pop(0)
80
81 # Pair each column with a style field
82 for i in xrange(len(col_list)):
83 column = col_list[i]
84 field = field_list[i]
85 field_values = field_dict[field]
86
87 # Give each unique value of column a matching unique value of field
88 value_dict = {}
89 for value in sorted(col_values[column]):
90 value_dict[value] = field_values.pop(0)
91
92 self.value_map[column] = value_dict
93 self.field_map[column] = field
94
95 def __get_sorted_columns(self, col_values):
96 # Break ties using the type of the column
20 def type_priority(column): 97 def type_priority(column):
21 value = col_values[column].pop() 98 value = col_values[column].pop()
22 col_values[column].add(value) 99 col_values[column].add(value)
@@ -25,30 +102,22 @@ class StyleMap(object):
25 except: 102 except:
26 t = bool if value in ['True','False'] else str 103 t = bool if value in ['True','False'] else str
27 return StyleMap.ORDER.index(t) 104 return StyleMap.ORDER.index(t)
28 col_list = sorted(col_list, key=type_priority)
29 105
30 # TODO: undo this, switch to popping mechanism 106 def column_compare(cola, colb):
31 for field, values in reversed([x for x in self.__get_all()._asdict().iteritems()]): 107 lena = len(col_values[cola])
32 if not col_list: 108 lenb = len(col_values[colb])
33 break 109 if lena == lenb:
110 return type_priority(cola) - type_priority(colb)
111 else:
112 return lena - lenb
34 113
35 next_column = col_list.pop(0) 114 return sorted(col_values.keys(), cmp=column_compare)
36 value_dict = {}
37
38 for value in sorted(col_values[next_column]):
39 value_dict[value] = values.pop(0)
40
41 self.value_map[next_column] = value_dict
42 self.field_map[next_column] = field
43 115
44 def __get_all(self): 116 def __get_sorted_fields(self):
45 '''A Style holding all possible values for each property.''' 117 fields = self._all_styles()
46 return Style(marker=list('.,ov^<>1234sp*hH+xDd|_'), 118 return sorted(fields.keys(), key=lambda x: len(fields[x]))
47 line=['-', ':', '--'],
48 color=list('bgrcmyk'))
49 119
50 def get_style(self, kv): 120 def get_style(self, kv):
51 '''Translate column values to unique line style.'''
52 style_fields = {} 121 style_fields = {}
53 122
54 for column, values in self.value_map.iteritems(): 123 for column, values in self.value_map.iteritems():
@@ -60,7 +129,6 @@ class StyleMap(object):
60 return StyleMap.DEFAULT._replace(**style_fields) 129 return StyleMap.DEFAULT._replace(**style_fields)
61 130
62 def get_key(self): 131 def get_key(self):
63 '''A visual description of this StyleMap.'''
64 key = [] 132 key = []
65 133
66 for column, values in self.value_map.iteritems(): 134 for column, values in self.value_map.iteritems():
@@ -75,3 +143,72 @@ class StyleMap(object):
75 143
76 return sorted(key, key=lambda x:x[1]) 144 return sorted(key, key=lambda x:x[1])
77 145
146class CombinationStyle(StyleMap):
147 def __init__(self, col_map):
148 self.col_map = col_map
149 self.kv_styles = TupleTable(col_map)
150 self.kv_seen = TupleTable(col_map, lambda:False)
151
152 all_styles = self._all_styles()
153 styles_order = sorted(all_styles.keys(),
154 key=lambda x: len(all_styles[x]),
155 reverse = True)
156
157 # Add a 'None' option in case some lines are plotted without
158 # any value specified for this kv
159 column_values = col_map.get_values()
160 for key in column_values.keys():
161 column_values[key].add(None)
162
163 styles_iter = self.__dict_combinations(all_styles, styles_order)
164 kv_iter = self.__dict_combinations(column_values)
165
166 # Cycle in case there are more kv combinations than styles
167 # This will be really, really ugly..
168 styles_iter = itertools.cycle(styles_iter)
169
170 for kv, style in zip(kv_iter, styles_iter):
171 self.kv_styles[kv] = Style(**style)
172
173 for kv_tup, style in self.kv_styles:
174 kv = self.col_map.get_kv(kv_tup)
175 if not self.kv_styles[kv]:
176 raise Exception("Didn't initialize %s" % kv)
177
178 def __dict_combinations(self, list_dict, column_order = None):
179 def helper(set_columns, remaining_columns):
180 if not remaining_columns:
181 yield set_columns
182 return
183
184 next_column = remaining_columns.pop(0)
185
186 for v in list_dict[next_column]:
187 set_columns[next_column] = v
188 for vals in helper(dict(set_columns), list(remaining_columns)):
189 yield vals
190
191 if not column_order:
192 # Just use the random order returned by the dict
193 column_order = list_dict.keys()
194
195 return helper({}, column_order)
196
197 def get_style(self, kv):
198 self.kv_seen[kv] = True
199 return self.kv_styles[kv]
200
201 def get_key(self):
202 key = []
203
204 for kv_tup, style in self.kv_styles:
205 kv = self.col_map.get_kv(kv_tup)
206 if not self.kv_seen[kv]:
207 continue
208
209 styled_line = plot.plot([], [], style.fmt())[0]
210 description = self.col_map.encode(kv, minimum=True)
211
212 key += [(styled_line, description)]
213
214 return sorted(key, key=lambda x:x[1])
diff --git a/plot_exps.py b/plot_exps.py
index 2d6f06b..d49e69d 100755
--- a/plot_exps.py
+++ b/plot_exps.py
@@ -1,7 +1,17 @@
1#!/usr/bin/env python 1#!/usr/bin/env python
2from __future__ import print_function 2from __future__ import print_function
3 3
4# Without this trickery, matplotlib uses the current X windows session
5# to create graphs. Problem 1 with this: it requires the user to have an X
6# session, through ssh -X or otherwise. Problem 2: it kills the performance on the
7# computer running the X session, even if that computer isn't the one
8# running plot_exps.py!
9import matplotlib
10matplotlib.use('Agg')
4import matplotlib.pyplot as plot 11import matplotlib.pyplot as plot
12
13import common as com
14import multiprocessing
5import os 15import os
6import shutil as sh 16import shutil as sh
7import sys 17import sys
@@ -9,11 +19,11 @@ import traceback
9 19
10from collections import namedtuple 20from collections import namedtuple
11from config.config import DEFAULTS 21from config.config import DEFAULTS
12from multiprocessing import Pool, cpu_count 22
13from optparse import OptionParser 23from optparse import OptionParser
14from parse.col_map import ColMap,ColMapBuilder 24from parse.col_map import ColMap,ColMapBuilder
15from parse.dir_map import DirMap 25from parse.dir_map import DirMap
16from plot.style import StyleMap 26from plot.style import make_styler
17 27
18def parse_args(): 28def parse_args():
19 parser = OptionParser("usage: %prog [options] [csv_dir]...") 29 parser = OptionParser("usage: %prog [options] [csv_dir]...")
@@ -23,7 +33,8 @@ def parse_args():
23 default=DEFAULTS['out-plot']) 33 default=DEFAULTS['out-plot'])
24 parser.add_option('-f', '--force', action='store_true', default=False, 34 parser.add_option('-f', '--force', action='store_true', default=False,
25 dest='force', help='overwrite existing data') 35 dest='force', help='overwrite existing data')
26 parser.add_option('-p', '--processors', default=max(cpu_count() - 1, 1), 36 parser.add_option('-p', '--processors',
37 default=max(multiprocessing.cpu_count() - 1, 1),
27 type='int', dest='processors', 38 type='int', dest='processors',
28 help='number of threads for processing') 39 help='number of threads for processing')
29 40
@@ -53,8 +64,7 @@ def plot_by_variable(details):
53 builder = ColMapBuilder() 64 builder = ColMapBuilder()
54 config_nodes = [] 65 config_nodes = []
55 66
56 # Generate mapping of (column)=>(line property to vary) for consistently 67 # Decode file names into configuration dicts
57 # formatted plots
58 for line_path, line_node in details.node.children.iteritems(): 68 for line_path, line_node in details.node.children.iteritems():
59 encoded = line_path[:line_path.index(".csv")] 69 encoded = line_path[:line_path.index(".csv")]
60 70
@@ -68,14 +78,13 @@ def plot_by_variable(details):
68 config_nodes += [(line_config, line_node)] 78 config_nodes += [(line_config, line_node)]
69 79
70 col_map = builder.build() 80 col_map = builder.build()
71 style_map = StyleMap(col_map.columns(), col_map.get_values()) 81 style_map = make_styler(col_map)
72 82
73 figure = plot.figure() 83 figure = plot.figure()
74 axes = figure.add_subplot(111) 84 axes = figure.add_subplot(111)
75 85
76 # Create a line for each file node and its configuration 86 # Create a line for each file node and its configuration
77 for line_config, line_node in config_nodes: 87 for line_config, line_node in config_nodes:
78 # Create line style to match this configuration
79 style = style_map.get_style(line_config) 88 style = style_map.get_style(line_config)
80 values = sorted(line_node.values, key=lambda tup: tup[0]) 89 values = sorted(line_node.values, key=lambda tup: tup[0])
81 xvalues, yvalues = zip(*values) 90 xvalues, yvalues = zip(*values)
@@ -85,14 +94,19 @@ def plot_by_variable(details):
85 axes.set_title(details.title) 94 axes.set_title(details.title)
86 95
87 lines, labels = zip(*style_map.get_key()) 96 lines, labels = zip(*style_map.get_key())
88 axes.legend(tuple(lines), tuple(labels), prop={'size':10}, loc=2) 97 axes.legend(tuple(lines), tuple(labels), prop={'size':10},
98 # This code places the legend slightly to the right of the plot
99 bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
89 100
90 axes.set_ylabel(details.value) 101 axes.set_ylabel(details.value)
91 axes.set_xlabel(details.variable) 102 axes.set_xlabel(details.variable)
92 axes.set_xlim(0, axes.get_xlim()[1]) 103 axes.set_xlim(0, axes.get_xlim()[1])
93 axes.set_ylim(0, axes.get_ylim()[1]) 104 axes.set_ylim(0, axes.get_ylim()[1])
94 105
95 plot.savefig(details.out, format=OUT_FORMAT) 106 plot.savefig(details.out, format=OUT_FORMAT,
107 # Using 'tight' causes savefig to rescale the image for non-plot
108 # artists, which in our case is just the legend
109 bbox_inches='tight')
96 110
97 return True 111 return True
98 112
@@ -125,8 +139,12 @@ def plot_dir(data_dir, out_dir, max_procs, force):
125 if not plot_details: 139 if not plot_details:
126 return 140 return
127 141
128 procs = min(len(plot_details), max_procs) 142 procs = min(len(plot_details), max_procs)
129 pool = Pool(processes=procs) 143 logged = multiprocessing.Manager().list()
144
145 pool = multiprocessing.Pool(processes=procs,
146 initializer=com.set_logged_list, initargs=(logged,))
147
130 enum = pool.imap_unordered(plot_wrapper, plot_details) 148 enum = pool.imap_unordered(plot_wrapper, plot_details)
131 149
132 try: 150 try: