From e15736509ab36e33bc71a0fe1120f2974e389725 Mon Sep 17 00:00:00 2001 From: Jonathan Herman Date: Fri, 3 May 2013 16:30:10 -0400 Subject: Improved flexibility of plot_exps.py script. * No longer needs an X connection to render. This also vastly increases performance. * If too many configuration values are plotted, a key with color=column1, line=column2, marker=column3 etc is not created. Instead, each combination of values is given its own line/color/marker style and plotted, and each line has an entry in the key. Ugly, but better than nothing. --- parse/col_map.py | 2 +- plot/style.py | 185 +++++++++++++++++++++++++++++++++++++++++++++++-------- plot_exps.py | 42 +++++++++---- 3 files changed, 192 insertions(+), 37 deletions(-) diff --git a/parse/col_map.py b/parse/col_map.py index ccce865..f1f9e52 100644 --- a/parse/col_map.py +++ b/parse/col_map.py @@ -45,7 +45,7 @@ class ColMap(object): for col in self.col_list: if col not in kv: - key += (None,) + key += (str(None),) else: key += (str(kv[col]),) diff --git a/plot/style.py b/plot/style.py index 4e2057f..f7b3a35 100644 --- a/plot/style.py +++ b/plot/style.py @@ -1,22 +1,99 @@ +from common import log_once from collections import namedtuple +from parse.tuple_table import TupleTable + +import itertools import matplotlib.pyplot as plot -class Style(namedtuple('SS', ['marker', 'line', 'color'])): +class Style(namedtuple('SS', ['marker', 'color', 'line'])): def fmt(self): return self.marker + self.line + self.color +class ExcessVarietyException(Exception): + '''Too many fields or field values to use field style''' + pass + +def make_styler(col_map): + try: + return FieldStyle(col_map.get_values()) + except ExcessVarietyException: + # Fallback, don't style by field values, instead create + # a unique style for every combination of field values possible + # This is significantly harder to visually parse + log_once("Too many columns and/or column values to create pretty " + "and simple graphs!\nGiving each combination 
of properties " + "its own line.") + return CombinationStyle(col_map) + class StyleMap(object): - '''Maps configs (dicts) to specific line styles.''' + # The base style, a solid black line + # The values of columns are used to change this line DEFAULT = Style(marker='', line= '-', color='k') + + def __init__(self, col_values): + raise NotImplementedError() + + def _all_styles(self): + '''A dict holding all possible values for style each property.''' + return Style(marker=list('.,ov^<>1234sp*hH+xDd|_'), + line=['-', ':', '--'], + color=list('kbgrcmy'))._asdict() + + def get_style(self, kv): + '''Translate column values to unique line style.''' + raise NotImplementedError() + + def get_key(self): + '''A visual description of this StyleMap.''' + raise NotImplementedError() + + +class FieldStyle(StyleMap): + '''Changes properties of a line style by the values of each field.''' + ORDER = [ str, bool, float, int ] - def __init__(self, col_list, col_values): + def __init__(self, col_values): '''Assign (some) columns in @col_list to fields in @Style to vary, and assign values for these columns to specific field values.''' + # column->map(column_value->field_value) self.value_map = {} + # column->style_field self.field_map = {} - # Prioritize non-numbers + if len(col_values.keys()) > len(FieldStyle.DEFAULT): + raise ExcessVarietyException("Too many columns to style!") + + col_list = self.__get_sorted_columns(col_values) + field_list = self.__get_sorted_fields() + field_dict = self._all_styles() + + while len(col_list) < len(field_list): + curr_col = col_list[-1] + check_field = field_list[-2] + if len(col_values[curr_col]) <= len(field_dict[check_field]): + field_list.pop() + elif len(col_values[curr_col]) > len(field_dict[field_list[-1]]): + raise ExcessVarietyException("Too many values to style!") + else: + field_list.pop(0) + + # Pair each column with a style field + for i in xrange(len(col_list)): + column = col_list[i] + field = field_list[i] + field_values = 
field_dict[field] + + # Give each unique value of column a matching unique value of field + value_dict = {} + for value in sorted(col_values[column]): + value_dict[value] = field_values.pop(0) + + self.value_map[column] = value_dict + self.field_map[column] = field + + def __get_sorted_columns(self, col_values): + # Break ties using the type of the column def type_priority(column): value = col_values[column].pop() col_values[column].add(value) @@ -25,30 +102,22 @@ class StyleMap(object): except: t = bool if value in ['True','False'] else str return StyleMap.ORDER.index(t) - col_list = sorted(col_list, key=type_priority) - # TODO: undo this, switch to popping mechanism - for field, values in reversed([x for x in self.__get_all()._asdict().iteritems()]): - if not col_list: - break + def column_compare(cola, colb): + lena = len(col_values[cola]) + lenb = len(col_values[colb]) + if lena == lenb: + return type_priority(cola) - type_priority(colb) + else: + return lena - lenb - next_column = col_list.pop(0) - value_dict = {} - - for value in sorted(col_values[next_column]): - value_dict[value] = values.pop(0) - - self.value_map[next_column] = value_dict - self.field_map[next_column] = field + return sorted(col_values.keys(), cmp=column_compare) - def __get_all(self): - '''A Style holding all possible values for each property.''' - return Style(marker=list('.,ov^<>1234sp*hH+xDd|_'), - line=['-', ':', '--'], - color=list('bgrcmyk')) + def __get_sorted_fields(self): + fields = self._all_styles() + return sorted(fields.keys(), key=lambda x: len(fields[x])) def get_style(self, kv): - '''Translate column values to unique line style.''' style_fields = {} for column, values in self.value_map.iteritems(): @@ -60,7 +129,6 @@ class StyleMap(object): return StyleMap.DEFAULT._replace(**style_fields) def get_key(self): - '''A visual description of this StyleMap.''' key = [] for column, values in self.value_map.iteritems(): @@ -75,3 +143,72 @@ class StyleMap(object): return sorted(key, 
key=lambda x:x[1]) +class CombinationStyle(StyleMap): + def __init__(self, col_map): + self.col_map = col_map + self.kv_styles = TupleTable(col_map) + self.kv_seen = TupleTable(col_map, lambda:False) + + all_styles = self._all_styles() + styles_order = sorted(all_styles.keys(), + key=lambda x: len(all_styles[x]), + reverse = True) + + # Add a 'None' option in case some lines are plotted without + # any value specified for this kv + column_values = col_map.get_values() + for key in column_values.keys(): + column_values[key].add(None) + + styles_iter = self.__dict_combinations(all_styles, styles_order) + kv_iter = self.__dict_combinations(column_values) + + # Cycle in case there are more kv combinations than styles + # This will be really, really ugly.. + styles_iter = itertools.cycle(styles_iter) + + for kv, style in zip(kv_iter, styles_iter): + self.kv_styles[kv] = Style(**style) + + for kv_tup, style in self.kv_styles: + kv = self.col_map.get_kv(kv_tup) + if not self.kv_styles[kv]: + raise Exception("Didn't initialize %s" % kv) + + def __dict_combinations(self, list_dict, column_order = None): + def helper(set_columns, remaining_columns): + if not remaining_columns: + yield set_columns + return + + next_column = remaining_columns.pop(0) + + for v in list_dict[next_column]: + set_columns[next_column] = v + for vals in helper(dict(set_columns), list(remaining_columns)): + yield vals + + if not column_order: + # Just use the random order returned by the dict + column_order = list_dict.keys() + + return helper({}, column_order) + + def get_style(self, kv): + self.kv_seen[kv] = True + return self.kv_styles[kv] + + def get_key(self): + key = [] + + for kv_tup, style in self.kv_styles: + kv = self.col_map.get_kv(kv_tup) + if not self.kv_seen[kv]: + continue + + styled_line = plot.plot([], [], style.fmt())[0] + description = self.col_map.encode(kv, minimum=True) + + key += [(styled_line, description)] + + return sorted(key, key=lambda x:x[1]) diff --git a/plot_exps.py 
b/plot_exps.py index 2d6f06b..d49e69d 100755 --- a/plot_exps.py +++ b/plot_exps.py @@ -1,7 +1,17 @@ #!/usr/bin/env python from __future__ import print_function +# Without this trickery, matplotlib uses the current X windows session +# to create graphs. Problem 1 with this: it requires the user to have an X +# windows session, through ssh -X or otherwise. Problem 2: it kills the +# performance on the computer running the X session, even if that computer +# isn't the one running plot_exps.py! +import matplotlib +matplotlib.use('Agg') import matplotlib.pyplot as plot + +import common as com +import multiprocessing import os import shutil as sh import sys @@ -9,11 +19,11 @@ import traceback from collections import namedtuple from config.config import DEFAULTS -from multiprocessing import Pool, cpu_count + from optparse import OptionParser from parse.col_map import ColMap,ColMapBuilder from parse.dir_map import DirMap -from plot.style import StyleMap +from plot.style import make_styler def parse_args(): parser = OptionParser("usage: %prog [options] [csv_dir]...") @@ -23,7 +33,8 @@ def parse_args(): default=DEFAULTS['out-plot']) parser.add_option('-f', '--force', action='store_true', default=False, dest='force', help='overwrite existing data') - parser.add_option('-p', '--processors', default=max(cpu_count() - 1, 1), + parser.add_option('-p', '--processors', + default=max(multiprocessing.cpu_count() - 1, 1), type='int', dest='processors', help='number of threads for processing') @@ -53,8 +64,7 @@ def plot_by_variable(details): builder = ColMapBuilder() config_nodes = [] - # Generate mapping of (column)=>(line property to vary) for consistently - # formatted plots + # Decode file names into configuration dicts for line_path, line_node in details.node.children.iteritems(): encoded = line_path[:line_path.index(".csv")] @@ -68,14 +78,13 @@ def plot_by_variable(details): config_nodes += [(line_config, line_node)] col_map = builder.build() - style_map = StyleMap(col_map.columns(),
col_map.get_values()) + style_map = make_styler(col_map) figure = plot.figure() - axes = figure.add_subplot(111) + axes = figure.add_subplot(111) # Create a line for each file node and its configuration for line_config, line_node in config_nodes: - # Create line style to match this configuration style = style_map.get_style(line_config) values = sorted(line_node.values, key=lambda tup: tup[0]) xvalues, yvalues = zip(*values) @@ -85,14 +94,19 @@ def plot_by_variable(details): axes.set_title(details.title) lines, labels = zip(*style_map.get_key()) - axes.legend(tuple(lines), tuple(labels), prop={'size':10}, loc=2) + axes.legend(tuple(lines), tuple(labels), prop={'size':10}, + # This code places the legend slightly to the right of the plot + bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0) axes.set_ylabel(details.value) axes.set_xlabel(details.variable) axes.set_xlim(0, axes.get_xlim()[1]) axes.set_ylim(0, axes.get_ylim()[1]) - plot.savefig(details.out, format=OUT_FORMAT) + plot.savefig(details.out, format=OUT_FORMAT, + # Using 'tight' causes savefig to rescale the image for non-plot + # artists, which in our case is just the legend + bbox_inches='tight') return True @@ -125,8 +139,12 @@ def plot_dir(data_dir, out_dir, max_procs, force): if not plot_details: return - procs = min(len(plot_details), max_procs) - pool = Pool(processes=procs) + procs = min(len(plot_details), max_procs) + logged = multiprocessing.Manager().list() + + pool = multiprocessing.Pool(processes=procs, + initializer=com.set_logged_list, initargs=(logged,)) + enum = pool.imap_unordered(plot_wrapper, plot_details) try: -- cgit v1.2.2