From e15736509ab36e33bc71a0fe1120f2974e389725 Mon Sep 17 00:00:00 2001
From: Jonathan Herman <hermanjl@cs.unc.edu>
Date: Fri, 3 May 2013 16:30:10 -0400
Subject: Improved flexibility of plot_exps.py script.

* No longer needs an X connection to render. This also vastly increases
performance.
* If too many configuration values are plotted, a key of the form
color=column1, line=column2, marker=column3, etc. is not created. Instead,
each combination of values is given its own line/color/marker style and
plotted, and each line has an entry in the key. Ugly, but better than nothing.
---
 parse/col_map.py |   2 +-
 plot/style.py    | 185 +++++++++++++++++++++++++++++++++++++++++++++++--------
 plot_exps.py     |  42 +++++++++----
 3 files changed, 192 insertions(+), 37 deletions(-)

diff --git a/parse/col_map.py b/parse/col_map.py
index ccce865..f1f9e52 100644
--- a/parse/col_map.py
+++ b/parse/col_map.py
@@ -45,7 +45,7 @@ class ColMap(object):
 
         for col in self.col_list:
             if col not in kv:
-                key += (None,)
+                key += (str(None),)
             else:
                 key += (str(kv[col]),)
 
diff --git a/plot/style.py b/plot/style.py
index 4e2057f..f7b3a35 100644
--- a/plot/style.py
+++ b/plot/style.py
@@ -1,22 +1,99 @@
+from common import log_once
 from collections import namedtuple
+from parse.tuple_table import TupleTable
+
+import itertools
 import matplotlib.pyplot as plot
 
-class Style(namedtuple('SS', ['marker', 'line', 'color'])):
+class Style(namedtuple('SS', ['marker', 'color', 'line'])):
     def fmt(self):
         return self.marker + self.line + self.color
 
+class ExcessVarietyException(Exception):
+    '''Too many fields or field values to use field style'''
+    pass
+
+def make_styler(col_map):
+    try:
+        return FieldStyle(col_map.get_values())
+    except ExcessVarietyException:
+        # Fallback: don't style by field values. Instead, create a unique
+        # style for every possible combination of field values.
+        # This is significantly harder to visually parse.
+        log_once("Too many columns and/or column values to create pretty "
+                 "and simple graphs!\nGiving each combination of properties "
+                 "its own line.")
+        return CombinationStyle(col_map)
+
 class StyleMap(object):
-    '''Maps configs (dicts) to specific line styles.'''
+    # The base style, a solid black line
+    # The values of columns are used to change this line
     DEFAULT = Style(marker='', line= '-', color='k')
+
+    def __init__(self, col_values):
+        raise NotImplementedError()
+
+    def _all_styles(self):
+        '''A dict holding all possible values for each style property.'''
+        return Style(marker=list('.,ov^<>1234sp*hH+xDd|_'),
+                     line=['-', ':', '--'],
+                     color=list('kbgrcmy'))._asdict()
+
+    def get_style(self, kv):
+        '''Translate column values to unique line style.'''
+        raise NotImplementedError()
+
+    def get_key(self):
+        '''A visual description of this StyleMap.'''
+        raise NotImplementedError()
+
+
+class FieldStyle(StyleMap):
+    '''Changes properties of a line style by the values of each field.'''
+
     ORDER = [ str, bool, float, int ]
 
-    def __init__(self, col_list, col_values):
+    def __init__(self, col_values):
         '''Assign (some) columns in @col_list to fields in @Style to vary, and
         assign values for these columns to specific field values.'''
+        # column->map(column_value->field_value)
         self.value_map = {}
+        # column->style_field
         self.field_map = {}
 
-        # Prioritize non-numbers
+        if len(col_values.keys()) > len(FieldStyle.DEFAULT):
+            raise ExcessVarietyException("Too many columns to style!")
+
+        col_list   = self.__get_sorted_columns(col_values)
+        field_list = self.__get_sorted_fields()
+        field_dict = self._all_styles()
+
+        while len(col_list) < len(field_list):
+            curr_col = col_list[-1]
+            check_field = field_list[-2]
+            if len(col_values[curr_col]) <= len(field_dict[check_field]):
+                field_list.pop()
+            elif len(col_values[curr_col]) > len(field_dict[field_list[-1]]):
+                raise ExcessVarietyException("Too many values to style!")
+            else:
+                field_list.pop(0)
+
+        # Pair each column with a style field
+        for i in xrange(len(col_list)):
+            column = col_list[i]
+            field  = field_list[i]
+            field_values = field_dict[field]
+
+            # Give each unique value of column a matching unique value of field
+            value_dict  = {}
+            for value in sorted(col_values[column]):
+                value_dict[value] = field_values.pop(0)
+
+            self.value_map[column] = value_dict
+            self.field_map[column] = field
+
+    def __get_sorted_columns(self, col_values):
+        # Break ties using the type of the column
         def type_priority(column):
             value = col_values[column].pop()
             col_values[column].add(value)
@@ -25,30 +102,22 @@ class StyleMap(object):
             except:
                 t = bool if value in ['True','False'] else str
             return StyleMap.ORDER.index(t)
-        col_list = sorted(col_list, key=type_priority)
 
-        # TODO: undo this, switch to popping mechanism
-        for field, values in reversed([x for x in self.__get_all()._asdict().iteritems()]):
-            if not col_list:
-                break
+        def column_compare(cola, colb):
+            lena = len(col_values[cola])
+            lenb = len(col_values[colb])
+            if lena == lenb:
+                return type_priority(cola) - type_priority(colb)
+            else:
+                return lena - lenb
 
-            next_column = col_list.pop(0)
-            value_dict  = {}
-
-            for value in sorted(col_values[next_column]):
-                value_dict[value] = values.pop(0)
-
-            self.value_map[next_column] = value_dict
-            self.field_map[next_column] = field
+        return sorted(col_values.keys(), cmp=column_compare)
 
-    def __get_all(self):
-        '''A Style holding all possible values for each property.'''
-        return Style(marker=list('.,ov^<>1234sp*hH+xDd|_'),
-                     line=['-', ':', '--'],
-                     color=list('bgrcmyk'))
+    def __get_sorted_fields(self):
+        fields = self._all_styles()
+        return sorted(fields.keys(), key=lambda x: len(fields[x]))
 
     def get_style(self, kv):
-        '''Translate column values to unique line style.'''
         style_fields = {}
 
         for column, values in self.value_map.iteritems():
@@ -60,7 +129,6 @@ class StyleMap(object):
         return StyleMap.DEFAULT._replace(**style_fields)
 
     def get_key(self):
-        '''A visual description of this StyleMap.'''
         key = []
 
         for column, values in self.value_map.iteritems():
@@ -75,3 +143,72 @@ class StyleMap(object):
 
         return sorted(key, key=lambda x:x[1])
 
+class CombinationStyle(StyleMap):
+    def __init__(self, col_map):
+        self.col_map   = col_map
+        self.kv_styles = TupleTable(col_map)
+        self.kv_seen   = TupleTable(col_map, lambda:False)
+
+        all_styles = self._all_styles()
+        styles_order = sorted(all_styles.keys(),
+                              key=lambda x: len(all_styles[x]),
+                              reverse = True)
+
+        # Add a 'None' option in case some lines are plotted without
+        # any value specified for this kv
+        column_values = col_map.get_values()
+        for key in column_values.keys():
+            column_values[key].add(None)
+
+        styles_iter = self.__dict_combinations(all_styles, styles_order)
+        kv_iter     = self.__dict_combinations(column_values)
+
+        # Cycle in case there are more kv combinations than styles
+        # This will be really, really ugly..
+        styles_iter = itertools.cycle(styles_iter)
+
+        for kv, style in zip(kv_iter, styles_iter):
+            self.kv_styles[kv] = Style(**style)
+
+        for kv_tup, style in self.kv_styles:
+            kv = self.col_map.get_kv(kv_tup)
+            if not self.kv_styles[kv]:
+                raise Exception("Didn't initialize %s" % kv)
+
+    def __dict_combinations(self, list_dict, column_order = None):
+        def helper(set_columns, remaining_columns):
+            if not remaining_columns:
+                yield set_columns
+                return
+
+            next_column = remaining_columns.pop(0)
+
+            for v in list_dict[next_column]:
+                set_columns[next_column] = v
+                for vals in helper(dict(set_columns), list(remaining_columns)):
+                    yield vals
+
+        if not column_order:
+            # Just use the arbitrary order returned by the dict
+            column_order = list_dict.keys()
+
+        return helper({}, column_order)
+
+    def get_style(self, kv):
+        self.kv_seen[kv] = True
+        return self.kv_styles[kv]
+
+    def get_key(self):
+        key = []
+
+        for kv_tup, style in self.kv_styles:
+            kv = self.col_map.get_kv(kv_tup)
+            if not self.kv_seen[kv]:
+                continue
+
+            styled_line = plot.plot([], [], style.fmt())[0]
+            description = self.col_map.encode(kv, minimum=True)
+
+            key += [(styled_line, description)]
+
+        return sorted(key, key=lambda x:x[1])
diff --git a/plot_exps.py b/plot_exps.py
index 2d6f06b..d49e69d 100755
--- a/plot_exps.py
+++ b/plot_exps.py
@@ -1,7 +1,17 @@
 #!/usr/bin/env python
 from __future__ import print_function
 
+# Without this trickery, matplotlib uses the current X windows session
+# to create graphs. Problem 1 with this: it requires that the user has an
+# X windows session, through ssh -X or otherwise. Problem 2: it kills the
+# performance of the computer running the X session, even if that computer
+# isn't the one running plot_exps.py!
+import matplotlib
+matplotlib.use('Agg')
 import matplotlib.pyplot as plot
+
+import common as com
+import multiprocessing
 import os
 import shutil as sh
 import sys
@@ -9,11 +19,11 @@ import traceback
 
 from collections import namedtuple
 from config.config import DEFAULTS
-from multiprocessing import Pool, cpu_count
+
 from optparse import OptionParser
 from parse.col_map import ColMap,ColMapBuilder
 from parse.dir_map import DirMap
-from plot.style import StyleMap
+from plot.style import make_styler
 
 def parse_args():
     parser = OptionParser("usage: %prog [options] [csv_dir]...")
@@ -23,7 +33,8 @@ def parse_args():
                       default=DEFAULTS['out-plot'])
     parser.add_option('-f', '--force', action='store_true', default=False,
                       dest='force', help='overwrite existing data')
-    parser.add_option('-p', '--processors', default=max(cpu_count() - 1, 1),
+    parser.add_option('-p', '--processors',
+                      default=max(multiprocessing.cpu_count() - 1, 1),
                       type='int', dest='processors',
                       help='number of threads for processing')
 
@@ -53,8 +64,7 @@ def plot_by_variable(details):
     builder = ColMapBuilder()
     config_nodes = []
 
-    # Generate mapping of (column)=>(line property to vary) for consistently
-    # formatted plots
+    # Decode file names into configuration dicts
     for line_path, line_node in details.node.children.iteritems():
         encoded = line_path[:line_path.index(".csv")]
 
@@ -68,14 +78,13 @@ def plot_by_variable(details):
         config_nodes += [(line_config, line_node)]
 
     col_map   = builder.build()
-    style_map = StyleMap(col_map.columns(), col_map.get_values())
+    style_map = make_styler(col_map)
 
     figure = plot.figure()
-    axes = figure.add_subplot(111)
+    axes   = figure.add_subplot(111)
 
     # Create a line for each file node and its configuration
     for line_config, line_node in config_nodes:
-        # Create line style to match this configuration
         style  = style_map.get_style(line_config)
         values = sorted(line_node.values, key=lambda tup: tup[0])
         xvalues, yvalues = zip(*values)
@@ -85,14 +94,19 @@ def plot_by_variable(details):
     axes.set_title(details.title)
 
     lines, labels = zip(*style_map.get_key())
-    axes.legend(tuple(lines), tuple(labels), prop={'size':10}, loc=2)
+    axes.legend(tuple(lines), tuple(labels), prop={'size':10},
+	    # This code places the legend slightly to the right of the plot
+        bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
 
     axes.set_ylabel(details.value)
     axes.set_xlabel(details.variable)
     axes.set_xlim(0, axes.get_xlim()[1])
     axes.set_ylim(0, axes.get_ylim()[1])
 
-    plot.savefig(details.out, format=OUT_FORMAT)
+    plot.savefig(details.out, format=OUT_FORMAT,
+    	# Using 'tight' causes savefig to rescale the image for non-plot
+	    # artists, which in our case is just the legend
+        bbox_inches='tight')
 
     return True
 
@@ -125,8 +139,12 @@ def plot_dir(data_dir, out_dir, max_procs, force):
     if not plot_details:
         return
 
-    procs = min(len(plot_details), max_procs)
-    pool  = Pool(processes=procs)
+    procs  = min(len(plot_details), max_procs)
+    logged = multiprocessing.Manager().list()
+
+    pool   = multiprocessing.Pool(processes=procs,
+                initializer=com.set_logged_list, initargs=(logged,))
+
     enum  = pool.imap_unordered(plot_wrapper, plot_details)
 
     try:
-- 
cgit v1.2.2