From e01b51b677e925bc2cd8d55afa89f29674b6f356 Mon Sep 17 00:00:00 2001
From: Jonathan Herman <hermanjl@cs.unc.edu>
Date: Thu, 2 May 2013 15:49:11 -0400
Subject: Added --collapse option to parse_exps.py for creating simpler graphs.

This creates csvs which have merged the lines which only differ by variables
whose values are numbers (e.g. 'wss', 'number of tasks', or 'duration',
but not 'scheduler').

For example, consider a csv which is tracking the change in working set sizes.
There are 3 different experiment configurations per WSS, one with 12 tasks,
one for 14, and another for 16. Without --collapse, a separate csv
(and line to plot in plot_exps.py) will be created for each of the 12, 14, and
16 configurations. With --collapse, a single csv (and a single line plotted
by plot_exps.py) will be created which averages the values of 12, 14, and 16
tasks for each WSS.

This is very useful for noticing trends or presenting overhead bar charts
or other examples in papers.
---
 parse_exps.py | 82 +++++++++++++++++++++++++++++++++++++++++++++++++----------
 plot_exps.py  |  6 ++---
 2 files changed, 71 insertions(+), 17 deletions(-)

diff --git a/parse_exps.py b/parse_exps.py
index 37667aa..82febfc 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -43,6 +43,10 @@ def parse_args():
     parser.add_option('-s', '--scale-against', dest='scale_against',
                       metavar='PARAM=VALUE', default="type=unmanaged",
                       help='calculate task scaling factors against these configs')
+    parser.add_option('-c', '--collapse', dest='collapse',
+                      action='store_true', default=False,
+                      help=('simplify graphs where possible by averaging ' +
+                            'parameter values which are numbers (dangerous)'))
 
     return parser.parse_args()
 
@@ -226,30 +230,80 @@ def fill_table(table, builder, exps, opts):
     sys.stderr.write('\n')
 
 
-def write_output(table, opts):
+def write_csvs(table, out, print_empty=False):
     reduced_table = table.reduce()
 
+    # Write out csv directories for all variable params
+    dir_map = reduced_table.to_dir_map()
+
+    # No csvs to write, assume user meant to print out data
+    if dir_map.is_empty():
+        if print_empty:
+            sys.stderr.write("Too little data to make csv files, " +
+                             "printing results.\n")
+            for key, exp in table:
+                for e in exp:
+                    print(e)
+    else:
+        dir_map.write(out)
+
+
+def write_collapsed_csvs(table, opts):
+    """Write csvs keyed by nonnumeric columns plus one numeric column."""
+    sys.stderr.write("Collapse option specified. "
+                     "Only one numeric column at a time will be plotted.\n"
+                     "The values of others will be averaged. "
+                     "This is dangerous and can hide important trends!\n")
+
+    original_map = table.get_col_map()
+    builder = ColMapBuilder()
+    numeric_cols = []
+
+    # Add only nonnumeric fields to builder
+    for column in original_map.columns():
+        numeric = True
+        for v in original_map.get_values()[column]:
+            try:
+                float(v)
+            except ValueError:
+                numeric = False
+                builder.try_add(column, v)
+        if numeric:
+            numeric_cols += [column]
+
+    for num_column in numeric_cols:
+        # Only consider one number column at a time: add this column's values
+        for num_value in original_map.get_values()[num_column]:
+            builder.try_add(num_column, num_value)
+
+        next_map = builder.build()
+        next_table = TupleTable(next_map)
+
+        # Re-sort data into new table using this new key
+        for mapped_key, points in table:
+            kv = original_map.get_kv(mapped_key)
+            next_table[kv] += points
+
+        write_csvs(next_table, opts.out)
+        # Drop this column again so the next pass keys on a different one
+        builder.try_remove(num_column)
+
+
+def write_output(table, opts):
     if opts.write_map:
         sys.stderr.write("Writing python map into %s...\n" % opts.out)
+        reduced_table = table.reduce()
         reduced_table.write_map(opts.out)
     else:
         if opts.force and os.path.exists(opts.out):
             sh.rmtree(opts.out)
 
-        # Write out csv directories for all variable params
-        dir_map = reduced_table.to_dir_map()
-
-        # No csvs to write, assume user meant to print out data
-        if dir_map.is_empty():
-            if not opts.verbose:
-                sys.stderr.write("Too little data to make csv files, " +
-                                 "printing results.\n")
-                for key, exp in table:
-                    for e in exp:
-                        print(e)
+        sys.stderr.write("Writing csvs into %s...\n" % opts.out)
+
+        if opts.collapse:
+            write_collapsed_csvs(table, opts)
         else:
-            sys.stderr.write("Writing csvs into %s...\n" % opts.out)
-            dir_map.write(opts.out)
+            write_csvs(table, opts.out, not opts.verbose)
 
 
 def main():
diff --git a/plot_exps.py b/plot_exps.py
index 15c54d0..2d6f06b 100755
--- a/plot_exps.py
+++ b/plot_exps.py
@@ -85,12 +85,12 @@ def plot_by_variable(details):
     axes.set_title(details.title)
 
     lines, labels = zip(*style_map.get_key())
-    axes.legend(tuple(lines), tuple(labels), prop={'size':10})
+    axes.legend(tuple(lines), tuple(labels), prop={'size':10}, loc=2)
 
     axes.set_ylabel(details.value)
     axes.set_xlabel(details.variable)
-    axes.set_xlim(0, axes.get_xlim()[1] + 1)
-    axes.set_ylim(0, axes.get_ylim()[1] + 1)
+    axes.set_xlim(0, axes.get_xlim()[1])
+    axes.set_ylim(0, axes.get_ylim()[1])
 
     plot.savefig(details.out, format=OUT_FORMAT)
 
-- 
cgit v1.2.2