Added --collapse option to parse_exps.py for creating simpler graphs.

This creates csvs in which lines that differ only by variables whose values are numbers (e.g. 'wss', 'number of tasks', or 'duration', but not 'scheduler') have been merged. For example, consider a csv which is tracking the change in working set sizes. There are 3 different experiment configurations per WSS: one with 12 tasks, one with 14, and another with 16. Without --collapse, a separate csv (and line to plot in plot_exps.py) will be created for each of the 12, 14, and 16 configurations. With --collapse, a single csv (and a single line plotted by plot_exps.py) will be created which averages the values of 12, 14, and 16 tasks for each WSS. This is very useful for noticing trends or presenting overhead bar charts or other examples in papers.
author: Jonathan Herman <hermanjl@cs.unc.edu> 2013-05-02 15:49:11 -0400
committer: Jonathan Herman <hermanjl@cs.unc.edu> 2013-05-02 15:57:36 -0400
commit: 38e4029048bb693525369ea7eb0e639f73844ed2 (patch)
tree: 9d77366ac7a5d2ae87acf54482b6cde5275eaa10 /parse_exps.py
parent: 6f2558b8c4f4e33630b40dfbe20024f7a372a8f0 (diff)
1 files changed, 68 insertions, 14 deletions
diff --git a/parse_exps.py b/parse_exps.py
index 94c30a4..ee7b29f 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -38,6 +38,10 @@ def parse_args():
                      default=max(multiprocessing.cpu_count() - 1, 1),
                      type='int', dest='processors',
                      help='number of threads for processing')
+    parser.add_option('-c', '--collapse', dest='collapse',
+                      action='store_true', default=False,
+                      help=('simplify graphs where possible by averaging ' +
+                            'parameter values which are numbers (dangerous)'))
    return parser.parse_args()
@@ -175,30 +179,80 @@ def fill_table(table, exps, opts):
    sys.stderr.write('\n')
-def write_output(table, opts):
+def write_csvs(table, out, print_empty=False):
    reduced_table = table.reduce()
+    # Write out csv directories for all variable params
+    dir_map = reduced_table.to_dir_map()
+    # No csvs to write, assume user meant to print out data
+    if dir_map.is_empty():
+        if print_empty:
+            sys.stderr.write("Too little data to make csv files, " +
+                             "printing results.\n")
+            for key, exp in table:
+                for e in exp:
+                    print(e)
+    else:
+        dir_map.write(out)
+def write_collapsed_csvs(table, opts):
+    sys.stderr.write("Collapse option specified. "
+                     "Only one numeric column at a time will be plotted.\n"
+                     "The values of others will be averaged. "
+                     "This is dangerous and can hide important trends!\n")
+    original_map = table.get_col_map()
+    builder = ColMapBuilder()
+    numeric_cols = []
+    # Add only nonnumeric fields to builder
+    for column in original_map.columns():
+        numeric = True
+        for v in original_map.get_values()[column]:
+            try:
+                float(v)
+            except ValueError:
+                numeric = False
+                builder.try_add(column, v)
+        if numeric:
+            numeric_cols += [column]
+    for num_column in numeric_cols:
+        # Only going to consider a single number column at a time
+        for num_value in original_map.get_values()[column]:
+            builder.try_add(num_column, num_value)
+        next_map = builder.build()
+        next_table = TupleTable(next_map)
+        # Re-sort data into new table using this new key
+        for mapped_key, points in table:
+            kv = original_map.get_kv(mapped_key)
+            next_table[kv] += points
+        write_csvs(next_table, opts.out)
+        builder.try_remove(num_column)
+def write_output(table, opts):
    if opts.write_map:
        sys.stderr.write("Writing python map into %s...\n" % opts.out)
+        reduced_table = table.reduce()
        reduced_table.write_map(opts.out)
    else:
        if opts.force and os.path.exists(opts.out):
            sh.rmtree(opts.out)
-        # Write out csv directories for all variable params
+        sys.stderr.write("Writing csvs into %s...\n" % opts.out)
-        dir_map = reduced_table.to_dir_map()
+        if opts.collapse:
-        # No csvs to write, assume user meant to print out data
+            write_collapsed_csvs(table, opts)
-        if dir_map.is_empty():
-            if not opts.verbose:
-                sys.stderr.write("Too little data to make csv files, " +
-                                 "printing results.\n")
-                for key, exp in table:
-                    for e in exp:
-                        print(e)
        else:
-            sys.stderr.write("Writing csvs into %s...\n" % opts.out)
+            write_csvs(table, opts.out, not opts.verbose)
-            dir_map.write(opts.out)
 def main():
author	Jonathan Herman <hermanjl@cs.unc.edu>	2013-05-02 15:49:11 -0400
committer	Jonathan Herman <hermanjl@cs.unc.edu>	2013-05-02 15:57:36 -0400
commit	38e4029048bb693525369ea7eb0e639f73844ed2 (patch)
tree	9d77366ac7a5d2ae87acf54482b6cde5275eaa10 /parse_exps.py
parent	6f2558b8c4f4e33630b40dfbe20024f7a372a8f0 (diff)

diff --git a/parse_exps.py b/parse_exps.py index 94c30a4..ee7b29f 100755 --- a/parse_exps.py +++ b/parse_exps.py
@@ -38,6 +38,10 @@ def parse_args():
38	default=max(multiprocessing.cpu_count() - 1, 1),	38	default=max(multiprocessing.cpu_count() - 1, 1),
39	type='int', dest='processors',	39	type='int', dest='processors',
40	help='number of threads for processing')	40	help='number of threads for processing')
		41	parser.add_option('-c', '--collapse', dest='collapse',
		42	action='store_true', default=False,
		43	help=('simplify graphs where possible by averaging ' +
		44	'parameter values which are numbers (dangerous)'))
41		45
42	return parser.parse_args()	46	return parser.parse_args()
43		47
@@ -175,30 +179,80 @@ def fill_table(table, exps, opts):
175	sys.stderr.write('\n')	179	sys.stderr.write('\n')
176		180
177		181
178	def write_output(table, opts):	182	def write_csvs(table, out, print_empty=False):
179	reduced_table = table.reduce()	183	reduced_table = table.reduce()
180		184
		185	# Write out csv directories for all variable params
		186	dir_map = reduced_table.to_dir_map()
		187
		188	# No csvs to write, assume user meant to print out data
		189	if dir_map.is_empty():
		190	if print_empty:
		191	sys.stderr.write("Too little data to make csv files, " +
		192	"printing results.\n")
		193	for key, exp in table:
		194	for e in exp:
		195	print(e)
		196	else:
		197	dir_map.write(out)
		198
		199
		200	def write_collapsed_csvs(table, opts):
		201	sys.stderr.write("Collapse option specified. "
		202	"Only one numeric column at a time will be plotted.\n"
		203	"The values of others will be averaged. "
		204	"This is dangerous and can hide important trends!\n")
		205
		206	original_map = table.get_col_map()
		207
		208	builder = ColMapBuilder()
		209	numeric_cols = []
		210
		211	# Add only nonnumeric fields to builder
		212	for column in original_map.columns():
		213	numeric = True
		214	for v in original_map.get_values()[column]:
		215	try:
		216	float(v)
		217	except ValueError:
		218	numeric = False
		219	builder.try_add(column, v)
		220	if numeric:
		221	numeric_cols += [column]
		222
		223	for num_column in numeric_cols:
		224	# Only going to consider a single number column at a time
		225	for num_value in original_map.get_values()[column]:
		226	builder.try_add(num_column, num_value)
		227
		228	next_map = builder.build()
		229	next_table = TupleTable(next_map)
		230
		231	# Re-sort data into new table using this new key
		232	for mapped_key, points in table:
		233	kv = original_map.get_kv(mapped_key)
		234	next_table[kv] += points
		235
		236	write_csvs(next_table, opts.out)
		237
		238	builder.try_remove(num_column)
		239
		240
		241	def write_output(table, opts):
181	if opts.write_map:	242	if opts.write_map:
182	sys.stderr.write("Writing python map into %s...\n" % opts.out)	243	sys.stderr.write("Writing python map into %s...\n" % opts.out)
		244	reduced_table = table.reduce()
183	reduced_table.write_map(opts.out)	245	reduced_table.write_map(opts.out)
184	else:	246	else:
185	if opts.force and os.path.exists(opts.out):	247	if opts.force and os.path.exists(opts.out):
186	sh.rmtree(opts.out)	248	sh.rmtree(opts.out)
187		249
188	# Write out csv directories for all variable params	250	sys.stderr.write("Writing csvs into %s...\n" % opts.out)
189	dir_map = reduced_table.to_dir_map()	251
190		252	if opts.collapse:
191	# No csvs to write, assume user meant to print out data	253	write_collapsed_csvs(table, opts)
192	if dir_map.is_empty():
193	if not opts.verbose:
194	sys.stderr.write("Too little data to make csv files, " +
195	"printing results.\n")
196	for key, exp in table:
197	for e in exp:
198	print(e)
199	else:	254	else:
200	sys.stderr.write("Writing csvs into %s...\n" % opts.out)	255	write_csvs(table, opts.out, not opts.verbose)
201	dir_map.write(opts.out)
202		256
203		257
204	def main():	258	def main():