From 53cfcf10531256d0e4411a7e0bda431ec27f28e7 Mon Sep 17 00:00:00 2001
From: Jonathan Herman <hermanjl@cs.unc.edu>
Date: Tue, 30 Oct 2012 16:04:23 -0400
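Subject: Process all non-scaling statistics for base experiments.

Scaling-base experiments are now parsed as regular data points too
(tagged with ExpData.is_base) and are skipped when looking up a scaling
base. Also adds the -i/--ignore option backed by ColMap.try_remove,
reports progress on stderr, and removes leftover debug prints.
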
---
 parse/ft.py          |  2 --
 parse/sched.py       | 13 +++++++------
 parse/tuple_table.py | 10 +++++++++-
 parse_exps.py        | 53 +++++++++++++++++++++++++++++++++++++++-------------
 4 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/parse/ft.py b/parse/ft.py
index feb338f..4e310b0 100644
--- a/parse/ft.py
+++ b/parse/ft.py
@@ -20,7 +20,6 @@ def get_ft_output(data_dir, out_dir, force=False):
         if force:
             os.remove(output_file)
         else:
-            print("ft-output already exists for %s" % data_dir)
             return output_file
 
     if len(bins) != 0:
@@ -41,7 +40,6 @@ def get_ft_output(data_dir, out_dir, force=False):
         # Analyze will summarize those
         # todo pass in f
         cmd_arr = [conf.BINS['analyze']]
-        print("cmd arr: %s-%s" % (cmd_arr, bins))
         cmd_arr.extend(bins)
         with open(output_file, "w") as f:
             subprocess.call(cmd_arr, cwd=out_dir, stdout=f, stderr=err_file)
diff --git a/parse/sched.py b/parse/sched.py
index 80764b6..bbf6e10 100644
--- a/parse/sched.py
+++ b/parse/sched.py
@@ -11,6 +11,7 @@ import os
 import re
 import numpy as np
 import subprocess
+import pprint
 
 from collections import namedtuple,defaultdict
 from operator import methodcaller
@@ -54,6 +55,7 @@ class LeveledArray(object):
     def add(self, task, value):
         self.vals[task.config.level] += [value]
 
+
     def write_measurements(self, result):
         for level, arr in self.vals.iteritems():
             name = "%s%s" % ("%s-" % level if level else "", self.name)
@@ -72,7 +74,6 @@ def get_st_output(data_dir, out_dir, force=False):
         if force:
             os.remove(output_file)
         else:
-            print("st-output already exists for %s" % data_dir)
             return output_file
 
     if len(bins) != 0:
@@ -195,8 +196,11 @@ def extract_variance(task_dict, data, exp_point):
         completions[pid] += [duration]
 
     for pid, durations in completions.iteritems():
+        m = Measurement(pid).from_array(durations)
+
         # TODO: not this, please
-        task_dict[pid].run.append(Measurement(pid).from_array(durations))
+        if not task_dict[pid].run:
+            task_dict[pid].run.append(m)
 
         job_times = np.array(durations)
         mean = job_times.mean()
@@ -210,6 +214,7 @@ def extract_variance(task_dict, data, exp_point):
         corrected = (1 + 1/(4 * len(job_times))) * cv
 
         varz.add(task_dict[pid], corrected)
+        # varz.add(task_dict[pid], m[Type.Var])
 
     if exp_point:
         map(methodcaller('write_measurements', exp_point),
@@ -272,17 +277,13 @@ def extract_scaling_data(task_dict, data, result, base_file):
         for data_stat, base_stat in zip(data_stats[config],base_stats[config]):
             if not base_stat[Type.Avg] or not base_stat[Type.Max] or \
                not data_stat[Type.Avg] or not data_stat[Type.Max]:
-               print("missing a thing: {},{}".format(base_stat, data_stat))
                continue
             # How much larger is their exec stat than ours?
-            print("%s vs %s" % (base_stat, data_stat))
             avg_scale = float(base_stat[Type.Avg]) / float(data_stat[Type.Avg])
             max_scale = float(base_stat[Type.Max]) / float(data_stat[Type.Max])
 
             task = task_dict[data_stat.id]
 
-            print("scaling for %s" % data_stat.id)
-
             avg_scales.add(task, avg_scale)
             max_scales.add(task, max_scale)
 
diff --git a/parse/tuple_table.py b/parse/tuple_table.py
index 465abb3..e5dc39b 100644
--- a/parse/tuple_table.py
+++ b/parse/tuple_table.py
@@ -21,7 +21,6 @@ class ColMap(object):
                 key += (None,)
             else:
                 key += (kv[col],)
-
         return key
 
     def __contains__(self, col):
@@ -43,6 +42,15 @@ class ColMap(object):
                 self.value_map[column] = value
             elif value != self.value_map[column]:
                 self.force_add(column)
+                del(self.value_map[column])
+
+    def try_remove(self, column):
+        if column in self.rev_map:
+            idx = self.rev_map[column]
+            for value in self.col_list[idx+1:]:
+                self.rev_map[value] -= 1
+            del(self.col_list[self.rev_map[column]])
+            del(self.rev_map[column])
 
     def __str__(self):
         return "<ColMap>%s" % (self.rev_map)
diff --git a/parse_exps.py b/parse_exps.py
index 2d1c370..87d0783 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -8,6 +8,7 @@ import parse.ft as ft
 import parse.sched as st
 import re
 import shutil as sh
+import sys
 
 from collections import namedtuple
 from common import load_params
@@ -16,18 +17,20 @@ from parse.point import ExpPoint
 from parse.tuple_table import ColMap,TupleTable
 
 def parse_args():
-    # TODO: convert data-dir to proper option
+    # TODO: convert data-dir to a proper option; drop redundant 'dest' args
     parser = OptionParser("usage: %prog [options] [data_dir]...")
 
     parser.add_option('-o', '--out', dest='out',
                       help='file or directory for data output', default='parse-data')
 
-    # TODO: this means nothing
+    # TODO: this means nothing, also remove dests
     parser.add_option('-c', '--clean', action='store_true', default=False,
                       dest='clean', help='do not output single-point csvs')
     parser.add_option('-s', '--scale-against', dest='scale_against',
                       metavar='PARAM=VALUE', default="",
                       help='calculate task scaling factors against these configs')
+    parser.add_option('-i', '--ignore', metavar='[PARAM...]', default="",
+                      help='ignore changing parameter values')
     parser.add_option('-f', '--force', action='store_true', default=False,
                       dest='force', help='overwrite existing data')
     parser.add_option('-v', '--verbose', action='store_true', default=False,
@@ -38,7 +41,7 @@ def parse_args():
 
     return parser.parse_args()
 
-ExpData   = namedtuple('ExpData', ['name', 'params', 'data_files'])
+ExpData   = namedtuple('ExpData', ['name', 'params', 'data_files', 'is_base'])
 DataFiles = namedtuple('DataFiles', ['ft','st'])
 
 def get_exp_params(data_dir, col_map):
@@ -63,7 +66,9 @@ def gen_exp_data(exp_dirs, base_conf, col_map, force):
     plain_exps = []
     scaling_bases  = []
 
-    for data_dir in exp_dirs:
+    sys.stderr.write("Generating data...\n")
+
+    for i, data_dir in enumerate(exp_dirs):
         if not os.path.isdir(data_dir):
             raise IOError("Invalid experiment '%s'" % os.path.abspath(data_dir))
 
@@ -76,18 +81,30 @@ def gen_exp_data(exp_dirs, base_conf, col_map, force):
         st_output = st.get_st_output(data_dir, tmp_dir, force)
         ft_output = ft.get_ft_output(data_dir, tmp_dir, force)
 
-        # Create experiment named after the data dir
-        exp_data = ExpData(data_dir, params, DataFiles(ft_output, st_output))
 
         if base_conf and base_conf.viewitems() & params.viewitems():
             if not st_output:
                 raise Exception("Scaling base '%s' useless without sched data!"
                                 % data_dir)
-            params.pop(base_conf.keys()[0])
-            scaling_bases += [exp_data]
+            is_base = True
+
+            base_params = copy.deepcopy(params)
+            base_params.pop(base_conf.keys()[0])
+
+            base_exp = ExpData(data_dir, base_params,
+                               DataFiles(ft_output, st_output), True)
+            scaling_bases += [base_exp]
         else:
-            plain_exps += [exp_data]
+            is_base = False
 
+        # Create experiment named after the data dir
+        exp_data = ExpData(data_dir, params,
+                           DataFiles(ft_output, st_output), is_base)
+
+        plain_exps += [exp_data]
+
+        sys.stderr.write('\r {0:.2%}'.format(float(i)/len(exp_dirs)))
+    sys.stderr.write('\n')
     return (plain_exps, scaling_bases)
 
 def main():
@@ -107,14 +124,20 @@ def main():
         raise IOError("Base column '%s' not present in any parameters!" %
                       base_conf.keys()[0])
 
-    base_table = TupleTable(col_map) # For tracking 'base' experiments
-    result_table  = TupleTable(col_map) # For generating csv directories
+    base_map = copy.deepcopy(col_map)
+    if opts.ignore:
+        for param in opts.ignore.split(","):
+            col_map.try_remove(param)
+
+    base_table   = TupleTable(base_map) # For tracking 'base' experiments
+    result_table = TupleTable(col_map)  # For generating output
 
     # Used to find matching scaling_base for each experiment
     for base in scaling_bases:
         base_table.add_exp(base.params, base)
 
-    for exp in plain_exps:
+    sys.stderr.write("Parsing data...\n")
+    for i,exp in enumerate(plain_exps):
         result = ExpPoint(exp.name)
 
         if exp.data_files.ft:
@@ -123,7 +146,7 @@ def main():
 
         if exp.data_files.st:
             base = None
-            if base_conf:
+            if base_conf and not exp.is_base:
                 # Try to find a scaling base
                 base_params = copy.deepcopy(exp.params)
                 base_params.pop(base_conf.keys()[0])
@@ -137,12 +160,16 @@ def main():
 
         if opts.verbose:
             print(result)
+        else:
+            sys.stderr.write('\r {0:.2%}'.format(float(i)/len(plain_exps)))
+    sys.stderr.write('\n')
 
     if opts.force and os.path.exists(opts.out):
         sh.rmtree(opts.out)
 
     result_table.reduce()
 
+    sys.stderr.write("Writing result...\n")
     if opts.write_map:
         # Write summarized results into map
         result_table.write_map(opts.out)
-- 
cgit v1.2.2