From fbd1df6f63eb551b99f71330d2370c570ff323f5 Mon Sep 17 00:00:00 2001
From: Jonathan Herman
Date: Sun, 21 Apr 2013 13:28:38 -0400
Subject: Scripts read directories created by other scripts if no arguments.

With no arguments, all scripts first try to load the current directory.
If the current directory has no data, the scripts search for the output
of the previous scripts in the toolchain, e.g. parse_exps.py loads
run-data/*, created by run_exps.py.

This commit also switches messages to stderr, where they belong, and
adds in missing lock and unlock overheads.
---
 parse_exps.py | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

(limited to 'parse_exps.py')

diff --git a/parse_exps.py b/parse_exps.py
index c254536..d07378c 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 from __future__ import print_function
 
-import config.config as conf
 import os
 import parse.ft as ft
 import parse.sched as st
@@ -12,6 +11,7 @@ import traceback
 
 from collections import namedtuple
 from common import load_params
+from config.config import DEFAULTS,PARAMS
 from optparse import OptionParser
 from parse.point import ExpPoint
 from parse.tuple_table import TupleTable
@@ -22,7 +22,8 @@ def parse_args():
     parser = OptionParser("usage: %prog [options] [data_dir]...")
 
     parser.add_option('-o', '--out', dest='out',
-                      help='file or directory for data output', default='parse-data')
+                      help='file or directory for data output',
+                      default=DEFAULTS['out-parse'])
     parser.add_option('-i', '--ignore', metavar='[PARAM...]', default="",
                       help='ignore changing parameter values')
     parser.add_option('-f', '--force', action='store_true', default=False,
@@ -41,7 +42,7 @@ def parse_args():
 ExpData = namedtuple('ExpData', ['path', 'params', 'work_dir'])
 
 def get_exp_params(data_dir, cm_builder):
-    param_file = "%s/%s" % (data_dir, conf.DEFAULTS['params_file'])
+    param_file = "%s/%s" % (data_dir, DEFAULTS['params_file'])
 
     if os.path.isfile(param_file):
         params = load_params(param_file)
@@ -53,8 +54,8 @@ def get_exp_params(data_dir, cm_builder):
         params = {}
 
     # Cycles must be present for feather-trace measurement parsing
-    if conf.PARAMS['cycles'] not in params:
-        params[conf.PARAMS['cycles']] = conf.DEFAULTS['cycles']
+    if PARAMS['cycles'] not in params:
+        params[PARAMS['cycles']] = DEFAULTS['cycles']
 
     return params
 
@@ -101,7 +102,7 @@ def parse_exp(exp_force):
     if not result:
         try:
             result = ExpPoint(exp.path)
-            cycles = exp.params[conf.PARAMS['cycles']]
+            cycles = exp.params[PARAMS['cycles']]
 
             # Write overheads into result
             ft.extract_ft_data(result, exp.path, exp.work_dir, cycles)
@@ -116,21 +117,31 @@ def parse_exp(exp_force):
 
     return (exp, result)
 
+def get_exps(args):
+    if args:
+        return args
+    elif os.path.exists(DEFAULTS['out-run']):
+        sys.stderr.write("Reading data from %s/*\n" % DEFAULTS['out-run'])
+        sched_dirs = os.listdir(DEFAULTS['out-run'])
+        return ['%s/%s' % (DEFAULTS['out-run'], d) for d in sched_dirs]
+    else:
+        sys.stderr.write("Reading data from current directory.\n")
+        return [os.getcwd()]
+
 def main():
     opts, args = parse_args()
-
-    args = args or [os.getcwd()]
+    exp_dirs = get_exps(args)
 
     # Load exp parameters into a ColMap
     builder = ColMapBuilder()
     exps = load_exps(exp_dirs, builder, opts.force)
 
     # Don't track changes in ignored parameters
     if opts.ignore:
         for param in opts.ignore.split(","):
             builder.try_remove(param)
     # Always average multiple trials
-    builder.try_remove(conf.PARAMS['trial'])
+    builder.try_remove(PARAMS['trial'])
 
     col_map = builder.build()
     result_table = TupleTable(col_map)
@@ -175,7 +186,8 @@ def main():
     # No csvs to write, assume user meant to print out data
     if dir_map.is_empty():
         if not opts.verbose:
-            sys.stderr.write("Too little data to make csv files.\n")
+            sys.stderr.write("Too little data to make csv files, " +
+                             "printing results.\n")
             for key, exp in result_table:
                 for e in exp:
                     print(e)
-- 
cgit v1.2.2


From 25ccdb0cbc6b959b1f96c89b8bce91963cb67b4c Mon Sep 17 00:00:00 2001
From: Jonathan Herman
Date: Mon, 22 Apr 2013 15:32:12 -0400
Subject: Improved robustness of run_exps.py execution.

Thanks to bcw and gelliott for debugging and ideas.

* Print out the experiment number and the total number of experiments
  when starting each experiment.
* Only sleep and re-release tasks if tasks are waiting to release.
* Fail an experiment with verbose messages if any tasks fail before
  becoming ready to release.
* When waiting for tasks to become ready for release, reset the waiting
  time whenever a new task (or tasks) becomes ready.
* Start regular tracers BEFORE the plugin switch to log data from the
  switch.
* Check the number of running tasks AFTER trying to switch the Linux
  scheduler. This gives plugin deactivation code the opportunity to
  kill these tasks.
* If an invalid executable is specified in the schedule file, fail
  before attempting to run the experiment and print out the problem.
* Propagate exceptions up from experiment failures instead of creating
  ExperimentFailed exceptions.

This commit also makes clock-frequency automatically ignored by
parse_exps.py. Its value would change by +/- 1 MHz between experiments,
ruining graphs.
---
 parse_exps.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'parse_exps.py')

diff --git a/parse_exps.py b/parse_exps.py
index d07378c..c2cbedb 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -140,8 +140,8 @@ def main():
     if opts.ignore:
         for param in opts.ignore.split(","):
             builder.try_remove(param)
-    # Always average multiple trials
-    builder.try_remove(PARAMS['trial'])
+    builder.try_remove(PARAMS['trial']) # Always average multiple trials
+    builder.try_remove(PARAMS['cycles']) # Only need for feather-trace parsing
 
     col_map = builder.build()
     result_table = TupleTable(col_map)
-- 
cgit v1.2.2


From 7545402506aa76261e18d85af585ff0ac1cf05c1 Mon Sep 17 00:00:00 2001
From: Jonathan Herman
Date: Tue, 23 Apr 2013 14:01:35 -0400
Subject: Improved accuracy of sched_trace measurement parsing.

* Measurements from tasks missing > 20% of their scheduling records are
  ignored. This threshold is configurable in config/config.py.
* Measurements which have only zero values are ignored.
* If either of these two situations is encountered, a message is
  printed the first time using the common.log_once() method. See
  parse_exps.py for how this is used with multiple threads.
* Measurements from a task's last job are ignored.
* Miss ratio is calculated only as a fraction of the number of jobs
  whose matching release and completion records were found, not just
  release records.
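To make these rules concrete, here is a minimal, self-contained sketch
of the filtering and miss-ratio logic. The names MAX_RECORD_LOSS,
task_is_usable, Job, and the local log_once stand-in are illustrative
assumptions, not the actual API of parse/sched.py or common.py:

    import sys
    from collections import namedtuple

    MAX_RECORD_LOSS = 0.2  # hypothetical name for the "> 20%" threshold

    Job = namedtuple('Job', ['release', 'completion', 'deadline'])

    _logged = set()

    def log_once(msg):
        # Single-process stand-in for common.log_once()
        if msg not in _logged:
            _logged.add(msg)
            sys.stderr.write(msg + "\n")

    def task_is_usable(records_found, records_expected, values):
        # Rule 1: ignore tasks missing more than 20% of their records
        missing = 1.0 - float(records_found) / records_expected
        if missing > MAX_RECORD_LOSS:
            log_once("Ignoring tasks missing > 20% of sched records.")
            return False
        # Rule 2: ignore measurements which have only zero values
        if all(v == 0 for v in values):
            log_once("Ignoring measurements with only zero values.")
            return False
        return True

    def miss_ratio(jobs):
        # Only jobs with BOTH a release and a completion record count
        matched = [j for j in jobs if j.release is not None
                   and j.completion is not None]
        if not matched:
            return 0.0
        missed = sum(1 for j in matched if j.completion > j.deadline)
        return float(missed) / len(matched)

Dropping unmatched jobs from the denominator keeps a lost completion
record from silently skewing the miss ratio.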
---
 parse_exps.py | 159 +++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 97 insertions(+), 62 deletions(-)

(limited to 'parse_exps.py')

diff --git a/parse_exps.py b/parse_exps.py
index c2cbedb..cc4372a 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 from __future__ import print_function
 
+import common as com
+import multiprocessing
 import os
 import parse.ft as ft
 import parse.sched as st
@@ -10,13 +12,12 @@ import sys
 import traceback
 
 from collections import namedtuple
-from common import load_params
 from config.config import DEFAULTS,PARAMS
 from optparse import OptionParser
 from parse.point import ExpPoint
 from parse.tuple_table import TupleTable
 from parse.col_map import ColMapBuilder
-from multiprocessing import Pool, cpu_count
+
 
 def parse_args():
     parser = OptionParser("usage: %prog [options] [data_dir]...")
@@ -33,18 +34,60 @@ def parse_args():
     parser.add_option('-m', '--write-map', action='store_true', default=False,
                       dest='write_map',
                       help='Output map of values instead of csv tree')
-    parser.add_option('-p', '--processors', default=max(cpu_count() - 1, 1),
+    parser.add_option('-p', '--processors',
+                      default=max(multiprocessing.cpu_count() - 1, 1),
                       type='int', dest='processors',
                       help='number of threads for processing')
 
     return parser.parse_args()
 
+
 ExpData = namedtuple('ExpData', ['path', 'params', 'work_dir'])
 
+
+def parse_exp(exp_force):
+    # Tupled for multiprocessing
+    exp, force = exp_force
+
+    result_file = exp.work_dir + "/exp_point.pkl"
+    should_load = not force and os.path.exists(result_file)
+
+    result = None
+    if should_load:
+        with open(result_file, 'rb') as f:
+            try:
+                # No need to go through this work twice
+                result = pickle.load(f)
+            except:
+                pass
+
+    if not result:
+        try:
+            # Create a readable name
+            name = os.path.relpath(exp.path)
+            name = name if name != "." else os.path.split(os.getcwd())[1]
+
+            result = ExpPoint(name)
+
+            # Write overheads into result
+            cycles = exp.params[PARAMS['cycles']]
+            ft.extract_ft_data(result, exp.path, exp.work_dir, cycles)
+
+            # Write scheduling statistics into result
+            st.extract_sched_data(result, exp.path, exp.work_dir)
+
+            with open(result_file, 'wb') as f:
+                pickle.dump(result, f)
+        except:
+            traceback.print_exc()
+
+    return (exp, result)
+
+
 def get_exp_params(data_dir, cm_builder):
     param_file = "%s/%s" % (data_dir, DEFAULTS['params_file'])
 
     if os.path.isfile(param_file):
-        params = load_params(param_file)
+        params = com.load_params(param_file)
 
         # Store parameters in cm_builder, which will track which parameters change
         # across experiments
@@ -83,41 +126,8 @@ def load_exps(exp_dirs, cm_builder, force):
 
     return exps
 
-def parse_exp(exp_force):
-    # Tupled for multiprocessing
-    exp, force = exp_force
-
-    result_file = exp.work_dir + "/exp_point.pkl"
-    should_load = not force and os.path.exists(result_file)
-
-    result = None
-    if should_load:
-        with open(result_file, 'rb') as f:
-            try:
-                # No need to go through this work twice
-                result = pickle.load(f)
-            except:
-                pass
 
-    if not result:
-        try:
-            result = ExpPoint(exp.path)
-            cycles = exp.params[PARAMS['cycles']]
-
-            # Write overheads into result
-            ft.extract_ft_data(result, exp.path, exp.work_dir, cycles)
-
-            # Write scheduling statistics into result
-            st.extract_sched_data(result, exp.path, exp.work_dir)
-
-            with open(result_file, 'wb') as f:
-                pickle.dump(result, f)
-        except:
-            traceback.print_exc()
-
-    return (exp, result)
-
-def get_exps(args):
+def get_dirs(args):
     if args:
         return args
     elif os.path.exists(DEFAULTS['out-run']):
@@ -128,38 +138,32 @@ def get_exps(args):
         sys.stderr.write("Reading data from current directory.\n")
         return [os.getcwd()]
 
-def main():
-    opts, args = parse_args()
-    exp_dirs = get_exps(args)
-
-    # Load exp parameters into a ColMap
-    builder = ColMapBuilder()
-    exps = load_exps(exp_dirs, builder, opts.force)
 
-    # Don't track changes in ignored parameters
-    if opts.ignore:
-        for param in opts.ignore.split(","):
-            builder.try_remove(param)
-    builder.try_remove(PARAMS['trial']) # Always average multiple trials
-    builder.try_remove(PARAMS['cycles']) # Only need for feather-trace parsing
+def fill_table(table, exps, opts):
+    sys.stderr.write("Parsing data...\n")
 
-    col_map = builder.build()
-    result_table = TupleTable(col_map)
+    procs = min(len(exps), opts.processors)
+    logged = multiprocessing.Manager().list()
 
-    sys.stderr.write("Parsing data...\n")
+    pool = multiprocessing.Pool(processes=procs,
+                # Share a list of previously logged messages amongst processes
+                # This is for the com.log_once method to use
+                initializer=com.set_logged_list, initargs=(logged,))
 
-    procs = min(len(exps), opts.processors)
-    pool = Pool(processes=procs)
     pool_args = zip(exps, [opts.force]*len(exps))
     enum = pool.imap_unordered(parse_exp, pool_args, 1)
 
     try:
         for i, (exp, result) in enumerate(enum):
+            if not result:
+                continue
+
             if opts.verbose:
                 print(result)
             else:
                 sys.stderr.write('\r {0:.2%}'.format(float(i)/len(exps)))
-            result_table[exp.params] += [result]
+            table[exp.params] += [result]
+
         pool.close()
     except:
         pool.terminate()
@@ -170,16 +174,17 @@ def main():
 
     sys.stderr.write('\n')
 
-    if opts.force and os.path.exists(opts.out):
-        sh.rmtree(opts.out)
 
-    reduced_table = result_table.reduce()
+def write_output(table, opts):
+    reduced_table = table.reduce()
 
     if opts.write_map:
         sys.stderr.write("Writing python map into %s...\n" % opts.out)
-        # Write summarized results into map
         reduced_table.write_map(opts.out)
     else:
+        if opts.force and os.path.exists(opts.out):
+            sh.rmtree(opts.out)
+
         # Write out csv directories for all variable params
         dir_map = reduced_table.to_dir_map()
 
@@ -188,12 +193,42 @@ def main():
             if not opts.verbose:
                 sys.stderr.write("Too little data to make csv files, " +
                                  "printing results.\n")
-                for key, exp in result_table:
+                for key, exp in table:
                     for e in exp:
                         print(e)
         else:
             sys.stderr.write("Writing csvs into %s...\n" % opts.out)
             dir_map.write(opts.out)
 
+
+def main():
+    opts, args = parse_args()
+    exp_dirs = get_dirs(args)
+
+    # Load experiment parameters into a ColMap
+    builder = ColMapBuilder()
+    exps = load_exps(exp_dirs, builder, opts.force)
+
+    # Don't track changes in ignored parameters
+    if opts.ignore:
+        for param in opts.ignore.split(","):
+            builder.try_remove(param)
+
+    # Always average multiple trials
+    builder.try_remove(PARAMS['trial'])
+    # Only need this for feather-trace parsing
+    builder.try_remove(PARAMS['cycles'])
+
+    col_map = builder.build()
+    table = TupleTable(col_map)
+
+    fill_table(table, exps, opts)
+
+    if not table:
+        sys.stderr.write("Found no data to parse!")
+        sys.exit(1)
+
+    write_output(table, opts)
+
+
 if __name__ == '__main__':
     main()
-- 
cgit v1.2.2
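The pool-initializer pattern used in fill_table() above is worth seeing
in isolation: a Manager().list() proxy is handed to every worker at
startup so that "already logged" state is shared across processes. The
following is a self-contained sketch with simplified stand-ins for
common.set_logged_list() and common.log_once(); the real helpers live
in the repository's common module and their signatures may differ:

    import multiprocessing
    import sys

    # Simplified stand-ins for common.set_logged_list()/common.log_once()
    _logged = None

    def set_logged_list(shared_list):
        # Pool initializer: runs once inside each worker process
        global _logged
        _logged = shared_list

    def log_once(msg):
        # Print msg at most once across all worker processes (best
        # effort: two workers racing between the membership check and
        # the append could both print)
        if msg not in _logged:
            _logged.append(msg)
            sys.stderr.write(msg + "\n")

    def work(i):
        log_once("This warning appears once, not once per work item.")
        return i * i

    if __name__ == '__main__':
        logged = multiprocessing.Manager().list()  # proxy shared by workers
        pool = multiprocessing.Pool(processes=2,
                                    initializer=set_logged_list,
                                    initargs=(logged,))
        print(sorted(pool.map(work, range(8))))
        pool.close()
        pool.join()

Passing the proxy through initargs rather than through each task tuple
keeps the per-task pickling cost down and mirrors how parse_exps.py
wires com.set_logged_list into its multiprocessing.Pool.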