From 7545402506aa76261e18d85af585ff0ac1cf05c1 Mon Sep 17 00:00:00 2001
From: Jonathan Herman
Date: Tue, 23 Apr 2013 14:01:35 -0400
Subject: Improved accuracy of sched_trace measurement parsing.

* Measurements from tasks missing more than 20% of their scheduling
  records are ignored. This threshold is configurable in config/config.py.
* Measurements which contain only zero values are ignored.
* If either of these two situations is encountered, a message is printed
  the first time using the common.log_once() method. See parse_exps.py for
  how this is shared across multiple worker processes.
* Measurements from a task's last job are ignored.
* Miss ratio is calculated only as a fraction of the number of jobs whose
  matching release and completion records were both found, not of all jobs
  with a release record.
---
 parse_exps.py | 159 +++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 97 insertions(+), 62 deletions(-)

diff --git a/parse_exps.py b/parse_exps.py
index c2cbedb..cc4372a 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 from __future__ import print_function
 
+import common as com
+import multiprocessing
 import os
 import parse.ft as ft
 import parse.sched as st
@@ -10,13 +12,12 @@ import sys
 import traceback
 
 from collections import namedtuple
-from common import load_params
 from config.config import DEFAULTS,PARAMS
 from optparse import OptionParser
 from parse.point import ExpPoint
 from parse.tuple_table import TupleTable
 from parse.col_map import ColMapBuilder
-from multiprocessing import Pool, cpu_count
+
 
 def parse_args():
     parser = OptionParser("usage: %prog [options] [data_dir]...")
@@ -33,18 +34,60 @@ def parse_args():
     parser.add_option('-m', '--write-map', action='store_true', default=False,
                       dest='write_map',
                       help='Output map of values instead of csv tree')
-    parser.add_option('-p', '--processors', default=max(cpu_count() - 1, 1),
+    parser.add_option('-p', '--processors',
+                      default=max(multiprocessing.cpu_count() - 1, 1),
                       type='int', dest='processors',
                       help='number of threads for processing')
 
     return parser.parse_args()
 
+
 ExpData = namedtuple('ExpData', ['path', 'params', 'work_dir'])
 
+
+def parse_exp(exp_force):
+    # Tupled for multiprocessing
+    exp, force = exp_force
+
+    result_file = exp.work_dir + "/exp_point.pkl"
+    should_load = not force and os.path.exists(result_file)
+
+    result = None
+    if should_load:
+        with open(result_file, 'rb') as f:
+            try:
+                # No need to go through this work twice
+                result = pickle.load(f)
+            except:
+                pass
+
+    if not result:
+        try:
+            # Create a readable name
+            name = os.path.relpath(exp.path)
+            name = name if name != "." else os.path.split(os.getcwd())[1]
+
+            result = ExpPoint(name)
+
+            # Write overheads into result
+            cycles = exp.params[PARAMS['cycles']]
+            ft.extract_ft_data(result, exp.path, exp.work_dir, cycles)
+
+            # Write scheduling statistics into result
+            st.extract_sched_data(result, exp.path, exp.work_dir)
+
+            with open(result_file, 'wb') as f:
+                pickle.dump(result, f)
+        except:
+            traceback.print_exc()
+
+    return (exp, result)
+
+
 def get_exp_params(data_dir, cm_builder):
     param_file = "%s/%s" % (data_dir, DEFAULTS['params_file'])
     if os.path.isfile(param_file):
-        params = load_params(param_file)
+        params = com.load_params(param_file)
 
         # Store parameters in cm_builder, which will track which parameters change
         # across experiments
@@ -83,41 +126,8 @@ def load_exps(exp_dirs, cm_builder, force):
 
     return exps
 
 
-def parse_exp(exp_force):
-    # Tupled for multiprocessing
-    exp, force = exp_force
-
-    result_file = exp.work_dir + "/exp_point.pkl"
-    should_load = not force and os.path.exists(result_file)
-
-    result = None
-    if should_load:
-        with open(result_file, 'rb') as f:
-            try:
-                # No need to go through this work twice
-                result = pickle.load(f)
-            except:
-                pass
-    if not result:
-        try:
-            result = ExpPoint(exp.path)
-            cycles = exp.params[PARAMS['cycles']]
-
-            # Write overheads into result
-            ft.extract_ft_data(result, exp.path, exp.work_dir, cycles)
-
-            # Write scheduling statistics into result
-            st.extract_sched_data(result, exp.path, exp.work_dir)
-
-            with open(result_file, 'wb') as f:
-                pickle.dump(result, f)
-        except:
-            traceback.print_exc()
-
-    return (exp, result)
-
-def get_exps(args):
+def get_dirs(args):
     if args:
         return args
     elif os.path.exists(DEFAULTS['out-run']):
@@ -128,38 +138,32 @@ def get_exps(args):
         sys.stderr.write("Reading data from current directory.\n")
         return [os.getcwd()]
 
-def main():
-    opts, args = parse_args()
-    exp_dirs = get_exps(args)
-
-    # Load exp parameters into a ColMap
-    builder = ColMapBuilder()
-    exps = load_exps(exp_dirs, builder, opts.force)
-    # Don't track changes in ignored parameters
-    if opts.ignore:
-        for param in opts.ignore.split(","):
-            builder.try_remove(param)
-    builder.try_remove(PARAMS['trial']) # Always average multiple trials
-    builder.try_remove(PARAMS['cycles']) # Only need for feather-trace parsing
+def fill_table(table, exps, opts):
+    sys.stderr.write("Parsing data...\n")
 
-    col_map = builder.build()
-    result_table = TupleTable(col_map)
+    procs = min(len(exps), opts.processors)
+    logged = multiprocessing.Manager().list()
 
-    sys.stderr.write("Parsing data...\n")
+    pool = multiprocessing.Pool(processes=procs,
+                # Share a list of previously logged messages amongst processes
+                # This is for the com.log_once method to use
+                initializer=com.set_logged_list, initargs=(logged,))
 
-    procs = min(len(exps), opts.processors)
-    pool = Pool(processes=procs)
     pool_args = zip(exps, [opts.force]*len(exps))
     enum = pool.imap_unordered(parse_exp, pool_args, 1)
 
     try:
         for i, (exp, result) in enumerate(enum):
+            if not result:
+                continue
+
             if opts.verbose:
                 print(result)
             else:
                 sys.stderr.write('\r {0:.2%}'.format(float(i)/len(exps)))
-            result_table[exp.params] += [result]
+
+            table[exp.params] += [result]
+
         pool.close()
     except:
         pool.terminate()
@@ -170,16 +174,17 @@ def main():
 
     sys.stderr.write('\n')
 
-    if opts.force and os.path.exists(opts.out):
-        sh.rmtree(opts.out)
 
-    reduced_table = result_table.reduce()
+def write_output(table, opts):
+    reduced_table = table.reduce()
 
     if opts.write_map:
         sys.stderr.write("Writing python map into %s...\n" % opts.out)
-        # Write summarized results into map
         reduced_table.write_map(opts.out)
     else:
+        if opts.force and os.path.exists(opts.out):
+            sh.rmtree(opts.out)
+
         # Write out csv directories for all variable params
         dir_map = reduced_table.to_dir_map()
 
@@ -188,12 +193,42 @@ def main():
             if not opts.verbose:
                 sys.stderr.write("Too little data to make csv files, " +
                                  "printing results.\n")
-                for key, exp in result_table:
+                for key, exp in table:
                     for e in exp:
                         print(e)
         else:
             sys.stderr.write("Writing csvs into %s...\n" % opts.out)
             dir_map.write(opts.out)
 
+
+def main():
+    opts, args = parse_args()
+    exp_dirs = get_dirs(args)
+
+    # Load experiment parameters into a ColMap
+    builder = ColMapBuilder()
+    exps = load_exps(exp_dirs, builder, opts.force)
+
+    # Don't track changes in ignored parameters
+    if opts.ignore:
+        for param in opts.ignore.split(","):
+            builder.try_remove(param)
+
+    # Always average multiple trials
+    builder.try_remove(PARAMS['trial'])
+    # Only need this for feather-trace parsing
+    builder.try_remove(PARAMS['cycles'])
+
+    col_map = builder.build()
+    table = TupleTable(col_map)
+
+    fill_table(table, exps, opts)
+
+    if not table:
+        sys.stderr.write("Found no data to parse!")
+        sys.exit(1)
+
+    write_output(table, opts)
+
 if __name__ == '__main__':
     main()
--
cgit v1.2.2
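
Note: common.log_once() and common.set_logged_list() are referenced above, but
common.py is not part of this diff. The following is a minimal illustrative
sketch, assuming only what the call sites in fill_table() suggest: the Pool
initializer stashes the Manager-backed list in each worker, and log_once()
prints a message at most once across all workers. The real helpers may differ.

    # Sketch only -- not taken from this patch; names mirror the call sites.
    import multiprocessing

    _logged = None  # per-process reference to the shared Manager().list()

    def set_logged_list(logged):
        # Installed in each worker via Pool(initializer=set_logged_list,
        # initargs=(logged,)), as done in fill_table() above.
        global _logged
        _logged = logged

    def log_once(msg):
        # The list proxy is shared by all workers, so a message recorded by
        # one process is remembered by the others. The check-then-append is
        # not atomic; a real implementation would likely also share a lock.
        if _logged is not None and msg not in _logged:
            _logged.append(msg)
            print(msg)

A parser would then call log_once(...) the first time it drops a task's
measurements, which is the behavior described in the commit message.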
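
The filtering rules themselves (drop tasks missing more than 20% of their
scheduling records, ignore the last job, compute miss ratio only over jobs
whose release and completion records were both found) live in parse/sched.py
and config/config.py, which are not shown in this diff. The sketch below only
illustrates that policy; MAX_RECORD_LOSS and the job fields are hypothetical
names, not the actual code.

    # Hypothetical illustration of the described policy; parse/sched.py may
    # be organized differently.
    MAX_RECORD_LOSS = 0.2  # stand-in for the configurable 20% threshold

    def enough_records(found_jobs, expected_jobs):
        # Ignore a task whose sched_trace data lost too many records.
        loss = 1.0 - float(found_jobs) / expected_jobs
        return loss <= MAX_RECORD_LOSS

    def miss_ratio(jobs):
        # Skip the task's last job and only count jobs whose release and
        # completion records were both found, per the commit message.
        usable = [j for j in jobs[:-1]
                  if j.release is not None and j.completion is not None]
        if not usable:
            return None
        missed = sum(1 for j in usable if j.completion > j.deadline)
        return float(missed) / len(usable)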