From 7545402506aa76261e18d85af585ff0ac1cf05c1 Mon Sep 17 00:00:00 2001
From: Jonathan Herman
Date: Tue, 23 Apr 2013 14:01:35 -0400
Subject: Improved accuracy of sched_trace measurement parsing.

* Measurements from tasks missing more than 20% of their scheduling
  records are ignored. This threshold is configurable in config/config.py.
* Measurements which contain only zero values are ignored.
* If either of these two situations is encountered, a message is printed
  the first time using the common.log_once() method. See parse_exps.py for
  how this is shared across multiple worker processes.
* Measurements from a task's last job are ignored.
* Miss ratio is calculated only as a fraction of the number of jobs whose
  matching release and completion records were both found, not of all jobs
  with a release record.
---
 parse_exps.py | 159 +++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 97 insertions(+), 62 deletions(-)

diff --git a/parse_exps.py b/parse_exps.py
index c2cbedb..cc4372a 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 from __future__ import print_function
 
+import common as com
+import multiprocessing
 import os
 import parse.ft as ft
 import parse.sched as st
@@ -10,13 +12,12 @@ import sys
 import traceback
 
 from collections import namedtuple
-from common import load_params
 from config.config import DEFAULTS,PARAMS
 from optparse import OptionParser
 from parse.point import ExpPoint
 from parse.tuple_table import TupleTable
 from parse.col_map import ColMapBuilder
-from multiprocessing import Pool, cpu_count
+
 
 def parse_args():
     parser = OptionParser("usage: %prog [options] [data_dir]...")
@@ -33,18 +34,60 @@ def parse_args():
     parser.add_option('-m', '--write-map', action='store_true', default=False,
                       dest='write_map',
                       help='Output map of values instead of csv tree')
-    parser.add_option('-p', '--processors', default=max(cpu_count() - 1, 1),
+    parser.add_option('-p', '--processors',
+                      default=max(multiprocessing.cpu_count() - 1, 1),
                       type='int', dest='processors',
                       help='number of threads for processing')
 
     return parser.parse_args()
 
+
 ExpData = namedtuple('ExpData', ['path', 'params', 'work_dir'])
 
+
+def parse_exp(exp_force):
+    # Tupled for multiprocessing
+    exp, force = exp_force
+
+    result_file = exp.work_dir + "/exp_point.pkl"
+    should_load = not force and os.path.exists(result_file)
+
+    result = None
+    if should_load:
+        with open(result_file, 'rb') as f:
+            try:
+                # No need to go through this work twice
+                result = pickle.load(f)
+            except:
+                pass
+
+    if not result:
+        try:
+            # Create a readable name
+            name = os.path.relpath(exp.path)
+            name = name if name != "." else os.path.split(os.getcwd())[1]
+
+            result = ExpPoint(name)
+
+            # Write overheads into result
+            cycles = exp.params[PARAMS['cycles']]
+            ft.extract_ft_data(result, exp.path, exp.work_dir, cycles)
+
+            # Write scheduling statistics into result
+            st.extract_sched_data(result, exp.path, exp.work_dir)
+
+            with open(result_file, 'wb') as f:
+                pickle.dump(result, f)
+        except:
+            traceback.print_exc()
+
+    return (exp, result)
+
+
 def get_exp_params(data_dir, cm_builder):
     param_file = "%s/%s" % (data_dir, DEFAULTS['params_file'])
     if os.path.isfile(param_file):
-        params = load_params(param_file)
+        params = com.load_params(param_file)
 
         # Store parameters in cm_builder, which will track which parameters change
         # across experiments
@@ -83,41 +126,8 @@ def load_exps(exp_dirs, cm_builder, force):
 
     return exps
 
 
-def parse_exp(exp_force):
-    # Tupled for multiprocessing
-    exp, force = exp_force
-
-    result_file = exp.work_dir + "/exp_point.pkl"
-    should_load = not force and os.path.exists(result_file)
-
-    result = None
-    if should_load:
-        with open(result_file, 'rb') as f:
-            try:
-                # No need to go through this work twice
-                result = pickle.load(f)
-            except:
-                pass
-    if not result:
-        try:
-            result = ExpPoint(exp.path)
-            cycles = exp.params[PARAMS['cycles']]
-
-            # Write overheads into result
-            ft.extract_ft_data(result, exp.path, exp.work_dir, cycles)
-
-            # Write scheduling statistics into result
-            st.extract_sched_data(result, exp.path, exp.work_dir)
-
-            with open(result_file, 'wb') as f:
-                pickle.dump(result, f)
-        except:
-            traceback.print_exc()
-
-    return (exp, result)
-
-def get_exps(args):
+def get_dirs(args):
     if args:
         return args
     elif os.path.exists(DEFAULTS['out-run']):
@@ -128,38 +138,32 @@ def get_exps(args):
         sys.stderr.write("Reading data from current directory.\n")
         return [os.getcwd()]
 
-def main():
-    opts, args = parse_args()
-    exp_dirs = get_exps(args)
-
-    # Load exp parameters into a ColMap
-    builder = ColMapBuilder()
-    exps = load_exps(exp_dirs, builder, opts.force)
-    # Don't track changes in ignored parameters
-    if opts.ignore:
-        for param in opts.ignore.split(","):
-            builder.try_remove(param)
-    builder.try_remove(PARAMS['trial']) # Always average multiple trials
-    builder.try_remove(PARAMS['cycles']) # Only need for feather-trace parsing
+def fill_table(table, exps, opts):
+    sys.stderr.write("Parsing data...\n")
 
-    col_map = builder.build()
-    result_table = TupleTable(col_map)
+    procs = min(len(exps), opts.processors)
+    logged = multiprocessing.Manager().list()
 
-    sys.stderr.write("Parsing data...\n")
+    pool = multiprocessing.Pool(processes=procs,
+                # Share a list of previously logged messages amongst processes
+                # This is for the com.log_once method to use
+                initializer=com.set_logged_list, initargs=(logged,))
 
-    procs = min(len(exps), opts.processors)
-    pool = Pool(processes=procs)
     pool_args = zip(exps, [opts.force]*len(exps))
     enum = pool.imap_unordered(parse_exp, pool_args, 1)
 
     try:
         for i, (exp, result) in enumerate(enum):
+            if not result:
+                continue
+
             if opts.verbose:
                 print(result)
             else:
                 sys.stderr.write('\r {0:.2%}'.format(float(i)/len(exps)))
-            result_table[exp.params] += [result]
+
+            table[exp.params] += [result]
+
         pool.close()
     except:
         pool.terminate()
@@ -170,16 +174,17 @@ def main():
 
     sys.stderr.write('\n')
 
-    if opts.force and os.path.exists(opts.out):
-        sh.rmtree(opts.out)
 
-    reduced_table = result_table.reduce()
+def write_output(table, opts):
+    reduced_table = table.reduce()
 
     if opts.write_map:
         sys.stderr.write("Writing python map into %s...\n" % opts.out)
-        # Write summarized results into map
         reduced_table.write_map(opts.out)
     else:
+        if opts.force and os.path.exists(opts.out):
+            sh.rmtree(opts.out)
+
         # Write out csv directories for all variable params
         dir_map = reduced_table.to_dir_map()
 
@@ -188,12 +193,42 @@ def main():
             if not opts.verbose:
                 sys.stderr.write("Too little data to make csv files, " +
                                  "printing results.\n")
-                for key, exp in result_table:
+                for key, exp in table:
                     for e in exp:
                         print(e)
         else:
             sys.stderr.write("Writing csvs into %s...\n" % opts.out)
             dir_map.write(opts.out)
 
+
+def main():
+    opts, args = parse_args()
+    exp_dirs = get_dirs(args)
+
+    # Load experiment parameters into a ColMap
+    builder = ColMapBuilder()
+    exps = load_exps(exp_dirs, builder, opts.force)
+
+    # Don't track changes in ignored parameters
+    if opts.ignore:
+        for param in opts.ignore.split(","):
+            builder.try_remove(param)
+
+    # Always average multiple trials
+    builder.try_remove(PARAMS['trial'])
+    # Only need this for feather-trace parsing
+    builder.try_remove(PARAMS['cycles'])
+
+    col_map = builder.build()
+    table = TupleTable(col_map)
+
+    fill_table(table, exps, opts)
+
+    if not table:
+        sys.stderr.write("Found no data to parse!")
+        sys.exit(1)
+
+    write_output(table, opts)
+
 if __name__ == '__main__':
     main()
--
cgit v1.2.2
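
Note: common.log_once() and common.set_logged_list() are referenced above, but
common.py is not part of this diff. The following is a minimal illustrative
sketch, assuming only what the call sites in fill_table() suggest: the Pool
initializer stashes the Manager-backed list in each worker, and log_once()
prints a message at most once across all workers. The real helpers may differ.

    # Sketch only -- not taken from this patch; names mirror the call sites.
    import multiprocessing

    _logged = None  # per-process reference to the shared Manager().list()

    def set_logged_list(logged):
        # Installed in each worker via Pool(initializer=set_logged_list,
        # initargs=(logged,)), as done in fill_table() above.
        global _logged
        _logged = logged

    def log_once(msg):
        # The list proxy is shared by all workers, so a message recorded by
        # one process is remembered by the others. The check-then-append is
        # not atomic; a real implementation would likely also share a lock.
        if _logged is not None and msg not in _logged:
            _logged.append(msg)
            print(msg)

A parser would then call log_once(...) the first time it drops a task's
measurements, which is the behavior described in the commit message.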
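
The filtering rules themselves (drop tasks missing more than 20% of their
scheduling records, ignore the last job, compute miss ratio only over jobs
whose release and completion records were both found) live in parse/sched.py
and config/config.py, which are not shown in this diff. The sketch below only
illustrates that policy; MAX_RECORD_LOSS and the job fields are hypothetical
names, not the actual code.

    # Hypothetical illustration of the described policy; parse/sched.py may
    # be organized differently.
    MAX_RECORD_LOSS = 0.2  # stand-in for the configurable 20% threshold

    def enough_records(found_jobs, expected_jobs):
        # Ignore a task whose sched_trace data lost too many records.
        loss = 1.0 - float(found_jobs) / expected_jobs
        return loss <= MAX_RECORD_LOSS

    def miss_ratio(jobs):
        # Skip the task's last job and only count jobs whose release and
        # completion records were both found, per the commit message.
        usable = [j for j in jobs[:-1]
                  if j.release is not None and j.completion is not None]
        if not usable:
            return None
        missed = sum(1 for j in usable if j.completion > j.deadline)
        return float(missed) / len(usable)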