From 53cfcf10531256d0e4411a7e0bda431ec27f28e7 Mon Sep 17 00:00:00 2001 From: Jonathan Herman Date: Tue, 30 Oct 2012 16:04:23 -0400 Subject: Process all non-scaling statistics for base experiments. --- parse/ft.py | 2 -- parse/sched.py | 13 +++++++------ parse/tuple_table.py | 10 +++++++++- parse_exps.py | 53 +++++++++++++++++++++++++++++++++++++++------------- 4 files changed, 56 insertions(+), 22 deletions(-) diff --git a/parse/ft.py b/parse/ft.py index feb338f..4e310b0 100644 --- a/parse/ft.py +++ b/parse/ft.py @@ -20,7 +20,6 @@ def get_ft_output(data_dir, out_dir, force=False): if force: os.remove(output_file) else: - print("ft-output already exists for %s" % data_dir) return output_file if len(bins) != 0: @@ -41,7 +40,6 @@ def get_ft_output(data_dir, out_dir, force=False): # Analyze will summarize those # todo pass in f cmd_arr = [conf.BINS['analyze']] - print("cmd arr: %s-%s" % (cmd_arr, bins)) cmd_arr.extend(bins) with open(output_file, "w") as f: subprocess.call(cmd_arr, cwd=out_dir, stdout=f, stderr=err_file) diff --git a/parse/sched.py b/parse/sched.py index 80764b6..bbf6e10 100644 --- a/parse/sched.py +++ b/parse/sched.py @@ -11,6 +11,7 @@ import os import re import numpy as np import subprocess +import pprint from collections import namedtuple,defaultdict from operator import methodcaller @@ -54,6 +55,7 @@ class LeveledArray(object): def add(self, task, value): self.vals[task.config.level] += [value] + def write_measurements(self, result): for level, arr in self.vals.iteritems(): name = "%s%s" % ("%s-" % level if level else "", self.name) @@ -72,7 +74,6 @@ def get_st_output(data_dir, out_dir, force=False): if force: os.remove(output_file) else: - print("st-output already exists for %s" % data_dir) return output_file if len(bins) != 0: @@ -195,8 +196,11 @@ def extract_variance(task_dict, data, exp_point): completions[pid] += [duration] for pid, durations in completions.iteritems(): + m = Measurement(pid).from_array(durations) + # TODO: not this, please - task_dict[pid].run.append(Measurement(pid).from_array(durations)) + if not task_dict[pid].run: + task_dict[pid].run.append(m) job_times = np.array(durations) mean = job_times.mean() @@ -210,6 +214,7 @@ def extract_variance(task_dict, data, exp_point): corrected = (1 + 1/(4 * len(job_times))) * cv varz.add(task_dict[pid], corrected) + # varz.add(task_dict[pid], m[Type.Var]) if exp_point: map(methodcaller('write_measurements', exp_point), @@ -272,17 +277,13 @@ def extract_scaling_data(task_dict, data, result, base_file): for data_stat, base_stat in zip(data_stats[config],base_stats[config]): if not base_stat[Type.Avg] or not base_stat[Type.Max] or \ not data_stat[Type.Avg] or not data_stat[Type.Max]: - print("missing a thing: {},{}".format(base_stat, data_stat)) continue # How much larger is their exec stat than ours? - print("%s vs %s" % (base_stat, data_stat)) avg_scale = float(base_stat[Type.Avg]) / float(data_stat[Type.Avg]) max_scale = float(base_stat[Type.Max]) / float(data_stat[Type.Max]) task = task_dict[data_stat.id] - print("scaling for %s" % data_stat.id) - avg_scales.add(task, avg_scale) max_scales.add(task, max_scale) diff --git a/parse/tuple_table.py b/parse/tuple_table.py index 465abb3..e5dc39b 100644 --- a/parse/tuple_table.py +++ b/parse/tuple_table.py @@ -21,7 +21,6 @@ class ColMap(object): key += (None,) else: key += (kv[col],) - return key def __contains__(self, col): @@ -43,6 +42,15 @@ class ColMap(object): self.value_map[column] = value elif value != self.value_map[column]: self.force_add(column) + del(self.value_map[column]) + + def try_remove(self, column): + if column in self.rev_map: + idx = self.rev_map[column] + for value in self.col_list[idx+1:]: + self.rev_map[value] -= 1 + del(self.col_list[self.rev_map[column]]) + del(self.rev_map[column]) def __str__(self): return "%s" % (self.rev_map) diff --git a/parse_exps.py b/parse_exps.py index 2d1c370..87d0783 100755 --- a/parse_exps.py +++ b/parse_exps.py @@ -8,6 +8,7 @@ import parse.ft as ft import parse.sched as st import re import shutil as sh +import sys from collections import namedtuple from common import load_params @@ -16,18 +17,20 @@ from parse.point import ExpPoint from parse.tuple_table import ColMap,TupleTable def parse_args(): - # TODO: convert data-dir to proper option + # TODO: convert data-dir to proper option, clean 'dest' options parser = OptionParser("usage: %prog [options] [data_dir]...") parser.add_option('-o', '--out', dest='out', help='file or directory for data output', default='parse-data') - # TODO: this means nothing + # TODO: this means nothing, also remove dests parser.add_option('-c', '--clean', action='store_true', default=False, dest='clean', help='do not output single-point csvs') parser.add_option('-s', '--scale-against', dest='scale_against', metavar='PARAM=VALUE', default="", help='calculate task scaling factors against these configs') + parser.add_option('-i', '--ignore', metavar='[PARAM...]', default="", + help='ignore changing parameter values') parser.add_option('-f', '--force', action='store_true', default=False, dest='force', help='overwrite existing data') parser.add_option('-v', '--verbose', action='store_true', default=False, @@ -38,7 +41,7 @@ def parse_args(): return parser.parse_args() -ExpData = namedtuple('ExpData', ['name', 'params', 'data_files']) +ExpData = namedtuple('ExpData', ['name', 'params', 'data_files', 'is_base']) DataFiles = namedtuple('DataFiles', ['ft','st']) def get_exp_params(data_dir, col_map): @@ -63,7 +66,9 @@ def gen_exp_data(exp_dirs, base_conf, col_map, force): plain_exps = [] scaling_bases = [] - for data_dir in exp_dirs: + sys.stderr.write("Generating data...\n") + + for i, data_dir in enumerate(exp_dirs): if not os.path.isdir(data_dir): raise IOError("Invalid experiment '%s'" % os.path.abspath(data_dir)) @@ -76,18 +81,30 @@ def gen_exp_data(exp_dirs, base_conf, col_map, force): st_output = st.get_st_output(data_dir, tmp_dir, force) ft_output = ft.get_ft_output(data_dir, tmp_dir, force) - # Create experiment named after the data dir - exp_data = ExpData(data_dir, params, DataFiles(ft_output, st_output)) if base_conf and base_conf.viewitems() & params.viewitems(): if not st_output: raise Exception("Scaling base '%s' useless without sched data!" % data_dir) - params.pop(base_conf.keys()[0]) - scaling_bases += [exp_data] + is_base = True + + base_params = copy.deepcopy(params) + base_params.pop(base_conf.keys()[0]) + + base_exp = ExpData(data_dir, base_params, + DataFiles(ft_output, st_output), True) + scaling_bases += [base_exp] else: - plain_exps += [exp_data] + is_base = False + # Create experiment named after the data dir + exp_data = ExpData(data_dir, params, + DataFiles(ft_output, st_output), is_base) + + plain_exps += [exp_data] + + sys.stderr.write('\r {0:.2%}'.format(float(i)/len(exp_dirs))) + sys.stderr.write('\n') return (plain_exps, scaling_bases) def main(): @@ -107,14 +124,20 @@ def main(): raise IOError("Base column '%s' not present in any parameters!" % base_conf.keys()[0]) - base_table = TupleTable(col_map) # For tracking 'base' experiments - result_table = TupleTable(col_map) # For generating csv directories + base_map = copy.deepcopy(col_map) + if opts.ignore: + for param in opts.ignore.split(","): + col_map.try_remove(param) + + base_table = TupleTable(base_map) # For tracking 'base' experiments + result_table = TupleTable(col_map) # For generating output # Used to find matching scaling_base for each experiment for base in scaling_bases: base_table.add_exp(base.params, base) - for exp in plain_exps: + sys.stderr.write("Parsing data...\n") + for i,exp in enumerate(plain_exps): result = ExpPoint(exp.name) if exp.data_files.ft: @@ -123,7 +146,7 @@ def main(): if exp.data_files.st: base = None - if base_conf: + if base_conf and not exp.is_base: # Try to find a scaling base base_params = copy.deepcopy(exp.params) base_params.pop(base_conf.keys()[0]) @@ -137,12 +160,16 @@ def main(): if opts.verbose: print(result) + else: + sys.stderr.write('\r {0:.2%}'.format(float(i)/len(plain_exps))) + sys.stderr.write('\n') if opts.force and os.path.exists(opts.out): sh.rmtree(opts.out) result_table.reduce() + sys.stderr.write("Writing result...\n") if opts.write_map: # Write summarized results into map result_table.write_map(opts.out) -- cgit v1.2.2