| field | value | date |
|---|---|---|
| author | Jonathan Herman <hermanjl@cs.unc.edu> | 2012-10-30 16:04:23 -0400 |
| committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2012-10-30 16:04:23 -0400 |
| commit | 53cfcf10531256d0e4411a7e0bda431ec27f28e7 | |
| tree | d9119ebe9658f4f41ab870811b6a89f6e9683cbc | |
| parent | 2e804f8fa7d26755088e47357f8de6feb6dbe292 | |
Process all non-scaling statistics for base experiments.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | parse/ft.py | 2 |
| -rw-r--r-- | parse/sched.py | 13 |
| -rw-r--r-- | parse/tuple_table.py | 10 |
| -rwxr-xr-x | parse_exps.py | 53 |

4 files changed, 56 insertions, 22 deletions
```diff
diff --git a/parse/ft.py b/parse/ft.py
index feb338f..4e310b0 100644
--- a/parse/ft.py
+++ b/parse/ft.py
@@ -20,7 +20,6 @@ def get_ft_output(data_dir, out_dir, force=False):
         if force:
             os.remove(output_file)
         else:
-            print("ft-output already exists for %s" % data_dir)
             return output_file
 
     if len(bins) != 0:
@@ -41,7 +40,6 @@ def get_ft_output(data_dir, out_dir, force=False):
         # Analyze will summarize those
         # todo pass in f
         cmd_arr = [conf.BINS['analyze']]
-        print("cmd arr: %s-%s" % (cmd_arr, bins))
         cmd_arr.extend(bins)
         with open(output_file, "w") as f:
             subprocess.call(cmd_arr, cwd=out_dir, stdout=f, stderr=err_file)
```
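Both deletions remove debug prints from the cache-or-regenerate logic in `get_ft_output`: an existing overhead file is now reused silently, and the assembled `analyze` command line is no longer echoed. Roughly, the pattern the function follows (a simplified sketch; the argument list and file naming are stand-ins for the module's real details):

```python
import os
import subprocess

def get_output(output_file, bins, analyze_bin, out_dir, force=False):
    """Reuse a previously generated output file unless force is set."""
    if os.path.exists(output_file):
        if force:
            os.remove(output_file)      # throw away the stale result
        else:
            return output_file          # reuse silently (print removed here)

    if bins:
        cmd_arr = [analyze_bin] + list(bins)
        with open(output_file, "w") as f:
            # write the analyzer's stdout into the cached output file
            subprocess.call(cmd_arr, cwd=out_dir, stdout=f)

    return output_file
```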
```diff
diff --git a/parse/sched.py b/parse/sched.py
index 80764b6..bbf6e10 100644
--- a/parse/sched.py
+++ b/parse/sched.py
@@ -11,6 +11,7 @@ import os
 import re
 import numpy as np
 import subprocess
+import pprint
 
 from collections import namedtuple,defaultdict
 from operator import methodcaller
@@ -54,6 +55,7 @@ class LeveledArray(object):
     def add(self, task, value):
         self.vals[task.config.level] += [value]
 
+
     def write_measurements(self, result):
         for level, arr in self.vals.iteritems():
             name = "%s%s" % ("%s-" % level if level else "", self.name)
@@ -72,7 +74,6 @@ def get_st_output(data_dir, out_dir, force=False):
         if force:
             os.remove(output_file)
         else:
-            print("st-output already exists for %s" % data_dir)
             return output_file
 
     if len(bins) != 0:
@@ -195,8 +196,11 @@ def extract_variance(task_dict, data, exp_point):
         completions[pid] += [duration]
 
     for pid, durations in completions.iteritems():
+        m = Measurement(pid).from_array(durations)
+
         # TODO: not this, please
-        task_dict[pid].run.append(Measurement(pid).from_array(durations))
+        if not task_dict[pid].run:
+            task_dict[pid].run.append(m)
 
         job_times = np.array(durations)
         mean = job_times.mean()
@@ -210,6 +214,7 @@ def extract_variance(task_dict, data, exp_point):
         corrected = (1 + 1/(4 * len(job_times))) * cv
 
         varz.add(task_dict[pid], corrected)
+        # varz.add(task_dict[pid], m[Type.Var])
 
     if exp_point:
         map(methodcaller('write_measurements', exp_point),
@@ -272,17 +277,13 @@ def extract_scaling_data(task_dict, data, result, base_file):
     for data_stat, base_stat in zip(data_stats[config],base_stats[config]):
         if not base_stat[Type.Avg] or not base_stat[Type.Max] or \
            not data_stat[Type.Avg] or not data_stat[Type.Max]:
-            print("missing a thing: {},{}".format(base_stat, data_stat))
             continue
         # How much larger is their exec stat than ours?
-        print("%s vs %s" % (base_stat, data_stat))
         avg_scale = float(base_stat[Type.Avg]) / float(data_stat[Type.Avg])
         max_scale = float(base_stat[Type.Max]) / float(data_stat[Type.Max])
 
         task = task_dict[data_stat.id]
 
-        print("scaling for %s" % data_stat.id)
-
         avg_scales.add(task, avg_scale)
         max_scales.add(task, max_scale)
 
```
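In `extract_variance`, the per-task `Measurement` is now built once up front and appended to `task_dict[pid].run` only if no run measurement exists yet, so repeated parses no longer stack duplicates. The statistic recorded is the bias-corrected coefficient of variation, cv_corrected = (1 + 1/(4n)) * cv. A standalone sketch of that computation follows; note the explicit `1.0`: in the Python 2 context line above, `1/(4 * len(job_times))` is integer division and silently evaluates to zero, leaving cv uncorrected.

```python
import numpy as np

def corrected_cv(durations):
    """Bias-corrected coefficient of variation of a task's job completion times."""
    job_times = np.array(durations, dtype=float)
    cv = job_times.std() / job_times.mean()   # raw coefficient of variation
    n = len(job_times)
    return (1 + 1.0 / (4 * n)) * cv           # small-sample bias correction

# e.g. four jobs with nearly identical completion times -> cv close to 0
print(corrected_cv([10.1, 9.8, 10.3, 10.0]))
```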
```diff
diff --git a/parse/tuple_table.py b/parse/tuple_table.py
index 465abb3..e5dc39b 100644
--- a/parse/tuple_table.py
+++ b/parse/tuple_table.py
@@ -21,7 +21,6 @@ class ColMap(object):
                 key += (None,)
             else:
                 key += (kv[col],)
-
         return key
 
     def __contains__(self, col):
@@ -43,6 +42,15 @@ class ColMap(object):
             self.value_map[column] = value
         elif value != self.value_map[column]:
             self.force_add(column)
+            del(self.value_map[column])
+
+    def try_remove(self, column):
+        if column in self.rev_map:
+            idx = self.rev_map[column]
+            for value in self.col_list[idx+1:]:
+                self.rev_map[value] -= 1
+            del(self.col_list[self.rev_map[column]])
+            del(self.rev_map[column])
 
     def __str__(self):
         return "<ColMap>%s" % (self.rev_map)
```
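The new `try_remove` drops a column from a `ColMap` after registration: it finds the column's index, shifts the stored index of every later column down by one, and deletes the entry from both the ordered column list and the reverse (name-to-index) map. The `del(self.value_map[column])` added above it keeps a parameter from being treated as constant once two different values have been seen. A minimal standalone sketch of the index-shifting removal, using plain containers instead of `ColMap`:

```python
def try_remove(col_list, rev_map, column):
    """Remove 'column' and re-index every column that followed it."""
    if column not in rev_map:
        return                        # unknown columns are silently ignored
    idx = rev_map[column]
    for later in col_list[idx + 1:]:
        rev_map[later] -= 1           # columns after 'column' shift left by one
    del col_list[idx]
    del rev_map[column]

cols = ["scheduler", "cpus", "trial"]
rev = {c: i for i, c in enumerate(cols)}
try_remove(cols, rev, "cpus")
print(cols, rev)   # ['scheduler', 'trial'] {'scheduler': 0, 'trial': 1}
```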
```diff
diff --git a/parse_exps.py b/parse_exps.py
index 2d1c370..87d0783 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -8,6 +8,7 @@ import parse.ft as ft
 import parse.sched as st
 import re
 import shutil as sh
+import sys
 
 from collections import namedtuple
 from common import load_params
@@ -16,18 +17,20 @@ from parse.point import ExpPoint
 from parse.tuple_table import ColMap,TupleTable
 
 def parse_args():
-    # TODO: convert data-dir to proper option
+    # TODO: convert data-dir to proper option, clean 'dest' options
     parser = OptionParser("usage: %prog [options] [data_dir]...")
 
     parser.add_option('-o', '--out', dest='out',
                       help='file or directory for data output', default='parse-data')
 
-    # TODO: this means nothing
+    # TODO: this means nothing, also remove dests
     parser.add_option('-c', '--clean', action='store_true', default=False,
                       dest='clean', help='do not output single-point csvs')
     parser.add_option('-s', '--scale-against', dest='scale_against',
                       metavar='PARAM=VALUE', default="",
                       help='calculate task scaling factors against these configs')
+    parser.add_option('-i', '--ignore', metavar='[PARAM...]', default="",
+                      help='ignore changing parameter values')
     parser.add_option('-f', '--force', action='store_true', default=False,
                       dest='force', help='overwrite existing data')
     parser.add_option('-v', '--verbose', action='store_true', default=False,
@@ -38,7 +41,7 @@ def parse_args():
 
     return parser.parse_args()
 
-ExpData = namedtuple('ExpData', ['name', 'params', 'data_files'])
+ExpData = namedtuple('ExpData', ['name', 'params', 'data_files', 'is_base'])
 DataFiles = namedtuple('DataFiles', ['ft','st'])
 
 def get_exp_params(data_dir, col_map):
@@ -63,7 +66,9 @@ def gen_exp_data(exp_dirs, base_conf, col_map, force):
     plain_exps = []
     scaling_bases = []
 
-    for data_dir in exp_dirs:
+    sys.stderr.write("Generating data...\n")
+
+    for i, data_dir in enumerate(exp_dirs):
         if not os.path.isdir(data_dir):
             raise IOError("Invalid experiment '%s'" % os.path.abspath(data_dir))
 
@@ -76,18 +81,30 @@ def gen_exp_data(exp_dirs, base_conf, col_map, force):
         st_output = st.get_st_output(data_dir, tmp_dir, force)
         ft_output = ft.get_ft_output(data_dir, tmp_dir, force)
 
-        # Create experiment named after the data dir
-        exp_data = ExpData(data_dir, params, DataFiles(ft_output, st_output))
 
         if base_conf and base_conf.viewitems() & params.viewitems():
             if not st_output:
                 raise Exception("Scaling base '%s' useless without sched data!"
                                 % data_dir)
-            params.pop(base_conf.keys()[0])
-            scaling_bases += [exp_data]
+            is_base = True
+
+            base_params = copy.deepcopy(params)
+            base_params.pop(base_conf.keys()[0])
+
+            base_exp = ExpData(data_dir, base_params,
+                               DataFiles(ft_output, st_output), True)
+            scaling_bases += [base_exp]
         else:
-            plain_exps += [exp_data]
+            is_base = False
 
+        # Create experiment named after the data dir
+        exp_data = ExpData(data_dir, params,
+                           DataFiles(ft_output, st_output), is_base)
+
+        plain_exps += [exp_data]
+
+        sys.stderr.write('\r {0:.2%}'.format(float(i)/len(exp_dirs)))
+    sys.stderr.write('\n')
     return (plain_exps, scaling_bases)
 
 def main():
@@ -107,14 +124,20 @@ def main():
         raise IOError("Base column '%s' not present in any parameters!" %
                       base_conf.keys()[0])
 
-    base_table = TupleTable(col_map)   # For tracking 'base' experiments
-    result_table = TupleTable(col_map) # For generating csv directories
+    base_map = copy.deepcopy(col_map)
+    if opts.ignore:
+        for param in opts.ignore.split(","):
+            col_map.try_remove(param)
+
+    base_table = TupleTable(base_map)   # For tracking 'base' experiments
+    result_table = TupleTable(col_map)  # For generating output
 
     # Used to find matching scaling_base for each experiment
     for base in scaling_bases:
         base_table.add_exp(base.params, base)
 
-    for exp in plain_exps:
+    sys.stderr.write("Parsing data...\n")
+    for i,exp in enumerate(plain_exps):
         result = ExpPoint(exp.name)
 
         if exp.data_files.ft:
@@ -123,7 +146,7 @@ def main():
 
         if exp.data_files.st:
             base = None
-            if base_conf:
+            if base_conf and not exp.is_base:
                 # Try to find a scaling base
                 base_params = copy.deepcopy(exp.params)
                 base_params.pop(base_conf.keys()[0])
@@ -137,12 +160,16 @@ def main():
 
         if opts.verbose:
             print(result)
+        else:
+            sys.stderr.write('\r {0:.2%}'.format(float(i)/len(plain_exps)))
+    sys.stderr.write('\n')
 
     if opts.force and os.path.exists(opts.out):
         sh.rmtree(opts.out)
 
     result_table.reduce()
 
+    sys.stderr.write("Writing result...\n")
     if opts.write_map:
         # Write summarized results into map
         result_table.write_map(opts.out)
```
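Taken together, the driver now puts every experiment, scaling base or not, into `plain_exps`, so a base's non-scaling statistics are parsed like anyone else's; only non-base experiments (`not exp.is_base`) search for a scaling base; and the lookup keys on the full parameter set minus the `-s PARAM=VALUE` parameter. A dict-based sketch of that matching rule (experiment names and parameters are invented for illustration; the real script goes through `TupleTable`):

```python
import copy

# Hypothetical experiment list: (params, name). 'scheduler' stands in for
# the -s PARAM, with scheduler=BASE as the base configuration.
base_conf = {'scheduler': 'BASE'}
exps = [
    ({'scheduler': 'BASE',    'cpus': 4}, 'base-4cpu'),
    ({'scheduler': 'GSN-EDF', 'cpus': 4}, 'edf-4cpu'),
]

def key_without_base(params):
    """Match key: every parameter except the one named by -s."""
    rest = copy.deepcopy(params)
    rest.pop('scheduler')
    return tuple(sorted(rest.items()))

# Pass 1: index the scaling bases (they also stay in the main list now,
# so their own statistics get parsed).
bases = {key_without_base(p): name for p, name in exps
         if base_conf.items() <= p.items()}

# Pass 2: only non-base experiments look up a scaling base.
for params, name in exps:
    if not base_conf.items() <= params.items():    # i.e. not exp.is_base
        match = bases.get(key_without_base(params))
        print("%s scales against %s" % (name, match))  # edf-4cpu -> base-4cpu
```

The new `-i/--ignore` flag feeds `ColMap.try_remove`, so a parameter such as a trial counter can be kept out of the generated output while base matching still sees the full parameter set: `base_map` is deep-copied before the ignored columns are removed from `col_map`.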
