diff options
author | Jonathan Herman <hermanjl@cs.unc.edu> | 2012-10-30 16:04:23 -0400 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2012-10-30 16:04:23 -0400 |
commit | 53cfcf10531256d0e4411a7e0bda431ec27f28e7 (patch) | |
tree | d9119ebe9658f4f41ab870811b6a89f6e9683cbc | |
parent | 2e804f8fa7d26755088e47357f8de6feb6dbe292 (diff) |
Process all non-scaling statistics for base experiments.
-rw-r--r-- | parse/ft.py | 2 | ||||
-rw-r--r-- | parse/sched.py | 13 | ||||
-rw-r--r-- | parse/tuple_table.py | 10 | ||||
-rwxr-xr-x | parse_exps.py | 53 |
4 files changed, 56 insertions, 22 deletions
diff --git a/parse/ft.py b/parse/ft.py
index feb338f..4e310b0 100644
--- a/parse/ft.py
+++ b/parse/ft.py
@@ -20,7 +20,6 @@ def get_ft_output(data_dir, out_dir, force=False): | |||
20 | if force: | 20 | if force: |
21 | os.remove(output_file) | 21 | os.remove(output_file) |
22 | else: | 22 | else: |
23 | print("ft-output already exists for %s" % data_dir) | ||
24 | return output_file | 23 | return output_file |
25 | 24 | ||
26 | if len(bins) != 0: | 25 | if len(bins) != 0: |
@@ -41,7 +40,6 @@ def get_ft_output(data_dir, out_dir, force=False): | |||
41 | # Analyze will summarize those | 40 | # Analyze will summarize those |
42 | # todo pass in f | 41 | # todo pass in f |
43 | cmd_arr = [conf.BINS['analyze']] | 42 | cmd_arr = [conf.BINS['analyze']] |
44 | print("cmd arr: %s-%s" % (cmd_arr, bins)) | ||
45 | cmd_arr.extend(bins) | 43 | cmd_arr.extend(bins) |
46 | with open(output_file, "w") as f: | 44 | with open(output_file, "w") as f: |
47 | subprocess.call(cmd_arr, cwd=out_dir, stdout=f, stderr=err_file) | 45 | subprocess.call(cmd_arr, cwd=out_dir, stdout=f, stderr=err_file) |
diff --git a/parse/sched.py b/parse/sched.py
index 80764b6..bbf6e10 100644
--- a/parse/sched.py
+++ b/parse/sched.py
@@ -11,6 +11,7 @@ import os | |||
11 | import re | 11 | import re |
12 | import numpy as np | 12 | import numpy as np |
13 | import subprocess | 13 | import subprocess |
14 | import pprint | ||
14 | 15 | ||
15 | from collections import namedtuple,defaultdict | 16 | from collections import namedtuple,defaultdict |
16 | from operator import methodcaller | 17 | from operator import methodcaller |
@@ -54,6 +55,7 @@ class LeveledArray(object): | |||
54 | def add(self, task, value): | 55 | def add(self, task, value): |
55 | self.vals[task.config.level] += [value] | 56 | self.vals[task.config.level] += [value] |
56 | 57 | ||
58 | |||
57 | def write_measurements(self, result): | 59 | def write_measurements(self, result): |
58 | for level, arr in self.vals.iteritems(): | 60 | for level, arr in self.vals.iteritems(): |
59 | name = "%s%s" % ("%s-" % level if level else "", self.name) | 61 | name = "%s%s" % ("%s-" % level if level else "", self.name) |
@@ -72,7 +74,6 @@ def get_st_output(data_dir, out_dir, force=False): | |||
72 | if force: | 74 | if force: |
73 | os.remove(output_file) | 75 | os.remove(output_file) |
74 | else: | 76 | else: |
75 | print("st-output already exists for %s" % data_dir) | ||
76 | return output_file | 77 | return output_file |
77 | 78 | ||
78 | if len(bins) != 0: | 79 | if len(bins) != 0: |
@@ -195,8 +196,11 @@ def extract_variance(task_dict, data, exp_point): | |||
195 | completions[pid] += [duration] | 196 | completions[pid] += [duration] |
196 | 197 | ||
197 | for pid, durations in completions.iteritems(): | 198 | for pid, durations in completions.iteritems(): |
199 | m = Measurement(pid).from_array(durations) | ||
200 | |||
198 | # TODO: not this, please | 201 | # TODO: not this, please |
199 | task_dict[pid].run.append(Measurement(pid).from_array(durations)) | 202 | if not task_dict[pid].run: |
203 | task_dict[pid].run.append(m) | ||
200 | 204 | ||
201 | job_times = np.array(durations) | 205 | job_times = np.array(durations) |
202 | mean = job_times.mean() | 206 | mean = job_times.mean() |
@@ -210,6 +214,7 @@ def extract_variance(task_dict, data, exp_point): | |||
210 | corrected = (1 + 1/(4 * len(job_times))) * cv | 214 | corrected = (1 + 1/(4 * len(job_times))) * cv |
211 | 215 | ||
212 | varz.add(task_dict[pid], corrected) | 216 | varz.add(task_dict[pid], corrected) |
217 | # varz.add(task_dict[pid], m[Type.Var]) | ||
213 | 218 | ||
214 | if exp_point: | 219 | if exp_point: |
215 | map(methodcaller('write_measurements', exp_point), | 220 | map(methodcaller('write_measurements', exp_point), |
@@ -272,17 +277,13 @@ def extract_scaling_data(task_dict, data, result, base_file): | |||
272 | for data_stat, base_stat in zip(data_stats[config],base_stats[config]): | 277 | for data_stat, base_stat in zip(data_stats[config],base_stats[config]): |
273 | if not base_stat[Type.Avg] or not base_stat[Type.Max] or \ | 278 | if not base_stat[Type.Avg] or not base_stat[Type.Max] or \ |
274 | not data_stat[Type.Avg] or not data_stat[Type.Max]: | 279 | not data_stat[Type.Avg] or not data_stat[Type.Max]: |
275 | print("missing a thing: {},{}".format(base_stat, data_stat)) | ||
276 | continue | 280 | continue |
277 | # How much larger is their exec stat than ours? | 281 | # How much larger is their exec stat than ours? |
278 | print("%s vs %s" % (base_stat, data_stat)) | ||
279 | avg_scale = float(base_stat[Type.Avg]) / float(data_stat[Type.Avg]) | 282 | avg_scale = float(base_stat[Type.Avg]) / float(data_stat[Type.Avg]) |
280 | max_scale = float(base_stat[Type.Max]) / float(data_stat[Type.Max]) | 283 | max_scale = float(base_stat[Type.Max]) / float(data_stat[Type.Max]) |
281 | 284 | ||
282 | task = task_dict[data_stat.id] | 285 | task = task_dict[data_stat.id] |
283 | 286 | ||
284 | print("scaling for %s" % data_stat.id) | ||
285 | |||
286 | avg_scales.add(task, avg_scale) | 287 | avg_scales.add(task, avg_scale) |
287 | max_scales.add(task, max_scale) | 288 | max_scales.add(task, max_scale) |
288 | 289 | ||
diff --git a/parse/tuple_table.py b/parse/tuple_table.py
index 465abb3..e5dc39b 100644
--- a/parse/tuple_table.py
+++ b/parse/tuple_table.py
@@ -21,7 +21,6 @@ class ColMap(object): | |||
21 | key += (None,) | 21 | key += (None,) |
22 | else: | 22 | else: |
23 | key += (kv[col],) | 23 | key += (kv[col],) |
24 | |||
25 | return key | 24 | return key |
26 | 25 | ||
27 | def __contains__(self, col): | 26 | def __contains__(self, col): |
@@ -43,6 +42,15 @@ class ColMap(object): | |||
43 | self.value_map[column] = value | 42 | self.value_map[column] = value |
44 | elif value != self.value_map[column]: | 43 | elif value != self.value_map[column]: |
45 | self.force_add(column) | 44 | self.force_add(column) |
45 | del(self.value_map[column]) | ||
46 | |||
47 | def try_remove(self, column): | ||
48 | if column in self.rev_map: | ||
49 | idx = self.rev_map[column] | ||
50 | for value in self.col_list[idx+1:]: | ||
51 | self.rev_map[value] -= 1 | ||
52 | del(self.col_list[self.rev_map[column]]) | ||
53 | del(self.rev_map[column]) | ||
46 | 54 | ||
47 | def __str__(self): | 55 | def __str__(self): |
48 | return "<ColMap>%s" % (self.rev_map) | 56 | return "<ColMap>%s" % (self.rev_map) |
diff --git a/parse_exps.py b/parse_exps.py
index 2d1c370..87d0783 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -8,6 +8,7 @@ import parse.ft as ft | |||
8 | import parse.sched as st | 8 | import parse.sched as st |
9 | import re | 9 | import re |
10 | import shutil as sh | 10 | import shutil as sh |
11 | import sys | ||
11 | 12 | ||
12 | from collections import namedtuple | 13 | from collections import namedtuple |
13 | from common import load_params | 14 | from common import load_params |
@@ -16,18 +17,20 @@ from parse.point import ExpPoint | |||
16 | from parse.tuple_table import ColMap,TupleTable | 17 | from parse.tuple_table import ColMap,TupleTable |
17 | 18 | ||
18 | def parse_args(): | 19 | def parse_args(): |
19 | # TODO: convert data-dir to proper option | 20 | # TODO: convert data-dir to proper option, clean 'dest' options |
20 | parser = OptionParser("usage: %prog [options] [data_dir]...") | 21 | parser = OptionParser("usage: %prog [options] [data_dir]...") |
21 | 22 | ||
22 | parser.add_option('-o', '--out', dest='out', | 23 | parser.add_option('-o', '--out', dest='out', |
23 | help='file or directory for data output', default='parse-data') | 24 | help='file or directory for data output', default='parse-data') |
24 | 25 | ||
25 | # TODO: this means nothing | 26 | # TODO: this means nothing, also remove dests |
26 | parser.add_option('-c', '--clean', action='store_true', default=False, | 27 | parser.add_option('-c', '--clean', action='store_true', default=False, |
27 | dest='clean', help='do not output single-point csvs') | 28 | dest='clean', help='do not output single-point csvs') |
28 | parser.add_option('-s', '--scale-against', dest='scale_against', | 29 | parser.add_option('-s', '--scale-against', dest='scale_against', |
29 | metavar='PARAM=VALUE', default="", | 30 | metavar='PARAM=VALUE', default="", |
30 | help='calculate task scaling factors against these configs') | 31 | help='calculate task scaling factors against these configs') |
32 | parser.add_option('-i', '--ignore', metavar='[PARAM...]', default="", | ||
33 | help='ignore changing parameter values') | ||
31 | parser.add_option('-f', '--force', action='store_true', default=False, | 34 | parser.add_option('-f', '--force', action='store_true', default=False, |
32 | dest='force', help='overwrite existing data') | 35 | dest='force', help='overwrite existing data') |
33 | parser.add_option('-v', '--verbose', action='store_true', default=False, | 36 | parser.add_option('-v', '--verbose', action='store_true', default=False, |
@@ -38,7 +41,7 @@ def parse_args(): | |||
38 | 41 | ||
39 | return parser.parse_args() | 42 | return parser.parse_args() |
40 | 43 | ||
41 | ExpData = namedtuple('ExpData', ['name', 'params', 'data_files']) | 44 | ExpData = namedtuple('ExpData', ['name', 'params', 'data_files', 'is_base']) |
42 | DataFiles = namedtuple('DataFiles', ['ft','st']) | 45 | DataFiles = namedtuple('DataFiles', ['ft','st']) |
43 | 46 | ||
44 | def get_exp_params(data_dir, col_map): | 47 | def get_exp_params(data_dir, col_map): |
@@ -63,7 +66,9 @@ def gen_exp_data(exp_dirs, base_conf, col_map, force): | |||
63 | plain_exps = [] | 66 | plain_exps = [] |
64 | scaling_bases = [] | 67 | scaling_bases = [] |
65 | 68 | ||
66 | for data_dir in exp_dirs: | 69 | sys.stderr.write("Generating data...\n") |
70 | |||
71 | for i, data_dir in enumerate(exp_dirs): | ||
67 | if not os.path.isdir(data_dir): | 72 | if not os.path.isdir(data_dir): |
68 | raise IOError("Invalid experiment '%s'" % os.path.abspath(data_dir)) | 73 | raise IOError("Invalid experiment '%s'" % os.path.abspath(data_dir)) |
69 | 74 | ||
@@ -76,18 +81,30 @@ def gen_exp_data(exp_dirs, base_conf, col_map, force): | |||
76 | st_output = st.get_st_output(data_dir, tmp_dir, force) | 81 | st_output = st.get_st_output(data_dir, tmp_dir, force) |
77 | ft_output = ft.get_ft_output(data_dir, tmp_dir, force) | 82 | ft_output = ft.get_ft_output(data_dir, tmp_dir, force) |
78 | 83 | ||
79 | # Create experiment named after the data dir | ||
80 | exp_data = ExpData(data_dir, params, DataFiles(ft_output, st_output)) | ||
81 | 84 | ||
82 | if base_conf and base_conf.viewitems() & params.viewitems(): | 85 | if base_conf and base_conf.viewitems() & params.viewitems(): |
83 | if not st_output: | 86 | if not st_output: |
84 | raise Exception("Scaling base '%s' useless without sched data!" | 87 | raise Exception("Scaling base '%s' useless without sched data!" |
85 | % data_dir) | 88 | % data_dir) |
86 | params.pop(base_conf.keys()[0]) | 89 | is_base = True |
87 | scaling_bases += [exp_data] | 90 | |
91 | base_params = copy.deepcopy(params) | ||
92 | base_params.pop(base_conf.keys()[0]) | ||
93 | |||
94 | base_exp = ExpData(data_dir, base_params, | ||
95 | DataFiles(ft_output, st_output), True) | ||
96 | scaling_bases += [base_exp] | ||
88 | else: | 97 | else: |
89 | plain_exps += [exp_data] | 98 | is_base = False |
90 | 99 | ||
100 | # Create experiment named after the data dir | ||
101 | exp_data = ExpData(data_dir, params, | ||
102 | DataFiles(ft_output, st_output), is_base) | ||
103 | |||
104 | plain_exps += [exp_data] | ||
105 | |||
106 | sys.stderr.write('\r {0:.2%}'.format(float(i)/len(exp_dirs))) | ||
107 | sys.stderr.write('\n') | ||
91 | return (plain_exps, scaling_bases) | 108 | return (plain_exps, scaling_bases) |
92 | 109 | ||
93 | def main(): | 110 | def main(): |
@@ -107,14 +124,20 @@ def main(): | |||
107 | raise IOError("Base column '%s' not present in any parameters!" % | 124 | raise IOError("Base column '%s' not present in any parameters!" % |
108 | base_conf.keys()[0]) | 125 | base_conf.keys()[0]) |
109 | 126 | ||
110 | base_table = TupleTable(col_map) # For tracking 'base' experiments | 127 | base_map = copy.deepcopy(col_map) |
111 | result_table = TupleTable(col_map) # For generating csv directories | 128 | if opts.ignore: |
129 | for param in opts.ignore.split(","): | ||
130 | col_map.try_remove(param) | ||
131 | |||
132 | base_table = TupleTable(base_map) # For tracking 'base' experiments | ||
133 | result_table = TupleTable(col_map) # For generating output | ||
112 | 134 | ||
113 | # Used to find matching scaling_base for each experiment | 135 | # Used to find matching scaling_base for each experiment |
114 | for base in scaling_bases: | 136 | for base in scaling_bases: |
115 | base_table.add_exp(base.params, base) | 137 | base_table.add_exp(base.params, base) |
116 | 138 | ||
117 | for exp in plain_exps: | 139 | sys.stderr.write("Parsing data...\n") |
140 | for i,exp in enumerate(plain_exps): | ||
118 | result = ExpPoint(exp.name) | 141 | result = ExpPoint(exp.name) |
119 | 142 | ||
120 | if exp.data_files.ft: | 143 | if exp.data_files.ft: |
@@ -123,7 +146,7 @@ def main(): | |||
123 | 146 | ||
124 | if exp.data_files.st: | 147 | if exp.data_files.st: |
125 | base = None | 148 | base = None |
126 | if base_conf: | 149 | if base_conf and not exp.is_base: |
127 | # Try to find a scaling base | 150 | # Try to find a scaling base |
128 | base_params = copy.deepcopy(exp.params) | 151 | base_params = copy.deepcopy(exp.params) |
129 | base_params.pop(base_conf.keys()[0]) | 152 | base_params.pop(base_conf.keys()[0]) |
@@ -137,12 +160,16 @@ def main(): | |||
137 | 160 | ||
138 | if opts.verbose: | 161 | if opts.verbose: |
139 | print(result) | 162 | print(result) |
163 | else: | ||
164 | sys.stderr.write('\r {0:.2%}'.format(float(i)/len(plain_exps))) | ||
165 | sys.stderr.write('\n') | ||
140 | 166 | ||
141 | if opts.force and os.path.exists(opts.out): | 167 | if opts.force and os.path.exists(opts.out): |
142 | sh.rmtree(opts.out) | 168 | sh.rmtree(opts.out) |
143 | 169 | ||
144 | result_table.reduce() | 170 | result_table.reduce() |
145 | 171 | ||
172 | sys.stderr.write("Writing result...\n") | ||
146 | if opts.write_map: | 173 | if opts.write_map: |
147 | # Write summarized results into map | 174 | # Write summarized results into map |
148 | result_table.write_map(opts.out) | 175 | result_table.write_map(opts.out) |