| field | value | date |
|---|---|---|
| author | Jonathan Herman <hermanjl@cs.unc.edu> | 2012-10-30 16:04:23 -0400 |
| committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2012-10-30 16:04:23 -0400 |
| commit | 53cfcf10531256d0e4411a7e0bda431ec27f28e7 | |
| tree | d9119ebe9658f4f41ab870811b6a89f6e9683cbc | |
| parent | 2e804f8fa7d26755088e47357f8de6feb6dbe292 | |
Process all non-scaling statistics for base experiments.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | parse/ft.py | 2 |
| -rw-r--r-- | parse/sched.py | 13 |
| -rw-r--r-- | parse/tuple_table.py | 10 |
| -rwxr-xr-x | parse_exps.py | 53 |

4 files changed, 56 insertions, 22 deletions
```diff
diff --git a/parse/ft.py b/parse/ft.py
index feb338f..4e310b0 100644
--- a/parse/ft.py
+++ b/parse/ft.py
@@ -20,7 +20,6 @@ def get_ft_output(data_dir, out_dir, force=False):
         if force:
             os.remove(output_file)
         else:
-            print("ft-output already exists for %s" % data_dir)
             return output_file
 
     if len(bins) != 0:
@@ -41,7 +40,6 @@ def get_ft_output(data_dir, out_dir, force=False):
         # Analyze will summarize those
         # todo pass in f
         cmd_arr = [conf.BINS['analyze']]
-        print("cmd arr: %s-%s" % (cmd_arr, bins))
         cmd_arr.extend(bins)
         with open(output_file, "w") as f:
             subprocess.call(cmd_arr, cwd=out_dir, stdout=f, stderr=err_file)
```
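Both deletions remove debug prints from the cache-or-regenerate logic in `get_ft_output`: an existing overhead file is now reused silently, and the assembled `analyze` command line is no longer echoed. Roughly, the pattern the function follows (a simplified sketch; the argument list and file naming are stand-ins for the module's real details):

```python
import os
import subprocess

def get_output(output_file, bins, analyze_bin, out_dir, force=False):
    """Reuse a previously generated output file unless force is set."""
    if os.path.exists(output_file):
        if force:
            os.remove(output_file)      # throw away the stale result
        else:
            return output_file          # reuse silently (print removed here)

    if bins:
        cmd_arr = [analyze_bin] + list(bins)
        with open(output_file, "w") as f:
            # write the analyzer's stdout into the cached output file
            subprocess.call(cmd_arr, cwd=out_dir, stdout=f)

    return output_file
```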
```diff
diff --git a/parse/sched.py b/parse/sched.py
index 80764b6..bbf6e10 100644
--- a/parse/sched.py
+++ b/parse/sched.py
@@ -11,6 +11,7 @@ import os
 import re
 import numpy as np
 import subprocess
+import pprint
 
 from collections import namedtuple,defaultdict
 from operator import methodcaller
@@ -54,6 +55,7 @@ class LeveledArray(object):
     def add(self, task, value):
         self.vals[task.config.level] += [value]
 
+
     def write_measurements(self, result):
         for level, arr in self.vals.iteritems():
             name = "%s%s" % ("%s-" % level if level else "", self.name)
@@ -72,7 +74,6 @@ def get_st_output(data_dir, out_dir, force=False):
         if force:
             os.remove(output_file)
         else:
-            print("st-output already exists for %s" % data_dir)
             return output_file
 
     if len(bins) != 0:
@@ -195,8 +196,11 @@ def extract_variance(task_dict, data, exp_point):
         completions[pid] += [duration]
 
     for pid, durations in completions.iteritems():
+        m = Measurement(pid).from_array(durations)
+
         # TODO: not this, please
-        task_dict[pid].run.append(Measurement(pid).from_array(durations))
+        if not task_dict[pid].run:
+            task_dict[pid].run.append(m)
 
         job_times = np.array(durations)
         mean = job_times.mean()
@@ -210,6 +214,7 @@ def extract_variance(task_dict, data, exp_point):
         corrected = (1 + 1/(4 * len(job_times))) * cv
 
         varz.add(task_dict[pid], corrected)
+        # varz.add(task_dict[pid], m[Type.Var])
 
     if exp_point:
         map(methodcaller('write_measurements', exp_point),
@@ -272,17 +277,13 @@ def extract_scaling_data(task_dict, data, result, base_file):
     for data_stat, base_stat in zip(data_stats[config],base_stats[config]):
         if not base_stat[Type.Avg] or not base_stat[Type.Max] or \
            not data_stat[Type.Avg] or not data_stat[Type.Max]:
-            print("missing a thing: {},{}".format(base_stat, data_stat))
             continue
         # How much larger is their exec stat than ours?
-        print("%s vs %s" % (base_stat, data_stat))
         avg_scale = float(base_stat[Type.Avg]) / float(data_stat[Type.Avg])
         max_scale = float(base_stat[Type.Max]) / float(data_stat[Type.Max])
 
         task = task_dict[data_stat.id]
 
-        print("scaling for %s" % data_stat.id)
-
         avg_scales.add(task, avg_scale)
         max_scales.add(task, max_scale)
 
```
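In `extract_variance`, the per-task `Measurement` is now built once up front and appended to `task_dict[pid].run` only if no run measurement exists yet, so repeated parses no longer stack duplicates. The statistic recorded is the bias-corrected coefficient of variation, cv_corrected = (1 + 1/(4n)) * cv. A standalone sketch of that computation follows; note the explicit `1.0`: in the Python 2 context line above, `1/(4 * len(job_times))` is integer division and silently evaluates to zero, leaving cv uncorrected.

```python
import numpy as np

def corrected_cv(durations):
    """Bias-corrected coefficient of variation of a task's job completion times."""
    job_times = np.array(durations, dtype=float)
    cv = job_times.std() / job_times.mean()   # raw coefficient of variation
    n = len(job_times)
    return (1 + 1.0 / (4 * n)) * cv           # small-sample bias correction

# e.g. four jobs with nearly identical completion times -> cv close to 0
print(corrected_cv([10.1, 9.8, 10.3, 10.0]))
```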
```diff
diff --git a/parse/tuple_table.py b/parse/tuple_table.py
index 465abb3..e5dc39b 100644
--- a/parse/tuple_table.py
+++ b/parse/tuple_table.py
@@ -21,7 +21,6 @@ class ColMap(object):
                 key += (None,)
             else:
                 key += (kv[col],)
-
         return key
 
     def __contains__(self, col):
@@ -43,6 +42,15 @@ class ColMap(object):
             self.value_map[column] = value
         elif value != self.value_map[column]:
             self.force_add(column)
+            del(self.value_map[column])
+
+    def try_remove(self, column):
+        if column in self.rev_map:
+            idx = self.rev_map[column]
+            for value in self.col_list[idx+1:]:
+                self.rev_map[value] -= 1
+            del(self.col_list[self.rev_map[column]])
+            del(self.rev_map[column])
 
     def __str__(self):
         return "<ColMap>%s" % (self.rev_map)
```
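The new `try_remove` drops a column from a `ColMap` after registration: it finds the column's index, shifts the stored index of every later column down by one, and deletes the entry from both the ordered column list and the reverse (name-to-index) map. The `del(self.value_map[column])` added above it keeps a parameter from being treated as constant once two different values have been seen. A minimal standalone sketch of the index-shifting removal, using plain containers instead of `ColMap`:

```python
def try_remove(col_list, rev_map, column):
    """Remove 'column' and re-index every column that followed it."""
    if column not in rev_map:
        return                        # unknown columns are silently ignored
    idx = rev_map[column]
    for later in col_list[idx + 1:]:
        rev_map[later] -= 1           # columns after 'column' shift left by one
    del col_list[idx]
    del rev_map[column]

cols = ["scheduler", "cpus", "trial"]
rev = {c: i for i, c in enumerate(cols)}
try_remove(cols, rev, "cpus")
print(cols, rev)   # ['scheduler', 'trial'] {'scheduler': 0, 'trial': 1}
```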
```diff
diff --git a/parse_exps.py b/parse_exps.py
index 2d1c370..87d0783 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -8,6 +8,7 @@ import parse.ft as ft
 import parse.sched as st
 import re
 import shutil as sh
+import sys
 
 from collections import namedtuple
 from common import load_params
@@ -16,18 +17,20 @@ from parse.point import ExpPoint
 from parse.tuple_table import ColMap,TupleTable
 
 def parse_args():
-    # TODO: convert data-dir to proper option
+    # TODO: convert data-dir to proper option, clean 'dest' options
     parser = OptionParser("usage: %prog [options] [data_dir]...")
 
     parser.add_option('-o', '--out', dest='out',
                       help='file or directory for data output', default='parse-data')
 
-    # TODO: this means nothing
+    # TODO: this means nothing, also remove dests
     parser.add_option('-c', '--clean', action='store_true', default=False,
                       dest='clean', help='do not output single-point csvs')
     parser.add_option('-s', '--scale-against', dest='scale_against',
                       metavar='PARAM=VALUE', default="",
                       help='calculate task scaling factors against these configs')
+    parser.add_option('-i', '--ignore', metavar='[PARAM...]', default="",
+                      help='ignore changing parameter values')
     parser.add_option('-f', '--force', action='store_true', default=False,
                       dest='force', help='overwrite existing data')
     parser.add_option('-v', '--verbose', action='store_true', default=False,
@@ -38,7 +41,7 @@ def parse_args():
 
     return parser.parse_args()
 
-ExpData = namedtuple('ExpData', ['name', 'params', 'data_files'])
+ExpData = namedtuple('ExpData', ['name', 'params', 'data_files', 'is_base'])
 DataFiles = namedtuple('DataFiles', ['ft','st'])
 
 def get_exp_params(data_dir, col_map):
@@ -63,7 +66,9 @@ def gen_exp_data(exp_dirs, base_conf, col_map, force):
     plain_exps = []
     scaling_bases = []
 
-    for data_dir in exp_dirs:
+    sys.stderr.write("Generating data...\n")
+
+    for i, data_dir in enumerate(exp_dirs):
         if not os.path.isdir(data_dir):
             raise IOError("Invalid experiment '%s'" % os.path.abspath(data_dir))
 
@@ -76,18 +81,30 @@ def gen_exp_data(exp_dirs, base_conf, col_map, force):
         st_output = st.get_st_output(data_dir, tmp_dir, force)
         ft_output = ft.get_ft_output(data_dir, tmp_dir, force)
 
-        # Create experiment named after the data dir
-        exp_data = ExpData(data_dir, params, DataFiles(ft_output, st_output))
 
         if base_conf and base_conf.viewitems() & params.viewitems():
             if not st_output:
                 raise Exception("Scaling base '%s' useless without sched data!"
                                 % data_dir)
-            params.pop(base_conf.keys()[0])
-            scaling_bases += [exp_data]
+            is_base = True
+
+            base_params = copy.deepcopy(params)
+            base_params.pop(base_conf.keys()[0])
+
+            base_exp = ExpData(data_dir, base_params,
+                               DataFiles(ft_output, st_output), True)
+            scaling_bases += [base_exp]
         else:
-            plain_exps += [exp_data]
+            is_base = False
 
+        # Create experiment named after the data dir
+        exp_data = ExpData(data_dir, params,
+                           DataFiles(ft_output, st_output), is_base)
+
+        plain_exps += [exp_data]
+
+        sys.stderr.write('\r {0:.2%}'.format(float(i)/len(exp_dirs)))
+    sys.stderr.write('\n')
     return (plain_exps, scaling_bases)
 
 def main():
@@ -107,14 +124,20 @@ def main():
         raise IOError("Base column '%s' not present in any parameters!" %
                       base_conf.keys()[0])
 
-    base_table = TupleTable(col_map)   # For tracking 'base' experiments
-    result_table = TupleTable(col_map) # For generating csv directories
+    base_map = copy.deepcopy(col_map)
+    if opts.ignore:
+        for param in opts.ignore.split(","):
+            col_map.try_remove(param)
+
+    base_table = TupleTable(base_map)   # For tracking 'base' experiments
+    result_table = TupleTable(col_map)  # For generating output
 
     # Used to find matching scaling_base for each experiment
     for base in scaling_bases:
         base_table.add_exp(base.params, base)
 
-    for exp in plain_exps:
+    sys.stderr.write("Parsing data...\n")
+    for i,exp in enumerate(plain_exps):
         result = ExpPoint(exp.name)
 
         if exp.data_files.ft:
@@ -123,7 +146,7 @@ def main():
 
         if exp.data_files.st:
             base = None
-            if base_conf:
+            if base_conf and not exp.is_base:
                 # Try to find a scaling base
                 base_params = copy.deepcopy(exp.params)
                 base_params.pop(base_conf.keys()[0])
@@ -137,12 +160,16 @@ def main():
 
         if opts.verbose:
             print(result)
+        else:
+            sys.stderr.write('\r {0:.2%}'.format(float(i)/len(plain_exps)))
+    sys.stderr.write('\n')
 
     if opts.force and os.path.exists(opts.out):
         sh.rmtree(opts.out)
 
     result_table.reduce()
 
+    sys.stderr.write("Writing result...\n")
     if opts.write_map:
         # Write summarized results into map
         result_table.write_map(opts.out)
```
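Taken together, the driver now puts every experiment, scaling base or not, into `plain_exps`, so a base's non-scaling statistics are parsed like anyone else's; only non-base experiments (`not exp.is_base`) search for a scaling base; and the lookup keys on the full parameter set minus the `-s PARAM=VALUE` parameter. A dict-based sketch of that matching rule (experiment names and parameters are invented for illustration; the real script goes through `TupleTable`):

```python
import copy

# Hypothetical experiment list: (params, name). 'scheduler' stands in for
# the -s PARAM, with scheduler=BASE as the base configuration.
base_conf = {'scheduler': 'BASE'}
exps = [
    ({'scheduler': 'BASE',    'cpus': 4}, 'base-4cpu'),
    ({'scheduler': 'GSN-EDF', 'cpus': 4}, 'edf-4cpu'),
]

def key_without_base(params):
    """Match key: every parameter except the one named by -s."""
    rest = copy.deepcopy(params)
    rest.pop('scheduler')
    return tuple(sorted(rest.items()))

# Pass 1: index the scaling bases (they also stay in the main list now,
# so their own statistics get parsed).
bases = {key_without_base(p): name for p, name in exps
         if base_conf.items() <= p.items()}

# Pass 2: only non-base experiments look up a scaling base.
for params, name in exps:
    if not base_conf.items() <= params.items():    # i.e. not exp.is_base
        match = bases.get(key_without_base(params))
        print("%s scales against %s" % (name, match))  # edf-4cpu -> base-4cpu
```

The new `-i/--ignore` flag feeds `ColMap.try_remove`, so a parameter such as a trial counter can be kept out of the generated output while base matching still sees the full parameter set: `base_map` is deep-copied before the ignored columns are removed from `col_map`.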
