From 2844e3887b4ff635dfa85e9b2ec773b06fe9af4f Mon Sep 17 00:00:00 2001
From: Jonathan Herman
Date: Fri, 28 Sep 2012 16:55:30 -0400
Subject: Added option to parse scaling factors against a base task set.

---
 experiment/experiment.py |    2 +-
 parse/ft.py              |    2 +-
 parse/sched.py           |  123 ++++++++++++++++++++++++++++++++++++++---------
 parse/tuple_table.py     |    8 ++-
 parse_exps.py            |   67 +++++++++++++++++++++-----
 run_exps.py              |    2 +-
 6 files changed, 165 insertions(+), 39 deletions(-)

diff --git a/experiment/experiment.py b/experiment/experiment.py
index 5ed6480..a95ca42 100644
--- a/experiment/experiment.py
+++ b/experiment/experiment.py
@@ -161,7 +161,7 @@ class Experiment(object):
         self.log("Starting %d tracers" % len(self.tracers))
         map(methodcaller('start_tracing'), self.tracers)
 
-        time.sleep(2)
+        time.sleep(4)
 
     def teardown(self):
         sleep_time = 5
diff --git a/parse/ft.py b/parse/ft.py
index 9837898..868c8ca 100644
--- a/parse/ft.py
+++ b/parse/ft.py
@@ -41,7 +41,7 @@ def get_ft_output(data_dir, out_dir):
             return None
     return output_file
 
-def get_ft_data(data_file, result, overheads):
+def extract_ft_data(data_file, result, overheads):
     rstr = r",(?:\s+[^\s]+){3}.*?([\d\.]+).*?([\d\.]+),(?:\s+[^\s]+){3}.*?([\d\.]+)"
 
     with open(data_file) as f:
diff --git a/parse/sched.py b/parse/sched.py
index a84aece..94ab000 100644
--- a/parse/sched.py
+++ b/parse/sched.py
@@ -1,5 +1,6 @@
 """
 TODO: make regexes indexable by name
+
 """
 
 import config.config as conf
@@ -8,10 +9,11 @@ import re
 import numpy as np
 import subprocess
 
-from collections import namedtuple
-from point import Measurement
+from collections import namedtuple,defaultdict
+from point import Measurement,Type
 
-Task = namedtuple('Task', ['pid', 'period'])
+TaskConfig = namedtuple('TaskConfig', ['cpu','wcet','period'])
+Task = namedtuple('Task', ['pid', 'config'])
 
 def get_st_output(data_dir, out_dir):
     bin_files = conf.FILES['sched_data'].format(".*")
@@ -32,32 +34,54 @@
     return output_file
 
 def get_tasks(data):
-    reg = r"PARAM.*?(\d+).*?cost:\s+[\d\.]+ms.*?period.*?([\d.]+)"
-    return [Task(x[0], x[1]) for x in re.findall(reg, data)]
+    reg = r"PARAM *?(\d+)\/.*?cost:\s+([\d\.]+)ms.*?period.*?([\d.]+)ms.*?part.*?(\d+)"
+    ret = []
+    for match in re.findall(reg, data):
+        t = Task(match[0], TaskConfig(match[3],match[1],match[2]))
+        ret += [t]
+    return ret
+
+def get_task_exits(data):
+    reg = r"TASK_EXIT *?(\d+)/.*?Avg.*?(\d+).*?Max.*?(\d+)"
+    ret = []
+    for match in re.findall(reg, data):
+        m = Measurement(match[0], {Type.Max : match[2], Type.Avg : match[1]})
+        ret += [m]
+    return ret
 
 def extract_tardy_vals(data, exp_point):
-    ratios = []
-    tards = []
+    ratios    = []
+    avg_tards = []
+    max_tards = []
 
     for t in get_tasks(data):
-        reg = r"TARDY.*?" + t.pid + "/(\d+).*?Tot.*?([\d.]+).*?ms.*([\d.]+).*?ms.*?([\d.]+)"
+        reg = r"TARDY.*?" + t.pid + "/(\d+).*?Tot.*?([\d\.]+).*?ms.*([\d\.]+).*?ms.*?([\d\.]+)"
         matches = re.findall(reg, data)
         if len(matches) != 0:
             jobs = float(matches[0][0])
+
             total_tard = float(matches[0][1])
-            # max_tard = float(matches[0][2])
+            print("total tard: %s" % total_tard)
+            avg_tard = (total_tard / jobs) / float(t.config.period)
+            max_tard = float(matches[0][2]) / float(t.config.period)
+
+            print("avg tard: %s" % avg_tard)
+
             misses = float(matches[0][3])
-            rel_tard = (total_tard / jobs) / float(t.period)
             if misses != 0:
                 miss_ratio = (misses / jobs)
+                print("misses is %d, jobs is %d" % (misses, jobs))
             else:
                 miss_ratio = 0
 
-            ratios.append(miss_ratio)
-            tards.append(rel_tard)
+            ratios += [miss_ratio]
+            avg_tards += [avg_tard]
+            max_tards += [max_tard]
 
-    for (array, name) in ((tards, "rel-tard"), (ratios, "miss-ratio")):
-        exp_point[name] = Measurement().from_array(array)
+    exp_point["avg-rel-tard"] = Measurement().from_array(avg_tards)
+    exp_point["max-rel-tard"] = Measurement().from_array(max_tards)
+    exp_point["miss-ratio"] = Measurement().from_array(ratios)
 
 def extract_variance(data, exp_point):
     varz = []
@@ -77,17 +101,70 @@
         varz.append(corrected)
 
-    exp_point['var'] = Measurement().from_array(varz)
+    exp_point['exec-var'] = Measurement().from_array(varz)
 
-def get_sched_data(data_file, result):
+def extract_sched_data(data_file, result):
     with open(data_file, 'r') as f:
         data = f.read()
 
-    # if conf != BASE:
-    #     (our_values, their_values) = extract_exec_vals(our_data, their_data)
-    #     conf_result = get_stats(our_values, their_values)
-    #     for key in conf_result.keys():
-    #         result[key][conf] = conf_result[key]
+    extract_tardy_vals(data, result)
+    extract_variance(data, result)
 
-    extract_tardy_vals(data, result)
-    extract_variance(data, result)
+def config_exit_stats(file):
+    with open(file, 'r') as f:
+        data = f.read()
+
+    tasks = get_tasks(data)
+
+    # Dictionary of task exit measurements by pid
+    exits = get_task_exits(data)
+    exit_dict = dict((e.id, e) for e in exits)
+
+    # Dictionary where keys are configurations, values are lists
+    # of tasks with that configuration
+    config_dict = defaultdict(lambda: [])
+    for t in tasks:
+        config_dict[t.config] += [t]
+
+    for config in config_dict:
+        task_list = sorted(config_dict[config])
+
+        # Replace tasks with corresponding exit stats
+        exit_list = [exit_dict[t.pid] for t in task_list]
+        config_dict[config] = exit_list
+
+    return config_dict
+
+saved_stats = {}
+def get_base_stats(base_file):
+    if base_file in saved_stats:
+        return saved_stats[base_file]
+    result = config_exit_stats(base_file)
+    saved_stats[base_file] = result
+    return result
+
+def extract_scaling_data(data_file, base_file, result):
+    # Dictionaries of task exit stats, grouped by configuration
+    data_stats = config_exit_stats(data_file)
+    base_stats = get_base_stats(base_file)
+
+    # Scaling factors are calculated by matching groups of tasks with the same
+    # config, then comparing task-to-task exec times in order of PID within
+    # each group
+    max_scales = []
+    avg_scales = []
+    for config in data_stats:
+        if len(data_stats[config]) != len(base_stats[config]):
+            # Skip this config: we are missing a record and can't guarantee
+            # a task-to-task comparison
+            continue
+        for data_stat, base_stat in zip(data_stats[config],base_stats[config]):
+            # How much larger is the base exec stat than ours?
+            avg_scale = float(base_stat[Type.Avg]) / float(data_stat[Type.Avg])
+            max_scale = float(base_stat[Type.Max]) / float(data_stat[Type.Max])
+
+            avg_scales += [avg_scale]
+            max_scales += [max_scale]
+
+    result['max-scale'] = Measurement().from_array(max_scales)
+    result['avg-scale'] = Measurement().from_array(avg_scales)
diff --git a/parse/tuple_table.py b/parse/tuple_table.py
index 0cf6bec..b56fa6c 100644
--- a/parse/tuple_table.py
+++ b/parse/tuple_table.py
@@ -21,8 +21,8 @@ class ColMap(object):
                 added += 1
             key += (kv[col],)
 
-        if added != len(kv):
-            raise Exception("column map '%s' missed field in map\n%s" %
+        if added < len(kv):
+            raise Exception("column map '%s' missed field in map '%s'" %
                             (self.col_list, kv))
         return key
 
@@ -51,6 +51,10 @@ class TupleTable(object):
         key = self.col_map.get_key(kv)
         self.table[key] += [point]
 
+    def get_exps(self, kv):
+        key = self.col_map.get_key(kv)
+        return self.table[key]
+
     def __reduce(self):
         if self.reduced:
             raise Exception("cannot reduce twice!")
diff --git a/parse_exps.py b/parse_exps.py
index ecb1cac..3a1d1b9 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -2,9 +2,11 @@
 from __future__ import print_function
 
 import config.config as conf
+import copy
 import os
 import parse.ft as ft
 import parse.sched as st
+import re
 
 from collections import namedtuple
 from common import load_params
@@ -17,6 +19,9 @@ def parse_args():
     parser.add_option('-o', '--out-dir', dest='out_dir',
                       help='directory for data output', default=os.getcwd())
+    parser.add_option('-s', '--scale-against', dest='scale_against',
+                      metavar='PARAM=VALUE', default="",
+                      help='calculate task scaling factors against these configs')
 
     return parser.parse_args()
 
@@ -41,8 +46,10 @@ def get_exp_params(data_dir, col_map):
 
     return params
 
-def gen_exp_data(exp_dirs, col_map):
-    exps = []
+def gen_exp_data(exp_dirs, base_conf, col_map):
+    plain_exps = []
+    scaling_bases = []
+
     for data_dir in exp_dirs:
         if not os.path.isdir(data_dir):
             raise IOError("Invalid experiment '%s'" % os.path.abspath(data_dir))
@@ -51,34 +58,72 @@
         if not os.path.exists(tmp_dir):
             os.mkdir(tmp_dir)
 
+        # Read and translate exp output files
         params = get_exp_params(data_dir, col_map)
         st_output = st.get_st_output(data_dir, tmp_dir)
         ft_output = ft.get_ft_output(data_dir, tmp_dir)
 
+        # Create experiment named after the data dir
         exp_data = ExpData(data_dir, params,
                            DataFiles(ft_output, st_output))
 
-        exps += [exp_data]
-    return exps
+        if base_conf and base_conf.viewitems() & params.viewitems():
+            if not st_output:
+                raise Exception("Scaling base '%s' useless without sched data!"
+                                % data_dir)
+            params.pop(base_conf.keys()[0])
+            scaling_bases += [exp_data]
+        else:
+            plain_exps += [exp_data]
+
+    return (plain_exps, scaling_bases)
 
 def main():
     opts, args = parse_args()
     args = args or [os.getcwd()]
+
+    # Configuration key for task systems used to calculate task
+    # execution scaling factors
+    base_conf = dict(re.findall("(.*)=(.*)", opts.scale_against))
+
     col_map = ColMap()
-    exps = gen_exp_data(args, col_map)
-    table = TupleTable(col_map)
+    (plain_exps, scaling_bases) = gen_exp_data(args, base_conf, col_map)
+
+    base_table = TupleTable(col_map)
+    result_table = TupleTable(col_map)
 
-    for exp in exps:
+    # Used to find matching scaling_base for each experiment
+    for base in scaling_bases:
+        base_table.add_exp(base.params, base)
+
+    for exp in plain_exps:
         result = ExpPoint(exp.name)
+
         if exp.data_files.ft:
-            ft.get_ft_data(exp.data_files.ft, result, conf.BASE_EVENTS)
+            # Write overheads into result
+            ft.extract_ft_data(exp.data_files.ft, result, conf.BASE_EVENTS)
+
         if exp.data_files.st:
-            st.get_sched_data(exp.data_files.st, result)
+            if base_conf:
+                # Try to find a scaling base; get_exps() returns a list,
+                # which is empty when no base matches
+                base_params = copy.deepcopy(exp.params)
+                base_params.pop(base_conf.keys()[0])
+                bases = base_table.get_exps(base_params)
+                if bases:
+                    # Write scaling factor (vs base) into result
+                    st.extract_scaling_data(exp.data_files.st,
+                                            bases[0].data_files.st,
+                                            result)
+            # Write deadline misses / tardiness into result
+            st.extract_sched_data(exp.data_files.st, result)
+
+        result_table.add_exp(exp.params, result)
+
+        print(result)
 
-        table.add_exp(exp.params, result)
-    table.write_result(opts.out_dir)
+    result_table.write_result(opts.out_dir)
 
 if __name__ == '__main__':
     main()
diff --git a/run_exps.py b/run_exps.py
index bda0e40..4484952 100755
--- a/run_exps.py
+++ b/run_exps.py
@@ -218,7 +218,7 @@ def main():
     print(" Successful:\t\t%d" % succ)
     print(" Failed:\t\t%d" % failed)
     print(" Already Done:\t\t%d" % done)
-    print(" Invalid Kernel:\t\t%d" % invalid)
+    print(" Wrong Kernel:\t\t%d" % invalid)
 
 if __name__ == '__main__':
     main()
-- 
cgit v1.2.2
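
Note: the scaling-factor computation this patch adds to parse/sched.py reduces to a
short standalone sketch. Everything below is illustrative only: the task PIDs, exec
times, and helper names (group_by_config, scaling_factors) are hypothetical, while the
real code parses PARAM and TASK_EXIT records out of sched_trace output and divides
base exec stats by measured exec stats.

from collections import defaultdict, namedtuple

TaskConfig = namedtuple('TaskConfig', ['cpu', 'wcet', 'period'])
Task = namedtuple('Task', ['pid', 'config'])

def group_by_config(tasks, exec_times):
    # Group per-task exec times by config, in PID order, mirroring
    # config_exit_stats() in parse/sched.py
    groups = defaultdict(list)
    for t in sorted(tasks):
        groups[t.config].append(exec_times[t.pid])
    return groups

def scaling_factors(data_tasks, data_times, base_tasks, base_times):
    # Pair tasks that share a config and divide base exec time by
    # measured exec time, as extract_scaling_data() does
    data_groups = group_by_config(data_tasks, data_times)
    base_groups = group_by_config(base_tasks, base_times)
    scales = []
    for config, data_list in data_groups.items():
        base_list = base_groups[config]
        if len(data_list) != len(base_list):
            continue  # missing record, no task-to-task match possible
        scales += [b / d for b, d in zip(base_list, data_list)]
    return scales

# Hypothetical data: two tasks with identical configs in each task set
base = [Task(1, TaskConfig(0, 10, 100)), Task(2, TaskConfig(0, 10, 100))]
data = [Task(3, TaskConfig(0, 10, 100)), Task(4, TaskConfig(0, 10, 100))]
print(scaling_factors(data, {3: 10.0, 4: 10.0}, base, {1: 12.0, 2: 9.0}))
# -> [1.2, 0.9]

Because tasks are paired purely by PID order within each configuration group, a base
task set only yields meaningful factors when it spawns the same number of tasks per
configuration as the experiment being parsed.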