From 5d97a6baf6166b74355c6e744e010949a46fd625 Mon Sep 17 00:00:00 2001
From: Jonathan Herman
Date: Sun, 7 Oct 2012 23:40:12 -0400
Subject: Split scheduling data by task criticality.

---
 parse/sched.py | 125 +++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 76 insertions(+), 49 deletions(-)

diff --git a/parse/sched.py b/parse/sched.py
index b84e16e..300c569 100644
--- a/parse/sched.py
+++ b/parse/sched.py
@@ -34,7 +34,26 @@ COMPLETION_RECORD = r"(?P<RECORD>" +\
 TaskConfig = namedtuple('TaskConfig', ['cpu','wcet','period','type','level'])
 Task = namedtuple('Task', ['pid', 'config'])
 
+class LeveledArray(object):
+    """
+    Groups statistics by the level of the task to which they apply
+    """
+    def __init__(self, name):
+        self.name = name
+        self.vals = defaultdict(lambda:[])
+
+    def add(self, task, value):
+        self.vals[task.config.level] += [value]
+
+    def write_measurements(self, result):
+        for level, arr in self.vals.iteritems():
+            name = "%s%s" % ("%s-" % level if level else "", self.name)
+            result[name] = Measurement(name).from_array(arr)
+
 def get_st_output(data_dir, out_dir):
+    """
+    Create and return files containing unpacked sched data
+    """
     bin_files = conf.FILES['sched_data'].format(".*")
     bins = [f for f in os.listdir(data_dir) if re.match(bin_files, f)]
 
@@ -70,7 +89,7 @@ def get_tasks(data):
                             (e, match.groupdict(), match.group('RECORD')))
     return ret
 
-def get_tasks_dict(data):
+def get_task_dict(data):
     tasks_list = get_tasks(data)
     tasks_dict = {}
     for t in tasks_list:
@@ -89,17 +108,15 @@ def get_task_exits(data):
         except:
             raise Exception("Invalid exit record, parsed:\n\t%s\n\t%s" %
                             (match.groupdict(), m.group('RECORD')))
-
+
         ret += [m]
     return ret
-
 
-def extract_tardy_vals(data, exp_point):
-    ratios    = []
-    avg_tards = []
-    max_tards = []
-    tasks = get_tasks_dict(data)
+def extract_tardy_vals(task_dict, data, exp_point):
+    ratios    = LeveledArray("miss-ratio")
+    avg_tards = LeveledArray("avg-rel-tardiness")
+    max_tards = LeveledArray("max-rel-tardiness")
 
     for match in re.finditer(TARDY_RECORD, data):
         try:
@@ -114,35 +131,40 @@
             raise Exception("Invalid tardy record:\n\t%s\n\t%s" %
                             (match.groupdict(), match.group("RECORD")))
 
-        if pid not in tasks:
+        if pid not in task_dict:
             raise Exception("Invalid pid '%d' in tardy record:\n\t%s" %
                             match.group("RECORD"))
-
-        t = tasks[pid]
-        avg_tards += [ total_tard / (jobs * t.config.period) ]
-        max_tards += [ max_tard / t.config.period ]
-        ratios    += [ misses / jobs ]
-
-    exp_point["avg-rel-tard"] = Measurement().from_array(avg_tards)
-    exp_point["max-rel-tard"] = Measurement().from_array(max_tards)
-    exp_point["miss-ratio"]   = Measurement().from_array(ratios)
-
-def extract_variance(data, exp_point):
-    varz = []
+
+        t = task_dict[pid]
+        avg_tards.add(t, total_tard / (jobs * t.config.period))
+        max_tards.add(t, max_tard / t.config.period)
+        ratios.add(t, misses / jobs)
+
+    ratios.write_measurements(exp_point)
+    avg_tards.write_measurements(exp_point)
+    max_tards.write_measurements(exp_point)
+
+def extract_variance(task_dict, data, exp_point):
+    varz = LeveledArray("exec-variance")
     completions = defaultdict(lambda: [])
+    missed = defaultdict(lambda: int())
 
     for match in re.finditer(COMPLETION_RECORD, data):
         try:
             pid = int(match.group("PID"))
             duration = float(match.group("EXEC"))
 
-            if not (duration and pid): raise Exception()
+            # Last (exit) record often has exec time of 0
+            missed[pid] += not bool(duration)
+
+            if missed[pid] > 1 or not pid: raise Exception()
         except:
-            raise Exception("Invalid completion record:\n\t%s\n\t%s" %
-                            (match.groupdict(), match.group("RECORD")))
+            raise Exception("Invalid completion record, missed - %d:"
+                            "\n\t%s\n\t%s" % (missed[pid], match.groupdict(),
+                                              match.group("RECORD")))
         completions[pid] += [duration]
 
-    for (pid, durations) in completions:
+    for pid, durations in completions.iteritems():
         job_times = np.array(durations)
 
         # Coefficient of variation
@@ -150,32 +172,22 @@
         # Correction, assuming normal distributions
         corrected = (1 + 1/(4 * len(job_times))) * cv
 
-        varz.append(corrected)
-
-    exp_point['exec-var'] = Measurement().from_array(varz)
-
-def extract_sched_data(data_file, result):
-    with open(data_file, 'r') as f:
-        data = f.read()
+        varz.add(task_dict[pid], corrected)
 
-    extract_tardy_vals(data, result)
-    extract_variance(data, result)
+    varz.write_measurements(exp_point)
 
-def config_exit_stats(file):
+def config_exit_stats(task_dict, file):
     with open(file, 'r') as f:
         data = f.read()
-
-    tasks = get_tasks(data)
 
     # Dictionary of task exit measurements by pid
     exits = get_task_exits(data)
-
     exit_dict = dict((e.id, e) for e in exits)
 
     # Dictionary where keys are configurations, values are list
     # of tasks with those configuratino
     config_dict = defaultdict(lambda: [])
-    for t in tasks:
+    for t in task_dict.itervalues():
         config_dict[t.config] += [t]
 
     for config in config_dict:
@@ -185,7 +197,6 @@
         if not t.pid in exit_dict:
             raise Exception("Missing exit record for task '%s' in '%s'" %
                             (t, file))
-
         exit_list = [exit_dict[t.pid] for t in task_list]
         config_dict[config] = exit_list
 
@@ -195,20 +206,22 @@ saved_stats = {}
 def get_base_stats(base_file):
     if base_file in saved_stats:
         return saved_stats[base_file]
-    result = config_exit_stats(base_file)
+    with open(base_file, 'r') as f:
+        data = f.read()
+    result = config_exit_stats(data)
     saved_stats[base_file] = result
     return result
 
-def extract_scaling_data(data_file, base_file, result):
+def extract_scaling_data(task_dict, data, result, base_file):
     # Generate trees of tasks with matching configurations
-    data_stats = config_exit_stats(data_file)
+    data_stats = config_exit_stats(data)
     base_stats = get_base_stats(base_file)
 
     # Scaling factors are calculated by matching groups of tasks with the same
     # config, then comparing task-to-task exec times in order of PID within
     # each group
-    max_scales = []
-    avg_scales = []
+    max_scales = LeveledArray("max-scaling")
+    avg_scales = LeveledArray("avg-scaling")
 
     for config in data_stats:
        if len(data_stats[config]) != len(base_stats[config]):
@@ -220,8 +233,22 @@
             avg_scale = float(base_stat[Type.Avg]) / float(base_stat[Type.Avg])
             max_scale = float(base_stat[Type.Max]) / float(base_stat[Type.Max])
 
-            avg_scales += [avg_scale]
-            max_scales += [max_scale]
+            task = task_dict[data_stat.id]
+
+            avg_scales.add(task, avg_scale)
+            max_scales.add(task, max_scale)
+
+    avg_scales.write_measurements(result)
+    max_scales.write_measurements(result)
+
+def extract_sched_data(data_file, result, base_file):
+    with open(data_file, 'r') as f:
+        data = f.read()
+
+    task_dict = get_task_dict(data)
+
+    extract_tardy_vals(task_dict, data, result)
+    extract_variance(task_dict, data, result)
 
-    result['max-scale'] = Measurement().from_array(max_scales)
-    result['avg-scale'] = Measurement().from_array(avg_scales)
+    if (base_file):
+        extract_scaling_data(task_dict, data, result, base_file)
--
cgit v1.2.2