From 7c09ec981c6e06af2e62d67a609eb53728267954 Mon Sep 17 00:00:00 2001 From: Jonathan Herman Date: Thu, 27 Sep 2012 19:03:22 -0400 Subject: Added script to parse directory data, create CSVs for every chagned value. This change also makes SchedTrace and OverheadTrace events configurable. --- common.py | 17 ++++++ config/config.example.py | 18 ++++++- experiment/tracer.py | 29 ++++------ parse/__init__.py | 0 parse/dir_map.py | 104 ++++++++++++++++++++++++++++++++++++ parse/enum.py | 7 +++ parse/ft.py | 60 +++++++++++++++++++++ parse/point.py | 135 +++++++++++++++++++++++++++++++++++++++++++++++ parse/sched.py | 89 +++++++++++++++++++++++++++++++ parse/tuple_table.py | 76 ++++++++++++++++++++++++++ parse_exps.py | 85 +++++++++++++++++++++++++++++ 11 files changed, 600 insertions(+), 20 deletions(-) create mode 100644 common.py create mode 100644 parse/__init__.py create mode 100644 parse/dir_map.py create mode 100644 parse/enum.py create mode 100644 parse/ft.py create mode 100644 parse/point.py create mode 100644 parse/sched.py create mode 100644 parse/tuple_table.py mode change 100644 => 100755 parse_exps.py diff --git a/common.py b/common.py new file mode 100644 index 0000000..a09ef7c --- /dev/null +++ b/common.py @@ -0,0 +1,17 @@ +from collections import defaultdict + +def load_params(fname): + params = defaultdict(int) + with open(fname, 'r') as f: + data = f.read() + try: + parsed = eval(data) + # Convert to defaultdict + for k in parsed: + params[k] = str(parsed[k]) + except Exception as e: + raise IOError("Invalid param file: %s\n%s" % (fname, e)) + + return params + + diff --git a/config/config.example.py b/config/config.example.py index b307687..9675f66 100644 --- a/config/config.example.py +++ b/config/config.example.py @@ -1,6 +1,7 @@ from __future__ import print_function import os import sys +import itertools """ These are paths to repository directories. @@ -21,7 +22,8 @@ BINS = {'bespin' : '{}/bespin'.format(REPOS['liblitmus']), 'split' : '{}/split'.format(REPOS['analysis']), 'sort' : '{}/sort-all'.format(REPOS['analysis']), 'analyze' : '{}/analyze'.format(REPOS['analysis']), - 'trace-cmd' : '{}/trace-cmd'.format(REPOS['trace-cmd'])} + 'trace-cmd' : '{}/trace-cmd'.format(REPOS['trace-cmd']), + 'st_show' : '{}/st_show'.format(REPOS['sched_trace'])} DEFAULTS = {'params_file' : 'params.py', 'sched_file' : 'sched.py', @@ -32,11 +34,23 @@ DEFAULTS = {'params_file' : 'params.py', FILES = {'ft_data' : 'ft.bin', 'linux_data' : 'trace.dat', 'sched_data' : 'st-{}.bin', - 'log_data' : 'trace.slog'} + 'log_data' : 'trace.slog',} PARAMS = {'sched' : 'scheduler', 'dur' : 'duration'} +SCHED_EVENTS = range(501, 513) +BASE_EVENTS = ['SCHED', 'RELEASE', 'SCHED2', 'TICK', 'CXS'] + +# Expand for mixed-crit +# CRIT_EVENTS = ['LVL{}_SCHED', 'LEVEL{}_RELEASE'] +# CRIT_LEVELS = ['A', 'B', 'C'] +# BASE_EVENTS += [s.format(l) for (l,s) in +# itertools.product(CRIT_LEVELS, CRIT_EVENTS)] + +ALL_EVENTS = ["%s_%s" % (e, t) for (e,t) in + itertools.product(BASE_EVENTS, ["START","END"])] + valid = True for repo, loc in REPOS.items(): if not os.path.isdir(loc): diff --git a/experiment/tracer.py b/experiment/tracer.py index d7743ad..ad4ebfe 100644 --- a/experiment/tracer.py +++ b/experiment/tracer.py @@ -1,8 +1,10 @@ import litmus_util import os +import config.config as conf + from operator import methodcaller from executable.ftcat import FTcat,Executable -from config.config import FILES,BINS + class Tracer(object): def __init__(self, name, output_dir): @@ -27,11 +29,11 @@ class LinuxTracer(Tracer): extra_args = ["record", "-e", "sched:sched_switch", "-e", "litmus:*", - "-o", "%s/%s" % (output_dir, FILES['linux_data'])] + "-o", "%s/%s" % (output_dir, conf.FILES['linux_data'])] stdout = open('%s/trace-cmd-stdout.txt' % self.output_dir, 'w') stderr = open('%s/trace-cmd-stderr.txt' % self.output_dir, 'w') - execute = Executable(BINS['trace-cmd'], extra_args, stdout, stderr) + execute = Executable(conf.BINS['trace-cmd'], extra_args, stdout, stderr) self.bins.append(execute) @staticmethod @@ -49,7 +51,7 @@ class LogTracer(Tracer): def __init__(self, output_dir): super(LogTracer, self).__init__("Logger", output_dir) - out_file = open("%s/%s" % (self.output_dir, FILES['log_data']), 'w') + out_file = open("%s/%s" % (self.output_dir, conf.FILES['log_data']), 'w') cat = (Executable("/bin/cat", [LogTracer.DEVICE_STR])) cat.stdout_file = out_file @@ -62,7 +64,6 @@ class LogTracer(Tracer): class SchedTracer(Tracer): - EVENTS = range(501, 510) # not including 511 DEVICE_STR = '/dev/litmus/sched_trace' def __init__(self, output_dir): @@ -74,7 +75,7 @@ class SchedTracer(Tracer): stdout_f = open('%s/st-%d.bin' % (self.output_dir, cpu), 'w') stderr_f = open('%s/st-%d-stderr.txt' % (self.output_dir, cpu), 'w') dev = '{0}{1}'.format(SchedTracer.DEVICE_STR, cpu) - ftc = FTcat(BINS['ftcat'], stdout_f, stderr_f, dev, SchedTracer.EVENTS, cpu=cpu) + ftc = FTcat(conf.BINS['ftcat'], stdout_f, stderr_f, dev, conf.SCHED_EVENTS, cpu=cpu) self.bins.append(ftc) @@ -85,22 +86,14 @@ class SchedTracer(Tracer): class OverheadTracer(Tracer): DEVICE_STR = '/dev/litmus/ft_trace0' - EVENTS = [# 'SCHED_START', 'SCHED_END', 'SCHED2_START', 'SCHED2_END', - 'RELEASE_START', 'RELEASE_END', - 'LVLA_RELEASE_START', 'LVLA_RELEASE_END', - 'LVLA_SCHED_START', 'LVLA_SCHED_END', - 'LVLB_RELEASE_START', 'LVLB_RELEASE_END', - 'LVLB_SCHED_START', 'LVLB_SCHED_END', - 'LVLC_RELEASE_START', 'LVLC_RELEASE_END', - 'LVLC_SCHED_START', 'LVLC_SCHED_END'] def __init__(self, output_dir): super(OverheadTracer, self).__init__("Overhead Trace", output_dir) - stdout_f = open('{0}/{1}'.format(self.output_dir, FILES['ft_data']), 'w') - stderr_f = open('{0}/{1}.stderr.txt'.format(self.output_dir, FILES['ft_data']), 'w') - ftc = FTcat(BINS['ftcat'], stdout_f, stderr_f, - OverheadTracer.DEVICE_STR, OverheadTracer.EVENTS) + stdout_f = open('{0}/{1}'.format(self.output_dir, conf.FILES['ft_data']), 'w') + stderr_f = open('{0}/{1}.stderr.txt'.format(self.output_dir, conf.FILES['ft_data']), 'w') + ftc = FTcat(conf.BINS['ftcat'], stdout_f, stderr_f, + OverheadTracer.DEVICE_STR, conf.ALL_EVENTS) self.bins.append(ftc) diff --git a/parse/__init__.py b/parse/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/parse/dir_map.py b/parse/dir_map.py new file mode 100644 index 0000000..6e959f2 --- /dev/null +++ b/parse/dir_map.py @@ -0,0 +1,104 @@ +import os + +from collections import defaultdict +from point import Type + +class TreeNode(object): + def __init__(self, parent = None): + self.parent = parent + self.children = defaultdict(lambda : TreeNode(self)) + self.values = [] + +class DirMap(object): + def to_csv(self, vals): + val_strs = [] + for key in sorted(vals.keys()): + val_strs += ["%s=%s" % (key, vals[key])] + return "%s.csv" % ("_".join(val_strs)) + + def __init__(self, out_dir): + self.root = TreeNode(None) + self.out_dir = out_dir + self.values = [] + + def debug_update_node(self, path, keys, value): + self.__update_node(path, keys, value) + + def __update_node(self, path, keys, value): + node = self.root + + path += [ self.to_csv(keys) ] + for p in path: + node = node.children[p] + + node.values += [value] + + def add_point(self, vary, vary_value, keys, point): + for stat in point.get_stats(): + summary = point[stat] + + for summary_type in Type: + measurement = summary[summary_type] + + for base_type in Type: + if not base_type in measurement: + continue + # Ex: wcet/avg/max/vary-type/other-stuff.csv + path = [ stat, summary_type, base_type, "vary-%s" % vary ] + result = measurement[base_type] + + self.__update_node(path, keys, (vary_value, result)) + + + + def reduce(self): + def reduce2(node): + for key in node.children.keys(): + child = node.children[key] + reduce2(child) + if not (child.children or child.values): + node.children.pop(key) + + if len(node.values) == 1: + node.values = [] + + reduce2(self.root) + + def write(self): + def write2(path, node): + out_path = "/".join(path) + if node.values: + # Leaf + with open("/".join(path), "w") as f: + arr = [",".join([str(b) for b in n]) for n in node.values] + f.write("\n".join(arr) + "\n") + elif not os.path.isdir(out_path): + os.mkdir(out_path) + + for (key, child) in node.children.iteritems(): + path.append(key) + write2(path, child) + path.pop() + + + write2([self.out_dir], self.root) + + + def __str__(self): + def str2(node, level): + header = " " * level + ret = "" + if not node.children: + return "%s%s\n" % (header, str(node.values) if node.values else "") + for key,child in node.children.iteritems(): + ret += "%s/%s\n" % (header, key) + ret += str2(child, level + 1) + return ret + + return "%s\n%s" % (self.out_dir, str2(self.root, 1)) + + + + + + diff --git a/parse/enum.py b/parse/enum.py new file mode 100644 index 0000000..bf35d01 --- /dev/null +++ b/parse/enum.py @@ -0,0 +1,7 @@ +class Enum(frozenset): + def __getattr__(self, name): + if name in self: + return name + raise AttributeError + + diff --git a/parse/ft.py b/parse/ft.py new file mode 100644 index 0000000..9837898 --- /dev/null +++ b/parse/ft.py @@ -0,0 +1,60 @@ +import config.config as conf +import os +import re +import shutil as sh +import subprocess + +from point import Measurement,Type + +def get_ft_output(data_dir, out_dir): + bin_file = conf.FILES['ft_data'] + "$" + bins = [f for f in os.listdir(data_dir) if re.match(bin_file, f)] + + FT_DATA_NAME = "scheduler=x-ft" + output_file = "{}/out-ft".format(out_dir) + + if os.path.isfile(output_file): + print("ft-output already exists for %s" % data_dir) + return output_file + + if len(bins) != 0: + err_file = open("%s/err-ft" % out_dir, 'w') + # Need to make a copy of the original data file so scripts can change it + sh.copyfile("{}/{}".format(data_dir, bins[0]), + "{}/{}".format(out_dir, FT_DATA_NAME)) + + subprocess.call([conf.BINS['sort'], FT_DATA_NAME], + cwd=out_dir, stderr=err_file, stdout=err_file) + subprocess.call([conf.BINS['split'], FT_DATA_NAME], + cwd=out_dir, stderr=err_file, stdout=err_file) + + # Previous subprocesses just spit out all these intermediate files + bins = [f for f in os.listdir(out_dir) if re.match(".*overhead=.*bin", f)] + bins = [f for f in bins if os.stat("%s/%s"%(out_dir, f)).st_size] + + # Analyze will summarize those + cmd_arr = [conf.BINS['analyze']] + cmd_arr.extend(bins) + with open(output_file, "w") as f: + subprocess.call(cmd_arr, cwd=out_dir, stdout=f, stderr=err_file) + else: + return None + return output_file + +def get_ft_data(data_file, result, overheads): + rstr = r",(?:\s+[^\s]+){3}.*?([\d\.]+).*?([\d\.]+),(?:\s+[^\s]+){3}.*?([\d\.]+)" + + with open(data_file) as f: + data = f.read() + + for ovh in overheads: + measure = Measurement("%s-%s" % (data_file, ovh)) + vals = re.findall(".*{}".format(ovh) + rstr, data); + if len(vals) != 0: + vals = vals[0] + measure[Type.Max] = float(vals[0]) + measure[Type.Avg] = float(vals[1]) + measure[Type.Var] = float(vals[2]) + result[ovh] = measure + + return result diff --git a/parse/point.py b/parse/point.py new file mode 100644 index 0000000..4343d03 --- /dev/null +++ b/parse/point.py @@ -0,0 +1,135 @@ +""" +Too much duplicate code in this file +""" + +import copy +import numpy as np +from enum import Enum +from collections import defaultdict + +Type = Enum(['Min','Max','Avg','Var']) +default_typemap = {Type.Max : {Type.Max : 1, Type.Min : 0, Type.Avg : 1, Type.Var : 1}, + Type.Min : {Type.Max : 1, Type.Min : 0, Type.Avg : 1, Type.Var : 1}, + Type.Avg : {Type.Max : 1, Type.Min : 0, Type.Avg : 1, Type.Var : 1}} + +def make_typemap(): + return copy.deepcopy(default_typemap) + +def dict_str(adict, sep = "\n"): + return sep.join(["%s: %s" % (k, str(v)) for (k,v) in adict.iteritems()]) + +class Measurement(object): + def __init__(self, id = None, kv = {}): + self.id = id + self.stats = {} + for k, v in kv.iteritems(): + self[k] = v + + def from_array(self,array): + array = np.array(array) + self[Type.Max] = array.max() + self[Type.Avg] = array.mean() + self[Type.Var] = array.var() + return self + + def __check_type(self, type): + if not type in Type: + raise AttributeError("Not a valid type '%s'" % type) + + def __getitem__(self, type): + self.__check_type(type) + return self.stats[type] + + def __iter__(self): + return self.stats.iteritems() + + def __contains__(self, type): + self.__check_type(type) + return type in self.stats + + def __setitem__(self, type, value): + self.__check_type(type) + self.stats[type] = value + + def __str__(self): + return " %s" % (self.id, dict_str(self.stats, " ")) + + +class Summary(Measurement): + def __init__(self, id, measures, typemap = default_typemap): + super(Summary, self).__init__("Summary-%s" % id) + + self.__check_types(measures, typemap) + self.__summarize(measures, typemap) + + def __check_types(self, measures, typemap): + required_types = self.__get_required(typemap) + for m in measures: + for type in required_types: + if type not in m: + raise ValueError("measurement '%s' missing type '%s'" % + (self.id, type)) + + def __summarize(self, measures, typemap): + for sum_type in Type: + self[sum_type] = Measurement(self.id) + + def avg(vals): + return sum(vals) / len(vals) + + for base_type in Type: + for sum_type, func in (Type.Min,min),(Type.Max,max),(Type.Avg, avg): + if typemap[sum_type][base_type]: + val = func([m[base_type] for m in measures]) + self[sum_type][base_type] = val + + def __get_required(self, typemap): + required = [] + for base_type in Type: + matches = [t[base_type] for t in typemap.itervalues()] + if bool(sum(matches)): + required += [base_type] + return required + +class ExpPoint(object): + def __init__(self, id = "", init = {}): + self.stats = {} + for type, value in init.iteritems(): + self[type] = value + self.id = id + + def __check_val(self, obj): + if not isinstance(obj, Measurement): + raise AttributeError("Not a valid measurement '%s'" % obj) + + def __getitem__(self, type): + return self.stats[type] + + def __iter__(self): + return self.stats.iteritems() + + def __contains__(self, type): + return type in self.stats + + def __setitem__(self, type, value): + self.__check_val(value) + self.stats[type] = value + + def __str__(self): + return "\n%s" % (self.id, dict_str(self.stats)) + + def get_stats(self): + return self.stats.keys() + +class SummaryPoint(ExpPoint): + def __init__(self, id, points, typemap = default_typemap): + super(SummaryPoint,self).__init__("Summary-%s" % id) + + grouped = defaultdict(lambda : []) + + for exp in points: + for name,measure in exp.stats.iteritems(): + grouped[name] += [measure] + + for key in grouped.iterkeys(): + self[key] = Summary(key, grouped[key], typemap) diff --git a/parse/sched.py b/parse/sched.py new file mode 100644 index 0000000..ec4d917 --- /dev/null +++ b/parse/sched.py @@ -0,0 +1,89 @@ +import config.config as conf +import os +import re +import numpy as np +import subprocess + +from collections import namedtuple +from point import Measurement + +Task = namedtuple('Task', ['pid', 'period']) + +def get_st_output(data_dir, out_dir): + bin_files = conf.FILES['sched_data'].format(".*") + bins = [f for f in os.listdir(data_dir) if re.match(bin_files, f)] + + output_file = "%s/out-st" % out_dir + + if os.path.isfile(output_file): + return output_file + + if len(bins) != 0: + cmd_arr = [conf.BINS['st_show']] + cmd_arr.extend(bins) + with open(output_file, "w") as f: + subprocess.call(cmd_arr, cwd=data_dir, stdout=f) + else: + return None + return output_file + +def get_tasks(data): + reg = r"PARAM.*?(\d+).*?cost:\s+[\d\.]+ms.*?period.*?([\d.]+)" + return [Task(x[0], x[1]) for x in re.findall(reg, data)] + +def extract_tardy_vals(data, exp_point): + ratios = [] + tards = [] + + for t in get_tasks(data): + reg = r"TARDY.*?" + t.pid + "/(\d+).*?Tot.*?([\d.]+).*?ms.*([\d.]+).*?ms.*?([\d.]+)" + matches = re.findall(reg, data) + if len(matches) != 0: + jobs = float(matches[0][0]) + total_tard = float(matches[0][1]) + # max_tard = float(matches[0][2]) + misses = float(matches[0][3]) + rel_tard = (total_tard / jobs) / float(t.period) + if misses != 0: + miss_ratio = (misses / jobs) + else: + miss_ratio = 0 + + ratios.append(miss_ratio) + tards.append(rel_tard) + + for (array, name) in ((tards, "rel-tard"), (ratios, "miss-ratio")): + exp_point[name] = Measurement().from_array(array) + +def extract_variance(data, exp_point): + varz = [] + for t in get_tasks(data): + reg = r"COMPLETION.*?" + t.pid + r".*?([\d\.]+)ms" + matches = re.findall(reg, data) + + if len(matches) == 0: + return 0 + + job_times = np.array(filter(lambda x: float(x) != 0, matches), dtype=np.float) + + # Coefficient of variation + cv = job_times.std() / job_times.mean() + # Correction, assuming normal distributions + corrected = (1 + 1/(4 * len(job_times))) * cv + + varz.append(corrected) + + exp_point['var'] = Measurement().from_array(varz) + +def get_sched_data(data_file, result): + with open(data_file, 'r') as f: + data = f.read() + + # if conf != BASE: + # (our_values, their_values) = extract_exec_vals(our_data, their_data) + # conf_result = get_stats(our_values, their_values) + # for key in conf_result.keys(): + # result[key][conf] = conf_result[key] + + extract_tardy_vals(data, result) + extract_variance(data, result) diff --git a/parse/tuple_table.py b/parse/tuple_table.py new file mode 100644 index 0000000..df80b37 --- /dev/null +++ b/parse/tuple_table.py @@ -0,0 +1,76 @@ +from collections import defaultdict +from point import SummaryPoint +from dir_map import DirMap + +class ColMap(object): + def __init__(self): + self.rev_map = {} + self.col_list = [] + + def columns(self): + return self.col_list + + def get_key(self, kv): + key = () + added = 0 + + for col in self.col_list: + if col not in kv: + key += (None,) + else: + added += 1 + key += (kv[col],) + + if added != len(kv): + raise Exception("column map '%s' missed field in map\n%s" % + (self.col_list, kv)) + + return key + + def get_map(self, tuple): + map = {} + for i in range(0, len(tuple)): + map[self.col_list[i]] = tuple[i] + return map + + def try_add(self, column): + if column not in self.rev_map: + self.rev_map[column] = len(self.col_list) + self.col_list += [column] + + def __str__(self): + return "%s" % (self.rev_map) + +class TupleTable(object): + def __init__(self, col_map): + self.col_map = col_map + self.table = defaultdict(lambda: []) + self.reduced = False + + def add_exp(self, kv, point): + key = self.col_map.get_key(kv) + self.table[key] += [point] + + def __reduce(self): + if self.reduced: + raise Exception("cannot reduce twice!") + self.reduced = True + for key, values in self.table.iteritems(): + self.table[key] = SummaryPoint(key, values) + + def write_result(self, out_dir): + dir_map = DirMap(out_dir) + self.__reduce() + for key, point in self.table.iteritems(): + kv = self.col_map.get_map(key) + + for col in self.col_map.columns(): + val = kv[col] + kv.pop(col) + + dir_map.add_point(col, val, kv, point) + + kv[col] = val + + dir_map.reduce() + dir_map.write() diff --git a/parse_exps.py b/parse_exps.py old mode 100644 new mode 100755 index e69de29..6a7d14f --- a/parse_exps.py +++ b/parse_exps.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python +from __future__ import print_function + +import config.config as conf +import os + +import parse.ft as ft +import parse.sched as st + +from collections import namedtuple +from common import load_params +from optparse import OptionParser +from parse.tuple_table import ColMap,TupleTable +from parse.point import ExpPoint + +def parse_args(): + parser = OptionParser("usage: %prog [options] [data_dir]...") + + parser.add_option('-o', '--out-dir', dest='out_dir', + help='directory for data output', default=os.getcwd()) + + return parser.parse_args() + +ExpData = namedtuple('ExpData', ['name', 'params', 'data_files']) +DataFiles = namedtuple('DataFiles', ['ft','st']) + +def get_exp_params(data_dir, col_map): + param_file = "%s/%s" % (data_dir, conf.DEFAULTS['params_file']) + if not os.path.isfile: + raise Exception("No param file '%s' exists!" % param_file) + + # Keep only params that uniquely identify the experiment + params = load_params(param_file) + for ignored in conf.PARAMS.itervalues(): + if ignored in params: + params.pop(ignored) + + # Track all changed params + for key in params.keys(): + col_map.try_add(key) + + return params + + +def gen_exp_data(exp_dirs, col_map): + exps = [] + for data_dir in exp_dirs: + if not os.path.isdir(data_dir): + raise IOError("Invalid experiment '%s'" % os.path.abspath(data_dir)) + + tmp_dir = data_dir + "/tmp" + if not os.path.exists(tmp_dir): + os.mkdir(tmp_dir) + + params = get_exp_params(data_dir, col_map) + st_output = st.get_st_output(data_dir, tmp_dir) + ft_output = ft.get_ft_output(data_dir, tmp_dir) + + exp_data = ExpData(data_dir, params, DataFiles(ft_output, st_output)) + exps += [exp_data] + + return exps + +def main(): + opts, args = parse_args() + + args = args or [os.getcwd()] + col_map = ColMap() + exps = gen_exp_data(args, col_map) + + table = TupleTable(col_map) + + for exp in exps: + result = ExpPoint(exp.name) + if exp.data_files.ft: + ft.get_ft_data(exp.data_files.ft, result, conf.BASE_EVENTS) + if exp.data_files.st: + st.get_sched_data(exp.data_files.st, result) + + table.add_exp(exp.params, result) + + table.write_result(opts.out_dir) + +if __name__ == '__main__': + main() -- cgit v1.2.2