| author | Jonathan Herman <hermanjl@cs.unc.edu> | 2012-09-27 19:03:22 -0400 |
|---|---|---|
| committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2012-09-27 19:03:22 -0400 |
| commit | 7c09ec981c6e06af2e62d67a609eb53728267954 (patch) | |
| tree | 76a93db7cadc452ac70eabbd52fdd87ed5fd54c4 | |
| parent | 5554e053e9f3d5f7987d3f1d889802b211af8eab (diff) | |
Added a script to parse directory data and create CSVs for every changed value.
This change also makes SchedTrace and OverheadTrace events configurable.
| -rw-r--r-- | common.py | 17 | ||||
| -rw-r--r-- | config/config.example.py | 18 | ||||
| -rw-r--r-- | experiment/tracer.py | 29 | ||||
| -rw-r--r-- | parse/__init__.py | 0 | ||||
| -rw-r--r-- | parse/dir_map.py | 104 | ||||
| -rw-r--r-- | parse/enum.py | 7 | ||||
| -rw-r--r-- | parse/ft.py | 60 | ||||
| -rw-r--r-- | parse/point.py | 135 | ||||
| -rw-r--r-- | parse/sched.py | 89 | ||||
| -rw-r--r-- | parse/tuple_table.py | 76 | ||||
| -rwxr-xr-x[-rw-r--r--] | parse_exps.py | 85 |
11 files changed, 600 insertions, 20 deletions
diff --git a/common.py b/common.py new file mode 100644 index 0000000..a09ef7c --- /dev/null +++ b/common.py | |||
| @@ -0,0 +1,17 @@ | |||
| 1 | from collections import defaultdict | ||
| 2 | |||
| 3 | def load_params(fname): | ||
| 4 | params = defaultdict(int) | ||
| 5 | with open(fname, 'r') as f: | ||
| 6 | data = f.read() | ||
| 7 | try: | ||
| 8 | parsed = eval(data) | ||
| 9 | # Convert to defaultdict | ||
| 10 | for k in parsed: | ||
| 11 | params[k] = str(parsed[k]) | ||
| 12 | except Exception as e: | ||
| 13 | raise IOError("Invalid param file: %s\n%s" % (fname, e)) | ||
| 14 | |||
| 15 | return params | ||
| 16 | |||
| 17 | |||
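A minimal usage sketch of `load_params` (the path and file contents here are hypothetical). Note that `eval` means a param file is executed as trusted Python:

```python
# Hypothetical params.py written by the run scripts:
#   {'scheduler': 'GSN-EDF', 'tasks': 20}
from common import load_params

params = load_params('exp-1/params.py')  # illustrative path
params['scheduler']  # 'GSN-EDF'
params['tasks']      # '20' -- every value is stored as a string
params['cpus']       # 0    -- defaultdict(int) default for missing keys
```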
diff --git a/config/config.example.py b/config/config.example.py index b307687..9675f66 100644 --- a/config/config.example.py +++ b/config/config.example.py | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | from __future__ import print_function | 1 | from __future__ import print_function |
| 2 | import os | 2 | import os |
| 3 | import sys | 3 | import sys |
| 4 | import itertools | ||
| 4 | 5 | ||
| 5 | """ | 6 | """ |
| 6 | These are paths to repository directories. | 7 | These are paths to repository directories. |
| @@ -21,7 +22,8 @@ BINS = {'bespin' : '{}/bespin'.format(REPOS['liblitmus']), | |||
| 21 | 'split' : '{}/split'.format(REPOS['analysis']), | 22 | 'split' : '{}/split'.format(REPOS['analysis']), |
| 22 | 'sort' : '{}/sort-all'.format(REPOS['analysis']), | 23 | 'sort' : '{}/sort-all'.format(REPOS['analysis']), |
| 23 | 'analyze' : '{}/analyze'.format(REPOS['analysis']), | 24 | 'analyze' : '{}/analyze'.format(REPOS['analysis']), |
| 24 | 'trace-cmd' : '{}/trace-cmd'.format(REPOS['trace-cmd'])} | 25 | 'trace-cmd' : '{}/trace-cmd'.format(REPOS['trace-cmd']), |
| 26 | 'st_show' : '{}/st_show'.format(REPOS['sched_trace'])} | ||
| 25 | 27 | ||
| 26 | DEFAULTS = {'params_file' : 'params.py', | 28 | DEFAULTS = {'params_file' : 'params.py', |
| 27 | 'sched_file' : 'sched.py', | 29 | 'sched_file' : 'sched.py', |
| @@ -32,11 +34,23 @@ DEFAULTS = {'params_file' : 'params.py', | |||
| 32 | FILES = {'ft_data' : 'ft.bin', | 34 | FILES = {'ft_data' : 'ft.bin', |
| 33 | 'linux_data' : 'trace.dat', | 35 | 'linux_data' : 'trace.dat', |
| 34 | 'sched_data' : 'st-{}.bin', | 36 | 'sched_data' : 'st-{}.bin', |
| 35 | 'log_data' : 'trace.slog'} | 37 | 'log_data' : 'trace.slog',} |
| 36 | 38 | ||
| 37 | PARAMS = {'sched' : 'scheduler', | 39 | PARAMS = {'sched' : 'scheduler', |
| 38 | 'dur' : 'duration'} | 40 | 'dur' : 'duration'} |
| 39 | 41 | ||
| 42 | SCHED_EVENTS = range(501, 513) | ||
| 43 | BASE_EVENTS = ['SCHED', 'RELEASE', 'SCHED2', 'TICK', 'CXS'] | ||
| 44 | |||
| 45 | # Expand for mixed-crit | ||
| 46 | # CRIT_EVENTS = ['LVL{}_SCHED', 'LEVEL{}_RELEASE'] | ||
| 47 | # CRIT_LEVELS = ['A', 'B', 'C'] | ||
| 48 | # BASE_EVENTS += [s.format(l) for (l,s) in | ||
| 49 | # itertools.product(CRIT_LEVELS, CRIT_EVENTS)] | ||
| 50 | |||
| 51 | ALL_EVENTS = ["%s_%s" % (e, t) for (e,t) in | ||
| 52 | itertools.product(BASE_EVENTS, ["START","END"])] | ||
| 53 | |||
| 40 | valid = True | 54 | valid = True |
| 41 | for repo, loc in REPOS.items(): | 55 | for repo, loc in REPOS.items(): |
| 42 | if not os.path.isdir(loc): | 56 | if not os.path.isdir(loc): |
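For clarity, the new `ALL_EVENTS` list is just the START/END product of `BASE_EVENTS`; a quick sketch of what it expands to:

```python
import itertools

BASE_EVENTS = ['SCHED', 'RELEASE', 'SCHED2', 'TICK', 'CXS']
ALL_EVENTS = ["%s_%s" % (e, t) for (e, t) in
              itertools.product(BASE_EVENTS, ["START", "END"])]

assert ALL_EVENTS == [
    'SCHED_START', 'SCHED_END', 'RELEASE_START', 'RELEASE_END',
    'SCHED2_START', 'SCHED2_END', 'TICK_START', 'TICK_END',
    'CXS_START', 'CXS_END']
```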
diff --git a/experiment/tracer.py b/experiment/tracer.py index d7743ad..ad4ebfe 100644 --- a/experiment/tracer.py +++ b/experiment/tracer.py | |||
| @@ -1,8 +1,10 @@ | |||
| 1 | import litmus_util | 1 | import litmus_util |
| 2 | import os | 2 | import os |
| 3 | import config.config as conf | ||
| 4 | |||
| 3 | from operator import methodcaller | 5 | from operator import methodcaller |
| 4 | from executable.ftcat import FTcat,Executable | 6 | from executable.ftcat import FTcat,Executable |
| 5 | from config.config import FILES,BINS | 7 | |
| 6 | 8 | ||
| 7 | class Tracer(object): | 9 | class Tracer(object): |
| 8 | def __init__(self, name, output_dir): | 10 | def __init__(self, name, output_dir): |
| @@ -27,11 +29,11 @@ class LinuxTracer(Tracer): | |||
| 27 | 29 | ||
| 28 | extra_args = ["record", "-e", "sched:sched_switch", | 30 | extra_args = ["record", "-e", "sched:sched_switch", |
| 29 | "-e", "litmus:*", | 31 | "-e", "litmus:*", |
| 30 | "-o", "%s/%s" % (output_dir, FILES['linux_data'])] | 32 | "-o", "%s/%s" % (output_dir, conf.FILES['linux_data'])] |
| 31 | stdout = open('%s/trace-cmd-stdout.txt' % self.output_dir, 'w') | 33 | stdout = open('%s/trace-cmd-stdout.txt' % self.output_dir, 'w') |
| 32 | stderr = open('%s/trace-cmd-stderr.txt' % self.output_dir, 'w') | 34 | stderr = open('%s/trace-cmd-stderr.txt' % self.output_dir, 'w') |
| 33 | 35 | ||
| 34 | execute = Executable(BINS['trace-cmd'], extra_args, stdout, stderr) | 36 | execute = Executable(conf.BINS['trace-cmd'], extra_args, stdout, stderr) |
| 35 | self.bins.append(execute) | 37 | self.bins.append(execute) |
| 36 | 38 | ||
| 37 | @staticmethod | 39 | @staticmethod |
| @@ -49,7 +51,7 @@ class LogTracer(Tracer): | |||
| 49 | def __init__(self, output_dir): | 51 | def __init__(self, output_dir): |
| 50 | super(LogTracer, self).__init__("Logger", output_dir) | 52 | super(LogTracer, self).__init__("Logger", output_dir) |
| 51 | 53 | ||
| 52 | out_file = open("%s/%s" % (self.output_dir, FILES['log_data']), 'w') | 54 | out_file = open("%s/%s" % (self.output_dir, conf.FILES['log_data']), 'w') |
| 53 | 55 | ||
| 54 | cat = (Executable("/bin/cat", [LogTracer.DEVICE_STR])) | 56 | cat = (Executable("/bin/cat", [LogTracer.DEVICE_STR])) |
| 55 | cat.stdout_file = out_file | 57 | cat.stdout_file = out_file |
| @@ -62,7 +64,6 @@ class LogTracer(Tracer): | |||
| 62 | 64 | ||
| 63 | 65 | ||
| 64 | class SchedTracer(Tracer): | 66 | class SchedTracer(Tracer): |
| 65 | EVENTS = range(501, 510) # not including 511 | ||
| 66 | DEVICE_STR = '/dev/litmus/sched_trace' | 67 | DEVICE_STR = '/dev/litmus/sched_trace' |
| 67 | 68 | ||
| 68 | def __init__(self, output_dir): | 69 | def __init__(self, output_dir): |
| @@ -74,7 +75,7 @@ class SchedTracer(Tracer): | |||
| 74 | stdout_f = open('%s/st-%d.bin' % (self.output_dir, cpu), 'w') | 75 | stdout_f = open('%s/st-%d.bin' % (self.output_dir, cpu), 'w') |
| 75 | stderr_f = open('%s/st-%d-stderr.txt' % (self.output_dir, cpu), 'w') | 76 | stderr_f = open('%s/st-%d-stderr.txt' % (self.output_dir, cpu), 'w') |
| 76 | dev = '{0}{1}'.format(SchedTracer.DEVICE_STR, cpu) | 77 | dev = '{0}{1}'.format(SchedTracer.DEVICE_STR, cpu) |
| 77 | ftc = FTcat(BINS['ftcat'], stdout_f, stderr_f, dev, SchedTracer.EVENTS, cpu=cpu) | 78 | ftc = FTcat(conf.BINS['ftcat'], stdout_f, stderr_f, dev, conf.SCHED_EVENTS, cpu=cpu) |
| 78 | 79 | ||
| 79 | self.bins.append(ftc) | 80 | self.bins.append(ftc) |
| 80 | 81 | ||
| @@ -85,22 +86,14 @@ class SchedTracer(Tracer): | |||
| 85 | 86 | ||
| 86 | class OverheadTracer(Tracer): | 87 | class OverheadTracer(Tracer): |
| 87 | DEVICE_STR = '/dev/litmus/ft_trace0' | 88 | DEVICE_STR = '/dev/litmus/ft_trace0' |
| 88 | EVENTS = [# 'SCHED_START', 'SCHED_END', 'SCHED2_START', 'SCHED2_END', | ||
| 89 | 'RELEASE_START', 'RELEASE_END', | ||
| 90 | 'LVLA_RELEASE_START', 'LVLA_RELEASE_END', | ||
| 91 | 'LVLA_SCHED_START', 'LVLA_SCHED_END', | ||
| 92 | 'LVLB_RELEASE_START', 'LVLB_RELEASE_END', | ||
| 93 | 'LVLB_SCHED_START', 'LVLB_SCHED_END', | ||
| 94 | 'LVLC_RELEASE_START', 'LVLC_RELEASE_END', | ||
| 95 | 'LVLC_SCHED_START', 'LVLC_SCHED_END'] | ||
| 96 | 89 | ||
| 97 | def __init__(self, output_dir): | 90 | def __init__(self, output_dir): |
| 98 | super(OverheadTracer, self).__init__("Overhead Trace", output_dir) | 91 | super(OverheadTracer, self).__init__("Overhead Trace", output_dir) |
| 99 | 92 | ||
| 100 | stdout_f = open('{0}/{1}'.format(self.output_dir, FILES['ft_data']), 'w') | 93 | stdout_f = open('{0}/{1}'.format(self.output_dir, conf.FILES['ft_data']), 'w') |
| 101 | stderr_f = open('{0}/{1}.stderr.txt'.format(self.output_dir, FILES['ft_data']), 'w') | 94 | stderr_f = open('{0}/{1}.stderr.txt'.format(self.output_dir, conf.FILES['ft_data']), 'w') |
| 102 | ftc = FTcat(BINS['ftcat'], stdout_f, stderr_f, | 95 | ftc = FTcat(conf.BINS['ftcat'], stdout_f, stderr_f, |
| 103 | OverheadTracer.DEVICE_STR, OverheadTracer.EVENTS) | 96 | OverheadTracer.DEVICE_STR, conf.ALL_EVENTS) |
| 104 | 97 | ||
| 105 | self.bins.append(ftc) | 98 | self.bins.append(ftc) |
| 106 | 99 | ||
diff --git a/parse/__init__.py b/parse/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/parse/__init__.py | |||
diff --git a/parse/dir_map.py b/parse/dir_map.py new file mode 100644 index 0000000..6e959f2 --- /dev/null +++ b/parse/dir_map.py | |||
| @@ -0,0 +1,104 @@ | |||
| 1 | import os | ||
| 2 | |||
| 3 | from collections import defaultdict | ||
| 4 | from point import Type | ||
| 5 | |||
| 6 | class TreeNode(object): | ||
| 7 | def __init__(self, parent = None): | ||
| 8 | self.parent = parent | ||
| 9 | self.children = defaultdict(lambda : TreeNode(self)) | ||
| 10 | self.values = [] | ||
| 11 | |||
| 12 | class DirMap(object): | ||
| 13 | def to_csv(self, vals): | ||
| 14 | val_strs = [] | ||
| 15 | for key in sorted(vals.keys()): | ||
| 16 | val_strs += ["%s=%s" % (key, vals[key])] | ||
| 17 | return "%s.csv" % ("_".join(val_strs)) | ||
| 18 | |||
| 19 | def __init__(self, out_dir): | ||
| 20 | self.root = TreeNode(None) | ||
| 21 | self.out_dir = out_dir | ||
| 22 | self.values = [] | ||
| 23 | |||
| 24 | def debug_update_node(self, path, keys, value): | ||
| 25 | self.__update_node(path, keys, value) | ||
| 26 | |||
| 27 | def __update_node(self, path, keys, value): | ||
| 28 | node = self.root | ||
| 29 | |||
| 30 | path += [ self.to_csv(keys) ] | ||
| 31 | for p in path: | ||
| 32 | node = node.children[p] | ||
| 33 | |||
| 34 | node.values += [value] | ||
| 35 | |||
| 36 | def add_point(self, vary, vary_value, keys, point): | ||
| 37 | for stat in point.get_stats(): | ||
| 38 | summary = point[stat] | ||
| 39 | |||
| 40 | for summary_type in Type: | ||
| 41 | measurement = summary[summary_type] | ||
| 42 | |||
| 43 | for base_type in Type: | ||
| 44 | if not base_type in measurement: | ||
| 45 | continue | ||
| 46 | # Ex: wcet/avg/max/vary-type/other-stuff.csv | ||
| 47 | path = [ stat, summary_type, base_type, "vary-%s" % vary ] | ||
| 48 | result = measurement[base_type] | ||
| 49 | |||
| 50 | self.__update_node(path, keys, (vary_value, result)) | ||
| 51 | |||
| 52 | |||
| 53 | |||
| 54 | def reduce(self): | ||
| 55 | def reduce2(node): | ||
| 56 | for key in node.children.keys(): | ||
| 57 | child = node.children[key] | ||
| 58 | reduce2(child) | ||
| 59 | if not (child.children or child.values): | ||
| 60 | node.children.pop(key) | ||
| 61 | |||
| 62 | if len(node.values) == 1: | ||
| 63 | node.values = [] | ||
| 64 | |||
| 65 | reduce2(self.root) | ||
| 66 | |||
| 67 | def write(self): | ||
| 68 | def write2(path, node): | ||
| 69 | out_path = "/".join(path) | ||
| 70 | if node.values: | ||
| 71 | # Leaf | ||
| 72 | with open("/".join(path), "w") as f: | ||
| 73 | arr = [",".join([str(b) for b in n]) for n in node.values] | ||
| 74 | f.write("\n".join(arr) + "\n") | ||
| 75 | elif not os.path.isdir(out_path): | ||
| 76 | os.mkdir(out_path) | ||
| 77 | |||
| 78 | for (key, child) in node.children.iteritems(): | ||
| 79 | path.append(key) | ||
| 80 | write2(path, child) | ||
| 81 | path.pop() | ||
| 82 | |||
| 83 | |||
| 84 | write2([self.out_dir], self.root) | ||
| 85 | |||
| 86 | |||
| 87 | def __str__(self): | ||
| 88 | def str2(node, level): | ||
| 89 | header = " " * level | ||
| 90 | ret = "" | ||
| 91 | if not node.children: | ||
| 92 | return "%s%s\n" % (header, str(node.values) if node.values else "") | ||
| 93 | for key,child in node.children.iteritems(): | ||
| 94 | ret += "%s/%s\n" % (header, key) | ||
| 95 | ret += str2(child, level + 1) | ||
| 96 | return ret | ||
| 97 | |||
| 98 | return "%s\n%s" % (self.out_dir, str2(self.root, 1)) | ||
| 99 | |||
| 100 | |||
| 101 | |||
| 102 | |||
| 103 | |||
| 104 | |||
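A sketch of how `DirMap` is driven (Python 2, matching the module's use of `iteritems`; the parameter names and numbers are made up, and `add_point` is normally called from `TupleTable.write_result` below):

```python
from parse.dir_map import DirMap
from parse.point import ExpPoint, SummaryPoint, Measurement

def summary_of(vals):
    # One summarized result per parameter assignment (values are made up)
    p = ExpPoint('demo', {'miss-ratio': Measurement().from_array(vals)})
    return SummaryPoint('demo', [p])

dm = DirMap('csvs')
# Vary 'tasks' while cpus=4 names the leaf CSV
dm.add_point('tasks', 20, {'cpus': 4}, summary_of([0.1, 0.3]))
dm.add_point('tasks', 40, {'cpus': 4}, summary_of([0.2, 0.5]))
dm.reduce()  # prunes empty subtrees and single-sample leaves
dm.write()   # e.g. csvs/miss-ratio/Avg/Avg/vary-tasks/cpus=4.csv,
             # one "20,<value>" / "40,<value>" line per vary value
```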
diff --git a/parse/enum.py b/parse/enum.py new file mode 100644 index 0000000..bf35d01 --- /dev/null +++ b/parse/enum.py | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | class Enum(frozenset): | ||
| 2 | def __getattr__(self, name): | ||
| 3 | if name in self: | ||
| 4 | return name | ||
| 5 | raise AttributeError | ||
| 6 | |||
| 7 | |||
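`Enum` is a minimal frozenset-backed enumeration (this predates the stdlib `enum` module); a quick illustration:

```python
from parse.enum import Enum

Type = Enum(['Min', 'Max', 'Avg', 'Var'])

Type.Max         # 'Max' -- attribute access returns the member's name
'Avg' in Type    # True  -- an Enum is still a frozenset
# Type.Median    # would raise AttributeError: not a member
```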
diff --git a/parse/ft.py b/parse/ft.py new file mode 100644 index 0000000..9837898 --- /dev/null +++ b/parse/ft.py | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | import config.config as conf | ||
| 2 | import os | ||
| 3 | import re | ||
| 4 | import shutil as sh | ||
| 5 | import subprocess | ||
| 6 | |||
| 7 | from point import Measurement,Type | ||
| 8 | |||
| 9 | def get_ft_output(data_dir, out_dir): | ||
| 10 | bin_file = conf.FILES['ft_data'] + "$" | ||
| 11 | bins = [f for f in os.listdir(data_dir) if re.match(bin_file, f)] | ||
| 12 | |||
| 13 | FT_DATA_NAME = "scheduler=x-ft" | ||
| 14 | output_file = "{}/out-ft".format(out_dir) | ||
| 15 | |||
| 16 | if os.path.isfile(output_file): | ||
| 17 | print("ft-output already exists for %s" % data_dir) | ||
| 18 | return output_file | ||
| 19 | |||
| 20 | if len(bins) != 0: | ||
| 21 | err_file = open("%s/err-ft" % out_dir, 'w') | ||
| 22 | # Need to make a copy of the original data file so scripts can change it | ||
| 23 | sh.copyfile("{}/{}".format(data_dir, bins[0]), | ||
| 24 | "{}/{}".format(out_dir, FT_DATA_NAME)) | ||
| 25 | |||
| 26 | subprocess.call([conf.BINS['sort'], FT_DATA_NAME], | ||
| 27 | cwd=out_dir, stderr=err_file, stdout=err_file) | ||
| 28 | subprocess.call([conf.BINS['split'], FT_DATA_NAME], | ||
| 29 | cwd=out_dir, stderr=err_file, stdout=err_file) | ||
| 30 | |||
| 31 | # Previous subprocesses just spit out all these intermediate files | ||
| 32 | bins = [f for f in os.listdir(out_dir) if re.match(".*overhead=.*bin", f)] | ||
| 33 | bins = [f for f in bins if os.stat("%s/%s"%(out_dir, f)).st_size] | ||
| 34 | |||
| 35 | # Analyze will summarize those | ||
| 36 | cmd_arr = [conf.BINS['analyze']] | ||
| 37 | cmd_arr.extend(bins) | ||
| 38 | with open(output_file, "w") as f: | ||
| 39 | subprocess.call(cmd_arr, cwd=out_dir, stdout=f, stderr=err_file) | ||
| 40 | else: | ||
| 41 | return None | ||
| 42 | return output_file | ||
| 43 | |||
| 44 | def get_ft_data(data_file, result, overheads): | ||
| 45 | rstr = r",(?:\s+[^\s]+){3}.*?([\d\.]+).*?([\d\.]+),(?:\s+[^\s]+){3}.*?([\d\.]+)" | ||
| 46 | |||
| 47 | with open(data_file) as f: | ||
| 48 | data = f.read() | ||
| 49 | |||
| 50 | for ovh in overheads: | ||
| 51 | measure = Measurement("%s-%s" % (data_file, ovh)) | ||
| 52 | vals = re.findall(".*{}".format(ovh) + rstr, data) | ||
| 53 | if len(vals) != 0: | ||
| 54 | vals = vals[0] | ||
| 55 | measure[Type.Max] = float(vals[0]) | ||
| 56 | measure[Type.Avg] = float(vals[1]) | ||
| 57 | measure[Type.Var] = float(vals[2]) | ||
| 58 | result[ovh] = measure | ||
| 59 | |||
| 60 | return result | ||
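A sketch of how the two functions chain together (directory names are hypothetical): `get_ft_output` runs the `sort-all`, `split`, and `analyze` binaries over `ft.bin` to produce a textual summary, which `get_ft_data` then scrapes per overhead with the regex above:

```python
import parse.ft as ft
from parse.point import ExpPoint

result = ExpPoint('exp-1')
out = ft.get_ft_output('exp-1', 'exp-1/tmp')  # None if no ft.bin is present
if out:
    # Fills result['CXS'], result['SCHED'], ... with Max/Avg/Var measurements
    ft.get_ft_data(out, result, ['CXS', 'SCHED'])
```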
diff --git a/parse/point.py b/parse/point.py new file mode 100644 index 0000000..4343d03 --- /dev/null +++ b/parse/point.py | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | """ | ||
| 2 | Too much duplicate code in this file | ||
| 3 | """ | ||
| 4 | |||
| 5 | import copy | ||
| 6 | import numpy as np | ||
| 7 | from enum import Enum | ||
| 8 | from collections import defaultdict | ||
| 9 | |||
| 10 | Type = Enum(['Min','Max','Avg','Var']) | ||
| 11 | default_typemap = {Type.Max : {Type.Max : 1, Type.Min : 0, Type.Avg : 1, Type.Var : 1}, | ||
| 12 | Type.Min : {Type.Max : 1, Type.Min : 0, Type.Avg : 1, Type.Var : 1}, | ||
| 13 | Type.Avg : {Type.Max : 1, Type.Min : 0, Type.Avg : 1, Type.Var : 1}} | ||
| 14 | |||
| 15 | def make_typemap(): | ||
| 16 | return copy.deepcopy(default_typemap) | ||
| 17 | |||
| 18 | def dict_str(adict, sep = "\n"): | ||
| 19 | return sep.join(["%s: %s" % (k, str(v)) for (k,v) in adict.iteritems()]) | ||
| 20 | |||
| 21 | class Measurement(object): | ||
| 22 | def __init__(self, id = None, kv = {}): | ||
| 23 | self.id = id | ||
| 24 | self.stats = {} | ||
| 25 | for k, v in kv.iteritems(): | ||
| 26 | self[k] = v | ||
| 27 | |||
| 28 | def from_array(self,array): | ||
| 29 | array = np.array(array) | ||
| 30 | self[Type.Max] = array.max() | ||
| 31 | self[Type.Avg] = array.mean() | ||
| 32 | self[Type.Var] = array.var() | ||
| 33 | return self | ||
| 34 | |||
| 35 | def __check_type(self, type): | ||
| 36 | if not type in Type: | ||
| 37 | raise AttributeError("Not a valid type '%s'" % type) | ||
| 38 | |||
| 39 | def __getitem__(self, type): | ||
| 40 | self.__check_type(type) | ||
| 41 | return self.stats[type] | ||
| 42 | |||
| 43 | def __iter__(self): | ||
| 44 | return self.stats.iteritems() | ||
| 45 | |||
| 46 | def __contains__(self, type): | ||
| 47 | self.__check_type(type) | ||
| 48 | return type in self.stats | ||
| 49 | |||
| 50 | def __setitem__(self, type, value): | ||
| 51 | self.__check_type(type) | ||
| 52 | self.stats[type] = value | ||
| 53 | |||
| 54 | def __str__(self): | ||
| 55 | return "<Measurement-%s> %s" % (self.id, dict_str(self.stats, " ")) | ||
| 56 | |||
| 57 | |||
| 58 | class Summary(Measurement): | ||
| 59 | def __init__(self, id, measures, typemap = default_typemap): | ||
| 60 | super(Summary, self).__init__("Summary-%s" % id) | ||
| 61 | |||
| 62 | self.__check_types(measures, typemap) | ||
| 63 | self.__summarize(measures, typemap) | ||
| 64 | |||
| 65 | def __check_types(self, measures, typemap): | ||
| 66 | required_types = self.__get_required(typemap) | ||
| 67 | for m in measures: | ||
| 68 | for type in required_types: | ||
| 69 | if type not in m: | ||
| 70 | raise ValueError("measurement '%s' missing type '%s'" % | ||
| 71 | (self.id, type)) | ||
| 72 | |||
| 73 | def __summarize(self, measures, typemap): | ||
| 74 | for sum_type in Type: | ||
| 75 | self[sum_type] = Measurement(self.id) | ||
| 76 | |||
| 77 | def avg(vals): | ||
| 78 | return sum(vals) / len(vals) | ||
| 79 | |||
| 80 | for base_type in Type: | ||
| 81 | for sum_type, func in (Type.Min,min),(Type.Max,max),(Type.Avg, avg): | ||
| 82 | if typemap[sum_type][base_type]: | ||
| 83 | val = func([m[base_type] for m in measures]) | ||
| 84 | self[sum_type][base_type] = val | ||
| 85 | |||
| 86 | def __get_required(self, typemap): | ||
| 87 | required = [] | ||
| 88 | for base_type in Type: | ||
| 89 | matches = [t[base_type] for t in typemap.itervalues()] | ||
| 90 | if bool(sum(matches)): | ||
| 91 | required += [base_type] | ||
| 92 | return required | ||
| 93 | |||
| 94 | class ExpPoint(object): | ||
| 95 | def __init__(self, id = "", init = {}): | ||
| 96 | self.stats = {} | ||
| 97 | for type, value in init.iteritems(): | ||
| 98 | self[type] = value | ||
| 99 | self.id = id | ||
| 100 | |||
| 101 | def __check_val(self, obj): | ||
| 102 | if not isinstance(obj, Measurement): | ||
| 103 | raise AttributeError("Not a valid measurement '%s'" % obj) | ||
| 104 | |||
| 105 | def __getitem__(self, type): | ||
| 106 | return self.stats[type] | ||
| 107 | |||
| 108 | def __iter__(self): | ||
| 109 | return self.stats.iteritems() | ||
| 110 | |||
| 111 | def __contains__(self, type): | ||
| 112 | return type in self.stats | ||
| 113 | |||
| 114 | def __setitem__(self, type, value): | ||
| 115 | self.__check_val(value) | ||
| 116 | self.stats[type] = value | ||
| 117 | |||
| 118 | def __str__(self): | ||
| 119 | return "<ExpPoint-%s>\n%s" % (self.id, dict_str(self.stats)) | ||
| 120 | |||
| 121 | def get_stats(self): | ||
| 122 | return self.stats.keys() | ||
| 123 | |||
| 124 | class SummaryPoint(ExpPoint): | ||
| 125 | def __init__(self, id, points, typemap = default_typemap): | ||
| 126 | super(SummaryPoint,self).__init__("Summary-%s" % id) | ||
| 127 | |||
| 128 | grouped = defaultdict(lambda : []) | ||
| 129 | |||
| 130 | for exp in points: | ||
| 131 | for name,measure in exp.stats.iteritems(): | ||
| 132 | grouped[name] += [measure] | ||
| 133 | |||
| 134 | for key in grouped.iterkeys(): | ||
| 135 | self[key] = Summary(key, grouped[key], typemap) | ||
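A short sketch of the data model (Python 2, matching the `iteritems` calls): a `Measurement` holds per-statistic values, an `ExpPoint` maps stat names to measurements, and `SummaryPoint` folds several points into Min/Max/Avg summaries of each base statistic:

```python
from parse.point import Measurement, ExpPoint, SummaryPoint, Type

m1 = Measurement('run-1').from_array([1.0, 2.0, 3.0])  # Max=3.0, Avg=2.0
m2 = Measurement('run-2').from_array([2.0, 4.0, 6.0])  # Max=6.0, Avg=4.0

p1 = ExpPoint('run-1', {'cxs': m1})
p2 = ExpPoint('run-2', {'cxs': m2})

summary = SummaryPoint('cxs-group', [p1, p2])
# Largest per-run average: max(2.0, 4.0) == 4.0
print(summary['cxs'][Type.Max][Type.Avg])
```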
diff --git a/parse/sched.py b/parse/sched.py new file mode 100644 index 0000000..ec4d917 --- /dev/null +++ b/parse/sched.py | |||
| @@ -0,0 +1,89 @@ | |||
| 1 | import config.config as conf | ||
| 2 | import os | ||
| 3 | import re | ||
| 4 | import numpy as np | ||
| 5 | import subprocess | ||
| 6 | |||
| 7 | from collections import namedtuple | ||
| 8 | from point import Measurement | ||
| 9 | |||
| 10 | Task = namedtuple('Task', ['pid', 'period']) | ||
| 11 | |||
| 12 | def get_st_output(data_dir, out_dir): | ||
| 13 | bin_files = conf.FILES['sched_data'].format(".*") | ||
| 14 | bins = [f for f in os.listdir(data_dir) if re.match(bin_files, f)] | ||
| 15 | |||
| 16 | output_file = "%s/out-st" % out_dir | ||
| 17 | |||
| 18 | if os.path.isfile(output_file): | ||
| 19 | return output_file | ||
| 20 | |||
| 21 | if len(bins) != 0: | ||
| 22 | cmd_arr = [conf.BINS['st_show']] | ||
| 23 | cmd_arr.extend(bins) | ||
| 24 | with open(output_file, "w") as f: | ||
| 25 | subprocess.call(cmd_arr, cwd=data_dir, stdout=f) | ||
| 26 | else: | ||
| 27 | return None | ||
| 28 | return output_file | ||
| 29 | |||
| 30 | def get_tasks(data): | ||
| 31 | reg = r"PARAM.*?(\d+).*?cost:\s+[\d\.]+ms.*?period.*?([\d.]+)" | ||
| 32 | return [Task(x[0], x[1]) for x in re.findall(reg, data)] | ||
| 33 | |||
| 34 | def extract_tardy_vals(data, exp_point): | ||
| 35 | ratios = [] | ||
| 36 | tards = [] | ||
| 37 | |||
| 38 | for t in get_tasks(data): | ||
| 39 | reg = r"TARDY.*?" + t.pid + "/(\d+).*?Tot.*?([\d.]+).*?ms.*([\d.]+).*?ms.*?([\d.]+)" | ||
| 40 | matches = re.findall(reg, data) | ||
| 41 | if len(matches) != 0: | ||
| 42 | jobs = float(matches[0][0]) | ||
| 43 | total_tard = float(matches[0][1]) | ||
| 44 | # max_tard = float(matches[0][2]) | ||
| 45 | misses = float(matches[0][3]) | ||
| 46 | rel_tard = (total_tard / jobs) / float(t.period) | ||
| 47 | if misses != 0: | ||
| 48 | miss_ratio = (misses / jobs) | ||
| 49 | else: | ||
| 50 | miss_ratio = 0 | ||
| 51 | |||
| 52 | ratios.append(miss_ratio) | ||
| 53 | tards.append(rel_tard) | ||
| 54 | |||
| 55 | for (array, name) in ((tards, "rel-tard"), (ratios, "miss-ratio")): | ||
| 56 | exp_point[name] = Measurement().from_array(array) | ||
| 57 | |||
| 58 | def extract_variance(data, exp_point): | ||
| 59 | varz = [] | ||
| 60 | for t in get_tasks(data): | ||
| 61 | reg = r"COMPLETION.*?" + t.pid + r".*?([\d\.]+)ms" | ||
| 62 | matches = re.findall(reg, data) | ||
| 63 | |||
| 64 | if len(matches) == 0: | ||
| 65 | continue | ||
| 66 | |||
| 67 | job_times = np.array(filter(lambda x: float(x) != 0, matches), dtype=np.float) | ||
| 68 | |||
| 69 | # Coefficient of variation | ||
| 70 | cv = job_times.std() / job_times.mean() | ||
| 71 | # Correction, assuming normal distributions | ||
| 72 | corrected = (1 + 1.0/(4 * len(job_times))) * cv | ||
| 73 | |||
| 74 | varz.append(corrected) | ||
| 75 | |||
| 76 | exp_point['var'] = Measurement().from_array(varz) | ||
| 77 | |||
| 78 | def get_sched_data(data_file, result): | ||
| 79 | with open(data_file, 'r') as f: | ||
| 80 | data = f.read() | ||
| 81 | |||
| 82 | # if conf != BASE: | ||
| 83 | # (our_values, their_values) = extract_exec_vals(our_data, their_data) | ||
| 84 | # conf_result = get_stats(our_values, their_values) | ||
| 85 | # for key in conf_result.keys(): | ||
| 86 | # result[key][conf] = conf_result[key] | ||
| 87 | |||
| 88 | extract_tardy_vals(data, result) | ||
| 89 | extract_variance(data, result) | ||
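The statistic in `extract_variance` is a coefficient of variation with a small-sample correction (assuming, per the comment, roughly normal completion times). A worked example of the arithmetic with hypothetical numbers:

```python
import numpy as np

job_times = np.array([10.0, 12.0, 11.0, 13.0])   # hypothetical completions (ms)
cv = job_times.std() / job_times.mean()          # 1.118 / 11.5 ~= 0.0972
corrected = (1 + 1.0/(4 * len(job_times))) * cv  # factor 1 + 1/16 = 1.0625
print(corrected)                                 # ~= 0.1033
```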
diff --git a/parse/tuple_table.py b/parse/tuple_table.py new file mode 100644 index 0000000..df80b37 --- /dev/null +++ b/parse/tuple_table.py | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | from collections import defaultdict | ||
| 2 | from point import SummaryPoint | ||
| 3 | from dir_map import DirMap | ||
| 4 | |||
| 5 | class ColMap(object): | ||
| 6 | def __init__(self): | ||
| 7 | self.rev_map = {} | ||
| 8 | self.col_list = [] | ||
| 9 | |||
| 10 | def columns(self): | ||
| 11 | return self.col_list | ||
| 12 | |||
| 13 | def get_key(self, kv): | ||
| 14 | key = () | ||
| 15 | added = 0 | ||
| 16 | |||
| 17 | for col in self.col_list: | ||
| 18 | if col not in kv: | ||
| 19 | key += (None,) | ||
| 20 | else: | ||
| 21 | added += 1 | ||
| 22 | key += (kv[col],) | ||
| 23 | |||
| 24 | if added != len(kv): | ||
| 25 | raise Exception("column map '%s' missed field in map\n%s" % | ||
| 26 | (self.col_list, kv)) | ||
| 27 | |||
| 28 | return key | ||
| 29 | |||
| 30 | def get_map(self, tuple): | ||
| 31 | map = {} | ||
| 32 | for i in range(0, len(tuple)): | ||
| 33 | map[self.col_list[i]] = tuple[i] | ||
| 34 | return map | ||
| 35 | |||
| 36 | def try_add(self, column): | ||
| 37 | if column not in self.rev_map: | ||
| 38 | self.rev_map[column] = len(self.col_list) | ||
| 39 | self.col_list += [column] | ||
| 40 | |||
| 41 | def __str__(self): | ||
| 42 | return "<ColMap>%s" % (self.rev_map) | ||
| 43 | |||
| 44 | class TupleTable(object): | ||
| 45 | def __init__(self, col_map): | ||
| 46 | self.col_map = col_map | ||
| 47 | self.table = defaultdict(lambda: []) | ||
| 48 | self.reduced = False | ||
| 49 | |||
| 50 | def add_exp(self, kv, point): | ||
| 51 | key = self.col_map.get_key(kv) | ||
| 52 | self.table[key] += [point] | ||
| 53 | |||
| 54 | def __reduce(self): | ||
| 55 | if self.reduced: | ||
| 56 | raise Exception("cannot reduce twice!") | ||
| 57 | self.reduced = True | ||
| 58 | for key, values in self.table.iteritems(): | ||
| 59 | self.table[key] = SummaryPoint(key, values) | ||
| 60 | |||
| 61 | def write_result(self, out_dir): | ||
| 62 | dir_map = DirMap(out_dir) | ||
| 63 | self.__reduce() | ||
| 64 | for key, point in self.table.iteritems(): | ||
| 65 | kv = self.col_map.get_map(key) | ||
| 66 | |||
| 67 | for col in self.col_map.columns(): | ||
| 68 | val = kv[col] | ||
| 69 | kv.pop(col) | ||
| 70 | |||
| 71 | dir_map.add_point(col, val, kv, point) | ||
| 72 | |||
| 73 | kv[col] = val | ||
| 74 | |||
| 75 | dir_map.reduce() | ||
| 76 | dir_map.write() | ||
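How the pieces fit together (a sketch with hypothetical parameters): `ColMap` fixes a column order so each experiment's params become a tuple key, and `write_result` re-varies each column in turn, handing the remaining params to `DirMap`:

```python
from parse.tuple_table import ColMap, TupleTable
from parse.point import ExpPoint, Measurement

cmap = ColMap()
for col in ('cpus', 'tasks'):
    cmap.try_add(col)

table = TupleTable(cmap)
for tasks, vals in ((20, [0.1, 0.2]), (40, [0.3, 0.5])):
    point = ExpPoint(str(tasks), {'miss-ratio': Measurement().from_array(vals)})
    table.add_exp({'cpus': 4, 'tasks': tasks}, point)  # keyed as (4, tasks)

# Summarizes each key's points, then emits CSV trees under csvs/
table.write_result('csvs')
```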
diff --git a/parse_exps.py b/parse_exps.py index e69de29..6a7d14f 100644..100755 --- a/parse_exps.py +++ b/parse_exps.py | |||
| @@ -0,0 +1,85 @@ | |||
| 1 | #!/usr/bin/env python | ||
| 2 | from __future__ import print_function | ||
| 3 | |||
| 4 | import config.config as conf | ||
| 5 | import os | ||
| 6 | |||
| 7 | import parse.ft as ft | ||
| 8 | import parse.sched as st | ||
| 9 | |||
| 10 | from collections import namedtuple | ||
| 11 | from common import load_params | ||
| 12 | from optparse import OptionParser | ||
| 13 | from parse.tuple_table import ColMap,TupleTable | ||
| 14 | from parse.point import ExpPoint | ||
| 15 | |||
| 16 | def parse_args(): | ||
| 17 | parser = OptionParser("usage: %prog [options] [data_dir]...") | ||
| 18 | |||
| 19 | parser.add_option('-o', '--out-dir', dest='out_dir', | ||
| 20 | help='directory for data output', default=os.getcwd()) | ||
| 21 | |||
| 22 | return parser.parse_args() | ||
| 23 | |||
| 24 | ExpData = namedtuple('ExpData', ['name', 'params', 'data_files']) | ||
| 25 | DataFiles = namedtuple('DataFiles', ['ft','st']) | ||
| 26 | |||
| 27 | def get_exp_params(data_dir, col_map): | ||
| 28 | param_file = "%s/%s" % (data_dir, conf.DEFAULTS['params_file']) | ||
| 29 | if not os.path.isfile(param_file): | ||
| 30 | raise Exception("No param file '%s' exists!" % param_file) | ||
| 31 | |||
| 32 | # Keep only params that uniquely identify the experiment | ||
| 33 | params = load_params(param_file) | ||
| 34 | for ignored in conf.PARAMS.itervalues(): | ||
| 35 | if ignored in params: | ||
| 36 | params.pop(ignored) | ||
| 37 | |||
| 38 | # Track all changed params | ||
| 39 | for key in params.keys(): | ||
| 40 | col_map.try_add(key) | ||
| 41 | |||
| 42 | return params | ||
| 43 | |||
| 44 | |||
| 45 | def gen_exp_data(exp_dirs, col_map): | ||
| 46 | exps = [] | ||
| 47 | for data_dir in exp_dirs: | ||
| 48 | if not os.path.isdir(data_dir): | ||
| 49 | raise IOError("Invalid experiment '%s'" % os.path.abspath(data_dir)) | ||
| 50 | |||
| 51 | tmp_dir = data_dir + "/tmp" | ||
| 52 | if not os.path.exists(tmp_dir): | ||
| 53 | os.mkdir(tmp_dir) | ||
| 54 | |||
| 55 | params = get_exp_params(data_dir, col_map) | ||
| 56 | st_output = st.get_st_output(data_dir, tmp_dir) | ||
| 57 | ft_output = ft.get_ft_output(data_dir, tmp_dir) | ||
| 58 | |||
| 59 | exp_data = ExpData(data_dir, params, DataFiles(ft_output, st_output)) | ||
| 60 | exps += [exp_data] | ||
| 61 | |||
| 62 | return exps | ||
| 63 | |||
| 64 | def main(): | ||
| 65 | opts, args = parse_args() | ||
| 66 | |||
| 67 | args = args or [os.getcwd()] | ||
| 68 | col_map = ColMap() | ||
| 69 | exps = gen_exp_data(args, col_map) | ||
| 70 | |||
| 71 | table = TupleTable(col_map) | ||
| 72 | |||
| 73 | for exp in exps: | ||
| 74 | result = ExpPoint(exp.name) | ||
| 75 | if exp.data_files.ft: | ||
| 76 | ft.get_ft_data(exp.data_files.ft, result, conf.BASE_EVENTS) | ||
| 77 | if exp.data_files.st: | ||
| 78 | st.get_sched_data(exp.data_files.st, result) | ||
| 79 | |||
| 80 | table.add_exp(exp.params, result) | ||
| 81 | |||
| 82 | table.write_result(opts.out_dir) | ||
| 83 | |||
| 84 | if __name__ == '__main__': | ||
| 85 | main() | ||
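For completeness, a sketch of how the new script is invoked (paths are illustrative):

```python
# Run from the shell; each positional argument is one experiment directory:
#   ./parse_exps.py -o csvs run-data/exp-1 run-data/exp-2
# Each directory needs a params.py, plus ft.bin and/or st-*.bin traces.
# Intermediate output lands in <dir>/tmp; CSV trees are written under csvs/.
```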
