Diffstat (limited to 'parse')
-rw-r--r--  parse/__init__.py    |   0
-rw-r--r--  parse/dir_map.py     | 104
-rw-r--r--  parse/enum.py        |   7
-rw-r--r--  parse/ft.py          |  60
-rw-r--r--  parse/point.py       | 135
-rw-r--r--  parse/sched.py       |  89
-rw-r--r--  parse/tuple_table.py |  76
7 files changed, 471 insertions, 0 deletions
diff --git a/parse/__init__.py b/parse/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/parse/__init__.py
diff --git a/parse/dir_map.py b/parse/dir_map.py
new file mode 100644
index 0000000..6e959f2
--- /dev/null
+++ b/parse/dir_map.py
@@ -0,0 +1,104 @@
import os

from collections import defaultdict
from point import Type

class TreeNode(object):
    def __init__(self, parent = None):
        self.parent = parent
        self.children = defaultdict(lambda : TreeNode(self))
        self.values = []

class DirMap(object):
    def to_csv(self, vals):
        val_strs = []
        for key in sorted(vals.keys()):
            val_strs += ["%s=%s" % (key, vals[key])]
        return "%s.csv" % ("_".join(val_strs))

    def __init__(self, out_dir):
        self.root = TreeNode(None)
        self.out_dir = out_dir
        self.values = []

    def debug_update_node(self, path, keys, value):
        self.__update_node(path, keys, value)

    def __update_node(self, path, keys, value):
        node = self.root

        path += [ self.to_csv(keys) ]
        for p in path:
            node = node.children[p]

        node.values += [value]

    def add_point(self, vary, vary_value, keys, point):
        for stat in point.get_stats():
            summary = point[stat]

            for summary_type in Type:
                measurement = summary[summary_type]

                for base_type in Type:
                    if not base_type in measurement:
                        continue
                    # Ex: wcet/avg/max/vary-type/other-stuff.csv
                    path = [ stat, summary_type, base_type, "vary-%s" % vary ]
                    result = measurement[base_type]

                    self.__update_node(path, keys, (vary_value, result))

    def reduce(self):
        def reduce2(node):
            for key in node.children.keys():
                child = node.children[key]
                reduce2(child)
                if not (child.children or child.values):
                    node.children.pop(key)

            if len(node.values) == 1:
                node.values = []

        reduce2(self.root)

    def write(self):
        def write2(path, node):
            out_path = "/".join(path)
            if node.values:
                # Leaf
                with open("/".join(path), "w") as f:
                    arr = [",".join([str(b) for b in n]) for n in node.values]
                    f.write("\n".join(arr) + "\n")
            elif not os.path.isdir(out_path):
                os.mkdir(out_path)

            for (key, child) in node.children.iteritems():
                path.append(key)
                write2(path, child)
                path.pop()

        write2([self.out_dir], self.root)

    def __str__(self):
        def str2(node, level):
            header = " " * level
            ret = ""
            if not node.children:
                return "%s%s\n" % (header, str(node.values) if node.values else "")
            for key,child in node.children.iteritems():
                ret += "%s/%s\n" % (header, key)
                ret += str2(child, level + 1)
            return ret

        return "%s\n%s" % (self.out_dir, str2(self.root, 1))
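For orientation, a minimal sketch of how the DirMap tree is meant to be driven (Python 2, matching the iteritems usage above); the stat name, path, and values are invented for illustration:

    from dir_map import DirMap

    dm = DirMap("out")

    # Two (vary_value, result) pairs filed under the same path; the
    # remaining keys are flattened into the csv filename by to_csv().
    dm.debug_update_node(["wcet", "Avg", "Max", "vary-tasks"],
                         {"cpus": 4}, (8, 12.5))
    dm.debug_update_node(["wcet", "Avg", "Max", "vary-tasks"],
                         {"cpus": 4}, (16, 14.1))

    dm.reduce()   # prunes empty subtrees and singleton value lists
    print(dm)     # tree view rooted at "out"
    dm.write()    # creates out/wcet/Avg/Max/vary-tasks/cpus=4.csv

Note that reduce() discards a leaf's values when there is only one of them, so a lone data point would vanish before write().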
diff --git a/parse/enum.py b/parse/enum.py
new file mode 100644
index 0000000..bf35d01
--- /dev/null
+++ b/parse/enum.py
@@ -0,0 +1,7 @@
class Enum(frozenset):
    def __getattr__(self, name):
        if name in self:
            return name
        raise AttributeError
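This Enum is just a frozenset of strings whose member names echo back through attribute access; a quick sketch of the behaviour point.py relies on:

    from enum import Enum

    Type = Enum(['Min', 'Max', 'Avg', 'Var'])

    print(Type.Max)        # "Max" -- members are plain strings
    print('Avg' in Type)   # True
    for t in Type:         # set iteration: order is unspecified
        print(t)
    # Type.Median would raise AttributeError

Because members are ordinary strings, they work directly as dict keys and path components, which is exactly how dir_map.py uses them.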
diff --git a/parse/ft.py b/parse/ft.py
new file mode 100644
index 0000000..9837898
--- /dev/null
+++ b/parse/ft.py
@@ -0,0 +1,60 @@
import config.config as conf
import os
import re
import shutil as sh
import subprocess

from point import Measurement,Type

def get_ft_output(data_dir, out_dir):
    bin_file = conf.FILES['ft_data'] + "$"
    bins = [f for f in os.listdir(data_dir) if re.match(bin_file, f)]

    FT_DATA_NAME = "scheduler=x-ft"
    output_file = "{}/out-ft".format(out_dir)

    if os.path.isfile(output_file):
        print("ft-output already exists for %s" % data_dir)
        return output_file

    if len(bins) != 0:
        err_file = open("%s/err-ft" % out_dir, 'w')
        # Need to make a copy of the original data file so scripts can change it
        sh.copyfile("{}/{}".format(data_dir, bins[0]),
                    "{}/{}".format(out_dir, FT_DATA_NAME))

        subprocess.call([conf.BINS['sort'], FT_DATA_NAME],
                        cwd=out_dir, stderr=err_file, stdout=err_file)
        subprocess.call([conf.BINS['split'], FT_DATA_NAME],
                        cwd=out_dir, stderr=err_file, stdout=err_file)

        # Previous subprocesses just spit out all these intermediate files
        bins = [f for f in os.listdir(out_dir) if re.match(".*overhead=.*bin", f)]
        bins = [f for f in bins if os.stat("%s/%s"%(out_dir, f)).st_size]

        # Analyze will summarize those
        cmd_arr = [conf.BINS['analyze']]
        cmd_arr.extend(bins)
        with open(output_file, "w") as f:
            subprocess.call(cmd_arr, cwd=out_dir, stdout=f, stderr=err_file)
    else:
        return None
    return output_file

def get_ft_data(data_file, result, overheads):
    rstr = r",(?:\s+[^\s]+){3}.*?([\d\.]+).*?([\d\.]+),(?:\s+[^\s]+){3}.*?([\d\.]+)"

    with open(data_file) as f:
        data = f.read()

    for ovh in overheads:
        measure = Measurement("%s-%s" % (data_file, ovh))
        vals = re.findall(".*{}".format(ovh) + rstr, data);
        if len(vals) != 0:
            vals = vals[0]
            measure[Type.Max] = float(vals[0])
            measure[Type.Avg] = float(vals[1])
            measure[Type.Var] = float(vals[2])
            result[ovh] = measure

    return result
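A hedged sketch of how these two entry points chain together; the directory names and overhead labels here are assumptions for illustration, and conf.FILES/conf.BINS must resolve, so this only runs inside the surrounding repo:

    from ft import get_ft_output, get_ft_data
    from point import ExpPoint

    # Sort/split/analyze the raw feather-trace binaries, if any.
    out = get_ft_output("run-data", "run-output")
    if out:
        point = ExpPoint("run-output")
        # Each overhead found in the analyze output becomes a
        # Measurement with Max/Avg/Var filled in.
        get_ft_data(out, point, ["CXS", "SCHED"])
        print(point)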
diff --git a/parse/point.py b/parse/point.py
new file mode 100644
index 0000000..4343d03
--- /dev/null
+++ b/parse/point.py
@@ -0,0 +1,135 @@
1 | """ | ||
2 | Too much duplicate code in this file | ||
3 | """ | ||
4 | |||
5 | import copy | ||
6 | import numpy as np | ||
7 | from enum import Enum | ||
8 | from collections import defaultdict | ||
9 | |||
10 | Type = Enum(['Min','Max','Avg','Var']) | ||
11 | default_typemap = {Type.Max : {Type.Max : 1, Type.Min : 0, Type.Avg : 1, Type.Var : 1}, | ||
12 | Type.Min : {Type.Max : 1, Type.Min : 0, Type.Avg : 1, Type.Var : 1}, | ||
13 | Type.Avg : {Type.Max : 1, Type.Min : 0, Type.Avg : 1, Type.Var : 1}} | ||
14 | |||
15 | def make_typemap(): | ||
16 | return copy.deepcopy(default_typemap) | ||
17 | |||
18 | def dict_str(adict, sep = "\n"): | ||
19 | return sep.join(["%s: %s" % (k, str(v)) for (k,v) in adict.iteritems()]) | ||
20 | |||
21 | class Measurement(object): | ||
22 | def __init__(self, id = None, kv = {}): | ||
23 | self.id = id | ||
24 | self.stats = {} | ||
25 | for k, v in kv.iteritems(): | ||
26 | self[k] = v | ||
27 | |||
28 | def from_array(self,array): | ||
29 | array = np.array(array) | ||
30 | self[Type.Max] = array.max() | ||
31 | self[Type.Avg] = array.mean() | ||
32 | self[Type.Var] = array.var() | ||
33 | return self | ||
34 | |||
35 | def __check_type(self, type): | ||
36 | if not type in Type: | ||
37 | raise AttributeError("Not a valid type '%s'" % type) | ||
38 | |||
39 | def __getitem__(self, type): | ||
40 | self.__check_type(type) | ||
41 | return self.stats[type] | ||
42 | |||
43 | def __iter__(self): | ||
44 | return self.stats.iteritems() | ||
45 | |||
46 | def __contains__(self, type): | ||
47 | self.__check_type(type) | ||
48 | return type in self.stats | ||
49 | |||
50 | def __setitem__(self, type, value): | ||
51 | self.__check_type(type) | ||
52 | self.stats[type] = value | ||
53 | |||
54 | def __str__(self): | ||
55 | return "<Measurement-%s> %s" % (self.id, dict_str(self.stats, " ")) | ||
56 | |||
57 | |||
58 | class Summary(Measurement): | ||
59 | def __init__(self, id, measures, typemap = default_typemap): | ||
60 | super(Summary, self).__init__("Summary-%s" % id) | ||
61 | |||
62 | self.__check_types(measures, typemap) | ||
63 | self.__summarize(measures, typemap) | ||
64 | |||
65 | def __check_types(self, measures, typemap): | ||
66 | required_types = self.__get_required(typemap) | ||
67 | for m in measures: | ||
68 | for type in required_types: | ||
69 | if type not in m: | ||
70 | raise ValueError("measurement '%s' missing type '%s'" % | ||
71 | (self.id, type)) | ||
72 | |||
73 | def __summarize(self, measures, typemap): | ||
74 | for sum_type in Type: | ||
75 | self[sum_type] = Measurement(self.id) | ||
76 | |||
77 | def avg(vals): | ||
78 | return sum(vals) / len(vals) | ||
79 | |||
80 | for base_type in Type: | ||
81 | for sum_type, func in (Type.Min,min),(Type.Max,max),(Type.Avg, avg): | ||
82 | if typemap[sum_type][base_type]: | ||
83 | val = func([m[base_type] for m in measures]) | ||
84 | self[sum_type][base_type] = val | ||
85 | |||
86 | def __get_required(self, typemap): | ||
87 | required = [] | ||
88 | for base_type in Type: | ||
89 | matches = [t[base_type] for t in typemap.itervalues()] | ||
90 | if bool(sum(matches)): | ||
91 | required += [base_type] | ||
92 | return required | ||
93 | |||
94 | class ExpPoint(object): | ||
95 | def __init__(self, id = "", init = {}): | ||
96 | self.stats = {} | ||
97 | for type, value in init.iteritems(): | ||
98 | self[type] = value | ||
99 | self.id = id | ||
100 | |||
101 | def __check_val(self, obj): | ||
102 | if not isinstance(obj, Measurement): | ||
103 | raise AttributeError("Not a valid measurement '%s'" % obj) | ||
104 | |||
105 | def __getitem__(self, type): | ||
106 | return self.stats[type] | ||
107 | |||
108 | def __iter__(self): | ||
109 | return self.stats.iteritems() | ||
110 | |||
111 | def __contains__(self, type): | ||
112 | return type in self.stats | ||
113 | |||
114 | def __setitem__(self, type, value): | ||
115 | self.__check_val(value) | ||
116 | self.stats[type] = value | ||
117 | |||
118 | def __str__(self): | ||
119 | return "<ExpPoint-%s>\n%s" % (self.id, dict_str(self.stats)) | ||
120 | |||
121 | def get_stats(self): | ||
122 | return self.stats.keys() | ||
123 | |||
124 | class SummaryPoint(ExpPoint): | ||
125 | def __init__(self, id, points, typemap = default_typemap): | ||
126 | super(SummaryPoint,self).__init__("Summary-%s" % id) | ||
127 | |||
128 | grouped = defaultdict(lambda : []) | ||
129 | |||
130 | for exp in points: | ||
131 | for name,measure in exp.stats.iteritems(): | ||
132 | grouped[name] += [measure] | ||
133 | |||
134 | for key in grouped.iterkeys(): | ||
135 | self[key] = Summary(key, grouped[key], typemap) | ||
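A small end-to-end sketch of the hierarchy above: two ExpPoints carrying one Measurement each are collapsed into a SummaryPoint, giving per-statistic summaries of summaries (values invented):

    from point import Measurement, ExpPoint, SummaryPoint, Type

    p1 = ExpPoint("run1", {"tard": Measurement("run1").from_array([1.0, 3.0])})
    p2 = ExpPoint("run2", {"tard": Measurement("run2").from_array([2.0, 6.0])})

    summary = SummaryPoint("tard", [p1, p2])

    # summary["tard"] is a Summary: a Measurement of Measurements.
    print(summary["tard"][Type.Max][Type.Max])  # max of maxes: 6.0
    print(summary["tard"][Type.Avg][Type.Avg])  # avg of avgs: (2.0 + 4.0) / 2 = 3.0

Note that from_array() fills in Max/Avg/Var but not Min, which is consistent with default_typemap marking Type.Min as unrequired (its column is all zeros).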
diff --git a/parse/sched.py b/parse/sched.py
new file mode 100644
index 0000000..ec4d917
--- /dev/null
+++ b/parse/sched.py
@@ -0,0 +1,89 @@
import config.config as conf
import os
import re
import numpy as np
import subprocess

from collections import namedtuple
from point import Measurement

Task = namedtuple('Task', ['pid', 'period'])

def get_st_output(data_dir, out_dir):
    bin_files = conf.FILES['sched_data'].format(".*")
    bins = [f for f in os.listdir(data_dir) if re.match(bin_files, f)]

    output_file = "%s/out-st" % out_dir

    if os.path.isfile(output_file):
        return output_file

    if len(bins) != 0:
        cmd_arr = [conf.BINS['st_show']]
        cmd_arr.extend(bins)
        with open(output_file, "w") as f:
            subprocess.call(cmd_arr, cwd=data_dir, stdout=f)
    else:
        return None
    return output_file

def get_tasks(data):
    reg = r"PARAM.*?(\d+).*?cost:\s+[\d\.]+ms.*?period.*?([\d.]+)"
    return [Task(x[0], x[1]) for x in re.findall(reg, data)]

def extract_tardy_vals(data, exp_point):
    ratios = []
    tards = []

    for t in get_tasks(data):
        reg = r"TARDY.*?" + t.pid + "/(\d+).*?Tot.*?([\d.]+).*?ms.*([\d.]+).*?ms.*?([\d.]+)"
        matches = re.findall(reg, data)
        if len(matches) != 0:
            jobs = float(matches[0][0])
            total_tard = float(matches[0][1])
            # max_tard = float(matches[0][2])
            misses = float(matches[0][3])
            rel_tard = (total_tard / jobs) / float(t.period)
            if misses != 0:
                miss_ratio = (misses / jobs)
            else:
                miss_ratio = 0

            ratios.append(miss_ratio)
            tards.append(rel_tard)

    for (array, name) in ((tards, "rel-tard"), (ratios, "miss-ratio")):
        exp_point[name] = Measurement().from_array(array)

def extract_variance(data, exp_point):
    varz = []
    for t in get_tasks(data):
        reg = r"COMPLETION.*?" + t.pid + r".*?([\d\.]+)ms"
        matches = re.findall(reg, data)

        if len(matches) == 0:
            return 0

        job_times = np.array(filter(lambda x: float(x) != 0, matches), dtype=np.float)

        # Coefficient of variation
        cv = job_times.std() / job_times.mean()
        # Correction, assuming normal distributions
        corrected = (1 + 1/(4 * len(job_times))) * cv

        varz.append(corrected)

    exp_point['var'] = Measurement().from_array(varz)

def get_sched_data(data_file, result):
    with open(data_file, 'r') as f:
        data = f.read()

    # if conf != BASE:
    #     (our_values, their_values) = extract_exec_vals(our_data, their_data)
    #     conf_result = get_stats(our_values, their_values)
    #     for key in conf_result.keys():
    #         result[key][conf] = conf_result[key]

    extract_tardy_vals(data, result)
    extract_variance(data, result)
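To see the parsing in action, one can feed extract_tardy_vals a synthetic trace; the two lines below are invented purely to satisfy the PARAM/TARDY regexes above and may not match real st_show output (importing sched also still requires the repo's config package):

    from point import ExpPoint
    from sched import extract_tardy_vals

    data = ("PARAM 1234: cost: 10.0ms period 100.0\n"
            "TARDY 1234/25 Tot 12.5 ms 3.1 ms 2\n")

    point = ExpPoint("demo")
    extract_tardy_vals(data, point)
    print(point)  # miss-ratio = 2/25 = 0.08, rel-tard = (12.5/25)/100.0 = 0.005

One quirk worth knowing: extract_variance returns early if any task has no COMPLETION records, leaving 'var' unset on the point.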
diff --git a/parse/tuple_table.py b/parse/tuple_table.py
new file mode 100644
index 0000000..df80b37
--- /dev/null
+++ b/parse/tuple_table.py
@@ -0,0 +1,76 @@
from collections import defaultdict
from point import SummaryPoint
from dir_map import DirMap

class ColMap(object):
    def __init__(self):
        self.rev_map = {}
        self.col_list = []

    def columns(self):
        return self.col_list

    def get_key(self, kv):
        key = ()
        added = 0

        for col in self.col_list:
            if col not in kv:
                key += (None,)
            else:
                added += 1
                key += (kv[col],)

        if added != len(kv):
            raise Exception("column map '%s' missed field in map\n%s" %
                            (self.col_list, kv))

        return key

    def get_map(self, tuple):
        map = {}
        for i in range(0, len(tuple)):
            map[self.col_list[i]] = tuple[i]
        return map

    def try_add(self, column):
        if column not in self.rev_map:
            self.rev_map[column] = len(self.col_list)
            self.col_list += [column]

    def __str__(self):
        return "<ColMap>%s" % (self.rev_map)

class TupleTable(object):
    def __init__(self, col_map):
        self.col_map = col_map
        self.table = defaultdict(lambda: [])
        self.reduced = False

    def add_exp(self, kv, point):
        key = self.col_map.get_key(kv)
        self.table[key] += [point]

    def __reduce(self):
        if self.reduced:
            raise Exception("cannot reduce twice!")
        self.reduced = True
        for key, values in self.table.iteritems():
            self.table[key] = SummaryPoint(key, values)

    def write_result(self, out_dir):
        dir_map = DirMap(out_dir)
        self.__reduce()
        for key, point in self.table.iteritems():
            kv = self.col_map.get_map(key)

            for col in self.col_map.columns():
                val = kv[col]
                kv.pop(col)

                dir_map.add_point(col, val, kv, point)

                kv[col] = val

        dir_map.reduce()
        dir_map.write()
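Finally, a sketch tying the pieces together: a ColMap fixes the column order, a TupleTable groups ExpPoints by their key tuple, and write_result() emits the DirMap csv tree. Column names and values are invented:

    from point import ExpPoint, Measurement
    from tuple_table import ColMap, TupleTable

    cmap = ColMap()
    for col in ["scheduler", "tasks"]:
        cmap.try_add(col)

    table = TupleTable(cmap)
    for tasks in (8, 16):
        point = ExpPoint("run", {"tard": Measurement("run").from_array([0.1, 0.3])})
        table.add_exp({"scheduler": "GSN-EDF", "tasks": tasks}, point)

    # Summarizes each key's points, then writes csv files such as
    # parse-out/tard/Avg/Max/vary-tasks/scheduler=GSN-EDF.csv,
    # one "tasks" value per row.
    table.write_result("parse-out")

Since write_result() pops each column in turn and re-adds it, every column gets its own vary-<column> subtree; DirMap.reduce() then drops any subtree that ended up with only a single point.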