From 2844e3887b4ff635dfa85e9b2ec773b06fe9af4f Mon Sep 17 00:00:00 2001
From: Jonathan Herman
Date: Fri, 28 Sep 2012 16:55:30 -0400
Subject: Added option to parse scaling factors against a base task set.

---
 experiment/experiment.py |    2 +-
 parse/ft.py              |    2 +-
 parse/sched.py           |  123 ++++++++++++++++++++++++++++++++++++++---------
 parse/tuple_table.py     |    8 ++-
 parse_exps.py            |   67 +++++++++++++++++++++-----
 run_exps.py              |    2 +-
 6 files changed, 165 insertions(+), 39 deletions(-)

diff --git a/experiment/experiment.py b/experiment/experiment.py
index 5ed6480..a95ca42 100644
--- a/experiment/experiment.py
+++ b/experiment/experiment.py
@@ -161,7 +161,7 @@ class Experiment(object):
         self.log("Starting %d tracers" % len(self.tracers))
         map(methodcaller('start_tracing'), self.tracers)
 
-        time.sleep(2)
+        time.sleep(4)
 
     def teardown(self):
         sleep_time = 5
diff --git a/parse/ft.py b/parse/ft.py
index 9837898..868c8ca 100644
--- a/parse/ft.py
+++ b/parse/ft.py
@@ -41,7 +41,7 @@ def get_ft_output(data_dir, out_dir):
             return None
     return output_file
 
-def get_ft_data(data_file, result, overheads):
+def extract_ft_data(data_file, result, overheads):
     rstr = r",(?:\s+[^\s]+){3}.*?([\d\.]+).*?([\d\.]+),(?:\s+[^\s]+){3}.*?([\d\.]+)"
 
     with open(data_file) as f:
diff --git a/parse/sched.py b/parse/sched.py
index a84aece..94ab000 100644
--- a/parse/sched.py
+++ b/parse/sched.py
@@ -1,5 +1,6 @@
 """
 TODO: make regexes indexable by name
+
 """
 
 import config.config as conf
@@ -8,10 +9,11 @@ import re
 import numpy as np
 import subprocess
 
-from collections import namedtuple
-from point import Measurement
+from collections import namedtuple,defaultdict
+from point import Measurement,Type
 
-Task = namedtuple('Task', ['pid', 'period'])
+TaskConfig = namedtuple('TaskConfig', ['cpu','wcet','period'])
+Task = namedtuple('Task', ['pid', 'config'])
 
 def get_st_output(data_dir, out_dir):
     bin_files = conf.FILES['sched_data'].format(".*")
@@ -32,32 +34,54 @@
     return output_file
 
 def get_tasks(data):
-    reg = r"PARAM.*?(\d+).*?cost:\s+[\d\.]+ms.*?period.*?([\d.]+)"
-    return [Task(x[0], x[1]) for x in re.findall(reg, data)]
+    reg = r"PARAM *?(\d+)\/.*?cost:\s+([\d\.]+)ms.*?period.*?([\d.]+)ms.*?part.*?(\d+)"
+    ret = []
+    for match in re.findall(reg, data):
+        t = Task(match[0], TaskConfig(match[3],match[1],match[2]))
+        ret += [t]
+    return ret
+
+def get_task_exits(data):
+    reg = r"TASK_EXIT *?(\d+)/.*?Avg.*?(\d+).*?Max.*?(\d+)"
+    ret = []
+    for match in re.findall(reg, data):
+        m = Measurement(match[0], {Type.Max : match[2], Type.Avg : match[1]})
+        ret += [m]
+    return ret
 
 def extract_tardy_vals(data, exp_point):
-    ratios = []
-    tards = []
+    ratios    = []
+    avg_tards = []
+    max_tards = []
 
     for t in get_tasks(data):
-        reg = r"TARDY.*?" + t.pid + "/(\d+).*?Tot.*?([\d.]+).*?ms.*([\d.]+).*?ms.*?([\d.]+)"
+        reg = r"TARDY.*?" + t.pid + "/(\d+).*?Tot.*?([\d\.]+).*?ms.*([\d\.]+).*?ms.*?([\d\.]+)"
         matches = re.findall(reg, data)
         if len(matches) != 0:
             jobs = float(matches[0][0])
+
             total_tard = float(matches[0][1])
-            # max_tard = float(matches[0][2])
+            print("total tard: %s" % total_tard)
+            avg_tard = (total_tard / jobs) / float(t.config.period)
+            max_tard = float(matches[0][2]) / float(t.config.period)
+
+            print("avg tard: %s" % avg_tard)
+
             misses = float(matches[0][3])
-            rel_tard = (total_tard / jobs) / float(t.period)
             if misses != 0:
                 miss_ratio = (misses / jobs)
+                print("misses is %d, jobs is %d" % (misses, jobs))
             else:
                 miss_ratio = 0
 
-            ratios.append(miss_ratio)
-            tards.append(rel_tard)
+            ratios += [miss_ratio]
+            avg_tards += [avg_tard]
+            max_tards += [max_tard]
 
-    for (array, name) in ((tards, "rel-tard"), (ratios, "miss-ratio")):
-        exp_point[name] = Measurement().from_array(array)
+    exp_point["avg-rel-tard"] = Measurement().from_array(avg_tards)
+    exp_point["max-rel-tard"] = Measurement().from_array(max_tards)
+    exp_point["miss-ratio"] = Measurement().from_array(ratios)
 
 def extract_variance(data, exp_point):
     varz = []
@@ -77,17 +101,70 @@
         varz.append(corrected)
 
-    exp_point['var'] = Measurement().from_array(varz)
+    exp_point['exec-var'] = Measurement().from_array(varz)
 
-def get_sched_data(data_file, result):
+def extract_sched_data(data_file, result):
     with open(data_file, 'r') as f:
         data = f.read()
 
-    # if conf != BASE:
-    #     (our_values, their_values) = extract_exec_vals(our_data, their_data)
-    #     conf_result = get_stats(our_values, their_values)
-    #     for key in conf_result.keys():
-    #         result[key][conf] = conf_result[key]
+    extract_tardy_vals(data, result)
+    extract_variance(data, result)
 
-    extract_tardy_vals(data, result)
-    extract_variance(data, result)
+def config_exit_stats(file):
+    with open(file, 'r') as f:
+        data = f.read()
+
+    tasks = get_tasks(data)
+
+    # Dictionary of task exit measurements by pid
+    exits = get_task_exits(data)
+    exit_dict = dict((e.id, e) for e in exits)
+
+    # Dictionary where keys are configurations, values are lists
+    # of tasks with that configuration
+    config_dict = defaultdict(lambda: [])
+    for t in tasks:
+        config_dict[t.config] += [t]
+
+    for config in config_dict:
+        task_list = sorted(config_dict[config])
+
+        # Replace tasks with corresponding exit stats
+        exit_list = [exit_dict[t.pid] for t in task_list]
+        config_dict[config] = exit_list
+
+    return config_dict
+
+saved_stats = {}
+def get_base_stats(base_file):
+    if base_file in saved_stats:
+        return saved_stats[base_file]
+    result = config_exit_stats(base_file)
+    saved_stats[base_file] = result
+    return result
+
+def extract_scaling_data(data_file, base_file, result):
+    # Dictionaries of task exit stats, grouped by configuration
+    data_stats = config_exit_stats(data_file)
+    base_stats = get_base_stats(base_file)
+
+    # Scaling factors are calculated by matching groups of tasks with the same
+    # config, then comparing task-to-task exec times in order of PID within
+    # each group
+    max_scales = []
+    avg_scales = []
+    for config in data_stats:
+        if len(data_stats[config]) != len(base_stats[config]):
+            # Skip this config: we are missing a record and can't guarantee
+            # a task-to-task comparison
+            continue
+        for data_stat, base_stat in zip(data_stats[config],base_stats[config]):
+            # How much larger is the base exec stat than ours?
+            avg_scale = float(base_stat[Type.Avg]) / float(data_stat[Type.Avg])
+            max_scale = float(base_stat[Type.Max]) / float(data_stat[Type.Max])
+
+            avg_scales += [avg_scale]
+            max_scales += [max_scale]
+
+    result['max-scale'] = Measurement().from_array(max_scales)
+    result['avg-scale'] = Measurement().from_array(avg_scales)
diff --git a/parse/tuple_table.py b/parse/tuple_table.py
index 0cf6bec..b56fa6c 100644
--- a/parse/tuple_table.py
+++ b/parse/tuple_table.py
@@ -21,8 +21,8 @@ class ColMap(object):
                 added += 1
             key += (kv[col],)
 
-        if added != len(kv):
-            raise Exception("column map '%s' missed field in map\n%s" %
+        if added < len(kv):
+            raise Exception("column map '%s' missed field in map '%s'" %
                             (self.col_list, kv))
         return key
 
@@ -51,6 +51,10 @@ class TupleTable(object):
         key = self.col_map.get_key(kv)
         self.table[key] += [point]
 
+    def get_exps(self, kv):
+        key = self.col_map.get_key(kv)
+        return self.table[key]
+
     def __reduce(self):
         if self.reduced:
             raise Exception("cannot reduce twice!")
diff --git a/parse_exps.py b/parse_exps.py
index ecb1cac..3a1d1b9 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -2,9 +2,11 @@
 from __future__ import print_function
 
 import config.config as conf
+import copy
 import os
 import parse.ft as ft
 import parse.sched as st
+import re
 
 from collections import namedtuple
 from common import load_params
@@ -17,6 +19,9 @@ def parse_args():
     parser.add_option('-o', '--out-dir', dest='out_dir',
                       help='directory for data output', default=os.getcwd())
+    parser.add_option('-s', '--scale-against', dest='scale_against',
+                      metavar='PARAM=VALUE', default="",
+                      help='calculate task scaling factors against these configs')
 
     return parser.parse_args()
 
@@ -41,8 +46,10 @@ def get_exp_params(data_dir, col_map):
 
     return params
 
-def gen_exp_data(exp_dirs, col_map):
-    exps = []
+def gen_exp_data(exp_dirs, base_conf, col_map):
+    plain_exps = []
+    scaling_bases = []
+
     for data_dir in exp_dirs:
         if not os.path.isdir(data_dir):
             raise IOError("Invalid experiment '%s'" % os.path.abspath(data_dir))
@@ -51,34 +58,72 @@
         if not os.path.exists(tmp_dir):
             os.mkdir(tmp_dir)
 
+        # Read and translate exp output files
         params = get_exp_params(data_dir, col_map)
         st_output = st.get_st_output(data_dir, tmp_dir)
         ft_output = ft.get_ft_output(data_dir, tmp_dir)
 
+        # Create experiment named after the data dir
         exp_data = ExpData(data_dir, params,
                            DataFiles(ft_output, st_output))
 
-        exps += [exp_data]
-    return exps
+        if base_conf and base_conf.viewitems() & params.viewitems():
+            if not st_output:
+                raise Exception("Scaling base '%s' useless without sched data!"
+                                % data_dir)
+            params.pop(base_conf.keys()[0])
+            scaling_bases += [exp_data]
+        else:
+            plain_exps += [exp_data]
+
+    return (plain_exps, scaling_bases)
 
 def main():
     opts, args = parse_args()
     args = args or [os.getcwd()]
+
+    # Configuration key for task systems used to calculate task
+    # execution scaling factors
+    base_conf = dict(re.findall("(.*)=(.*)", opts.scale_against))
+
     col_map = ColMap()
-    exps = gen_exp_data(args, col_map)
-    table = TupleTable(col_map)
+    (plain_exps, scaling_bases) = gen_exp_data(args, base_conf, col_map)
+
+    base_table = TupleTable(col_map)
+    result_table = TupleTable(col_map)
 
-    for exp in exps:
+    # Used to find matching scaling_base for each experiment
+    for base in scaling_bases:
+        base_table.add_exp(base.params, base)
+
+    for exp in plain_exps:
         result = ExpPoint(exp.name)
+
         if exp.data_files.ft:
-            ft.get_ft_data(exp.data_files.ft, result, conf.BASE_EVENTS)
+            # Write overheads into result
+            ft.extract_ft_data(exp.data_files.ft, result, conf.BASE_EVENTS)
+
         if exp.data_files.st:
-            st.get_sched_data(exp.data_files.st, result)
+            if base_conf:
+                # Try to find a scaling base; get_exps() returns a list,
+                # which is empty when no base matches
+                base_params = copy.deepcopy(exp.params)
+                base_params.pop(base_conf.keys()[0])
+                bases = base_table.get_exps(base_params)
+                if bases:
+                    # Write scaling factor (vs base) into result
+                    st.extract_scaling_data(exp.data_files.st,
+                                            bases[0].data_files.st,
+                                            result)
+            # Write deadline misses / tardiness into result
+            st.extract_sched_data(exp.data_files.st, result)
+
+        result_table.add_exp(exp.params, result)
+
+        print(result)
 
-        table.add_exp(exp.params, result)
-    table.write_result(opts.out_dir)
+    result_table.write_result(opts.out_dir)
 
 if __name__ == '__main__':
     main()
diff --git a/run_exps.py b/run_exps.py
index bda0e40..4484952 100755
--- a/run_exps.py
+++ b/run_exps.py
@@ -218,7 +218,7 @@ def main():
     print(" Successful:\t\t%d" % succ)
     print(" Failed:\t\t%d" % failed)
     print(" Already Done:\t\t%d" % done)
-    print(" Invalid Kernel:\t\t%d" % invalid)
+    print(" Wrong Kernel:\t\t%d" % invalid)
 
 if __name__ == '__main__':
     main()
-- 
cgit v1.2.2
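
Note: the scaling-factor computation this patch adds to parse/sched.py reduces to a
short standalone sketch. Everything below is illustrative only: the task PIDs, exec
times, and helper names (group_by_config, scaling_factors) are hypothetical, while the
real code parses PARAM and TASK_EXIT records out of sched_trace output and divides
base exec stats by measured exec stats.

from collections import defaultdict, namedtuple

TaskConfig = namedtuple('TaskConfig', ['cpu', 'wcet', 'period'])
Task = namedtuple('Task', ['pid', 'config'])

def group_by_config(tasks, exec_times):
    # Group per-task exec times by config, in PID order, mirroring
    # config_exit_stats() in parse/sched.py
    groups = defaultdict(list)
    for t in sorted(tasks):
        groups[t.config].append(exec_times[t.pid])
    return groups

def scaling_factors(data_tasks, data_times, base_tasks, base_times):
    # Pair tasks that share a config and divide base exec time by
    # measured exec time, as extract_scaling_data() does
    data_groups = group_by_config(data_tasks, data_times)
    base_groups = group_by_config(base_tasks, base_times)
    scales = []
    for config, data_list in data_groups.items():
        base_list = base_groups[config]
        if len(data_list) != len(base_list):
            continue  # missing record, no task-to-task match possible
        scales += [b / d for b, d in zip(base_list, data_list)]
    return scales

# Hypothetical data: two tasks with identical configs in each task set
base = [Task(1, TaskConfig(0, 10, 100)), Task(2, TaskConfig(0, 10, 100))]
data = [Task(3, TaskConfig(0, 10, 100)), Task(4, TaskConfig(0, 10, 100))]
print(scaling_factors(data, {3: 10.0, 4: 10.0}, base, {1: 12.0, 2: 9.0}))
# -> [1.2, 0.9]

Because tasks are paired purely by PID order within each configuration group, a base
task set only yields meaningful factors when it spawns the same number of tasks per
configuration as the experiment being parsed.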