From 1abea5f67c2c70053af0a59db715a210df2e0bef Mon Sep 17 00:00:00 2001
From: Jonathan Herman <hermanjl@cs.unc.edu>
Date: Tue, 20 Nov 2012 14:38:25 -0500
Subject: Removed dependency on overhead-analysis repo.

---
 experiment/experiment.py  |   3 +-
 experiment/litmus_util.py |  13 +++++
 parse/ft.py               | 132 ++++++++++++++++++++++++++++++++++------------
 parse_exps.py             |   9 +++-
 run_exps.py               |   6 ++-
 5 files changed, 125 insertions(+), 38 deletions(-)

diff --git a/experiment/experiment.py b/experiment/experiment.py
index 8c88799..deb4ff2 100644
--- a/experiment/experiment.py
+++ b/experiment/experiment.py
@@ -178,7 +178,8 @@ class Experiment(object):
         self.log("Writing %d proc entries" % len(self.proc_entries))
         map(methodcaller('write_proc'), self.proc_entries)
 
-        time.sleep(5)
+        if len(self.proc_entries):
+            time.sleep(2)
 
         self.log("Switching to %s" % self.scheduler)
         litmus_util.switch_scheduler(self.scheduler)
diff --git a/experiment/litmus_util.py b/experiment/litmus_util.py
index cde0bca..42d3e5f 100644
--- a/experiment/litmus_util.py
+++ b/experiment/litmus_util.py
@@ -17,6 +17,19 @@ def num_cpus():
                 cpus += 1
     return cpus
 
+def cpu_freq():
+    """
+    The frequency of the first CPU listed in /proc/cpuinfo, in whole MHz.
+    """
+    reg = re.compile(r'^cpu MHz\s*:\s*(\d+)', re.M)
+    with open('/proc/cpuinfo', 'r') as f:
+        data = f.read()
+
+    match = re.search(reg, data)
+    if not match:
+        raise Exception("Cannot parse CPU frequency!")
+    return int(match.group(1))
+
 def switch_scheduler(switch_to_in):
     """Switch the scheduler to whatever is passed in.
 
diff --git a/parse/ft.py b/parse/ft.py
index 4e310b0..cbf75f2 100644
--- a/parse/ft.py
+++ b/parse/ft.py
@@ -1,4 +1,5 @@
 import config.config as conf
+import numpy as np
 import os
 import re
 import shutil as sh
@@ -6,14 +7,17 @@ import subprocess
 
 from point import Measurement,Type
 
-def get_ft_output(data_dir, out_dir, force=False):
+SPLIT_DATA_NAME = "overhead={}.bin"
+FT_DATA_NAME    = "sorted-ft.bin"
+FIELDS = ["Overhead", "samples", "max", "avg", "min", "med", "std", "var"]
+
+def get_ft_output(data_dir, cycles, out_dir, force=False):
     """
-    Create and return files containing sorted and analyzed overhead data
+    Create a file of analyzed overhead data and return its path (or None)
     """
-    bin_file = conf.FILES['ft_data'] + "$"
-    bins = [f for f in os.listdir(data_dir) if re.match(bin_file, f)]
+    freg = conf.FILES['ft_data'] + "$"
+    bins = [f for f in os.listdir(data_dir) if re.match(freg, f)]
 
-    FT_DATA_NAME = "scheduler=x-ft"
     output_file  = "{}/out-ft".format(out_dir)
 
     if os.path.isfile(output_file):
@@ -23,44 +27,106 @@ def get_ft_output(data_dir, out_dir, force=False):
             return output_file
 
     if len(bins) != 0:
+        bin_file = "{}/{}".format(data_dir, bins[0])
         err_file = open("%s/err-ft" % out_dir, 'w')
-        # Need to make a copy of the original data file so scripts can change it
-        sh.copyfile("{}/{}".format(data_dir, bins[0]),
-                    "{}/{}".format(out_dir, FT_DATA_NAME))
-
-        subprocess.call([conf.BINS['sort'], FT_DATA_NAME],
-                        cwd=out_dir, stderr=err_file, stdout=err_file)
-        subprocess.call([conf.BINS['split'], FT_DATA_NAME],
-                        cwd=out_dir, stderr=err_file, stdout=err_file)
-
-        # Previous subprocesses just spit out all these intermediate files
-        bins = [f for f in os.listdir(out_dir) if re.match(".*overhead=.*bin", f)]
-        bins = [f for f in bins if os.stat("%s/%s"%(out_dir, f)).st_size]
-
-        # Analyze will summarize those
-        # todo pass in f
-        cmd_arr = [conf.BINS['analyze']]
-        cmd_arr.extend(bins)
-        with open(output_file, "w") as f:
-            subprocess.call(cmd_arr, cwd=out_dir, stdout=f, stderr=err_file)
+
+        sorted_bin = sort_ft(bin_file, err_file, out_dir)
+        make_data_file(sorted_bin, cycles, output_file, err_file, out_dir)
+
+        os.remove(sorted_bin)
+
+        return output_file
     else:
         return None
     return output_file
 
-def extract_ft_data(data_file, result, overheads):
-    rstr = r",(?:\s+[^\s]+){3}.*?([\d\.]+).*?([\d\.]+),(?:\s+[^\s]+){3}.*?([\d\.]+)"
+def fmt_cell(x):
+    if type(x) == str:
+        return "%15s" % x
+    if type(x) == int:
+        return "%15d" % x
+    else:
+        return "%15.3f" % x
+
+def make_data_file(sorted_bin, cycles, out_fname, err_file, out_dir):
+    """
+    Create file containing all overhead information.
+    """
+    base_name = "{}/{}".format(out_dir, SPLIT_DATA_NAME)
+
+    with open(out_fname, "w") as f:
+        f.write("#%s" % ", ".join(fmt_cell(x) for x in FIELDS))
+        f.write("\n")
+
+        for event in conf.BASE_EVENTS:
+                ovh_fname = base_name.format(event.replace("_", "-"))
+
+                if os.path.exists(ovh_fname):
+                    os.remove(ovh_fname)
+                ovh_file = open(ovh_fname, 'w')
+
+                # Extract matching overhead events into a separate file
+                cmd = [conf.BINS["split"], "-r", "-b", event, sorted_bin]
+                ret = subprocess.call(cmd, cwd=out_dir,
+                                      stderr=err_file, stdout=ovh_file)
+                size = os.stat(ovh_fname).st_size
+
+                if ret:
+                    err_file.write("Failed with command: %s" % " ".join(cmd))
+                if not size:
+                    os.remove(ovh_fname)
+                if not size or ret:
+                    continue
+
+                # Map and sort file for stats
+                data = np.memmap(ovh_fname, dtype="float32", mode='c')
+                data /= float(cycles) # Scale for processor speed
+                data.sort()
 
+                stats = [event, len(data), data[-1], np.mean(data), data[0],
+                         np.median(data), np.std(data, ddof=1), np.var(data)]
+                f.write(", ".join([fmt_cell(x) for x in stats]))
+                f.write("\n")
+
+                os.remove(ovh_fname)
+
+def sort_ft(ft_file, err_file, out_dir):
+    """
+    Create a sorted copy of @ft_file in @out_dir and return its path.
+    """
+    out_fname = "{}/{}".format(out_dir, FT_DATA_NAME)
+
+    # Sort happens in place, so sort a copy rather than the original data
+    sh.copyfile(ft_file, out_fname)
+    cmd = [conf.BINS['ftsort'], out_fname]
+    ret = subprocess.call(cmd, cwd=out_dir, stderr=err_file, stdout=err_file)
+
+    if ret:
+        raise Exception("Sort failed with command: %s" % " ".join(cmd))
+
+    return out_fname
+
+def extract_ft_data(data_file, result, overheads):
+    """
+    Add the overhead measurements parsed from @data_file to @result and return it
+    """
     with open(data_file) as f:
         data = f.read()
 
     for ovh in overheads:
+        regex = r"({}[^\n]*)".format(ovh)
+        line = re.search(regex, data)
+
+        if not line:
+            continue
+
+        vals = re.split(r"[,\s]+", line.groups(1)[0])
+
         measure = Measurement("%s-%s" % (data_file, ovh))
-        vals = re.findall(r"\s+{}".format(ovh.replace('_','-')) + rstr, data);
-        if len(vals) != 0:
-            vals = vals[0]
-            measure[Type.Max] = float(vals[0])
-            measure[Type.Avg] = float(vals[1])
-            measure[Type.Var] = float(vals[2])
-            result[ovh] = measure
+        measure[Type.Max] = float(vals[FIELDS.index("max")])
+        measure[Type.Avg] = float(vals[FIELDS.index("avg")])
+        measure[Type.Var] = float(vals[FIELDS.index("var")])
+
+        result[ovh] = measure
 
     return result
diff --git a/parse_exps.py b/parse_exps.py
index 87d0783..24bdb85 100755
--- a/parse_exps.py
+++ b/parse_exps.py
@@ -52,13 +52,17 @@ def get_exp_params(data_dir, col_map):
     # Keep only params that uniquely identify the experiment
     params = load_params(param_file)
     for ignored in conf.PARAMS.itervalues():
-        if ignored in params:
+        # Always include cycles or overhead parsing fails
+        if ignored in params and ignored != conf.PARAMS['cycles']:
             params.pop(ignored)
 
     # Track all changed params
     for key, value in params.iteritems():
         col_map.try_add(key, value)
 
+    if conf.PARAMS['cycles'] not in params:
+        params[conf.PARAMS['cycles']] = conf.DEFAULTS['cycles']
+
     return params
 
 
@@ -78,8 +82,9 @@ def gen_exp_data(exp_dirs, base_conf, col_map, force):
 
         # Read and translate exp output files
         params = get_exp_params(data_dir, col_map)
+        cycles = int(params[conf.PARAMS['cycles']])
         st_output = st.get_st_output(data_dir, tmp_dir, force)
-        ft_output = ft.get_ft_output(data_dir, tmp_dir, force)
+        ft_output = ft.get_ft_output(data_dir, cycles, tmp_dir, force)
 
 
         if base_conf and base_conf.viewitems() & params.viewitems():
diff --git a/run_exps.py b/run_exps.py
index 8812bc6..3efb09d 100755
--- a/run_exps.py
+++ b/run_exps.py
@@ -114,9 +114,11 @@ def load_experiment(sched_file, scheduler, duration, param_file, out_dir):
     run_exp(sched_file, schedule, scheduler, kernel, duration, work_dir, out_dir)
 
     # Save parameters used to run experiment in out_dir
+    # Cycles is saved here for accurate overhead calculations later
     out_params = dict(params.items() +
-                      [(conf.PARAMS['sched'], scheduler),
-                       (conf.PARAMS['dur'],   duration)])
+                      [(conf.PARAMS['sched'],  scheduler),
+                       (conf.PARAMS['dur'],    duration),
+                       (conf.PARAMS['cycles'], lu.cpu_freq())])
     with open("%s/%s" % (out_dir, conf.DEFAULTS['params_file']), 'w') as f:
         f.write(str(out_params))
 
-- 
cgit v1.2.2