Added script to parse directory data, create CSVs for every changed value.

This change also makes SchedTrace and OverheadTrace events configurable.
author: Jonathan Herman <hermanjl@cs.unc.edu> 2012-09-27 19:03:22 -0400
committer: Jonathan Herman <hermanjl@cs.unc.edu> 2012-09-27 19:03:22 -0400
commit: 7c09ec981c6e06af2e62d67a609eb53728267954 (patch)
tree: 76a93db7cadc452ac70eabbd52fdd87ed5fd54c4 /parse/sched.py
parent: 5554e053e9f3d5f7987d3f1d889802b211af8eab (diff)
1 files changed, 89 insertions, 0 deletions
diff --git a/parse/sched.py b/parse/sched.py
new file mode 100644
index 0000000..ec4d917
--- /dev/null
+++ b/parse/sched.py
@@ -0,0 +1,89 @@
+import config.config as conf
+import os
+import re
+import numpy as np
+import subprocess
+from collections import namedtuple
+from point import Measurement
# Record describing one task found in the trace output: its pid and its
# period.  get_tasks() fills both fields with the strings captured by its
# regex, so they are text rather than numbers.
Task = namedtuple('Task', 'pid period')
def get_st_output(data_dir, out_dir):
    """Convert the trace binaries in data_dir to text by running st_show.

    The text is written to "<out_dir>/out-st".  When that file already
    exists it is treated as a cache and returned without running st_show.

    Args:
        data_dir: Directory listed (non-recursively) for trace binaries.  A
            file qualifies when its name matches conf.FILES['sched_data']
            with ".*" substituted into the format string.
        out_dir: Directory in which the "out-st" file is created.

    Returns:
        The path of the text output, or None when there are no trace
        binaries to convert or st_show exits with a non-zero status.

    Raises:
        OSError / IOError: if data_dir cannot be listed, the output file
            cannot be opened, or the st_show binary cannot be started.
    """
    bin_pattern = conf.FILES['sched_data'].format(".*")
    bins = [f for f in os.listdir(data_dir) if re.match(bin_pattern, f)]

    output_file = "%s/out-st" % out_dir

    if os.path.isfile(output_file):
        return output_file
    if not bins:
        return None

    cmd_arr = [conf.BINS['st_show']] + bins

    # The output file is created before st_show runs.  If st_show cannot be
    # started or fails, an empty/partial file must not be left behind, or the
    # cache check above would hand it out as valid data on every later call.
    succeeded = False
    try:
        with open(output_file, "w") as f:
            succeeded = subprocess.call(cmd_arr, cwd=data_dir, stdout=f) == 0
    finally:
        if not succeeded and os.path.isfile(output_file):
            os.remove(output_file)

    return output_file if succeeded else None
def get_tasks(data):
    """Build a Task for every PARAM record found in data.

    Args:
        data: Text to scan (a single string).

    Returns:
        A list of Task tuples in order of appearance.  Both pid and period
        are the strings captured by the regex: pid is the first run of digits
        after "PARAM", period is the first number after "period".
    """
    param_pattern = r"PARAM.*?(\d+).*?cost:\s+[\d\.]+ms.*?period.*?([\d.]+)"

    tasks = []
    for pid, period in re.findall(param_pattern, data):
        tasks.append(Task(pid, period))
    return tasks
def extract_tardy_vals(data, exp_point):
    """Record per-task tardiness statistics parsed from TARDY records.

    For every task returned by get_tasks(data), the first TARDY record for
    that task's pid is parsed.  Two values are derived per task:

      * relative tardiness: (total tardiness / jobs) / period
      * miss ratio:         misses / jobs

    Tasks without a TARDY record, with zero jobs, or with a zero period are
    skipped (the latter two would otherwise divide by zero).

    Args:
        data: Text to scan (a single string).
        exp_point: Mapping that receives the results under the keys
            "rel-tard" and "miss-ratio", each a Measurement built from the
            per-task values via Measurement().from_array().

    Returns:
        None; results are stored in exp_point.
    """
    ratios = []
    tards = []

    for t in get_tasks(data):
        # (?<!\d) keeps pid "12" from matching inside a record for "112/...".
        # Captured groups: jobs, total tardiness, max tardiness, misses.
        reg = (r"TARDY.*?(?<!\d)" + re.escape(t.pid) +
               r"/(\d+).*?Tot.*?([\d.]+).*?ms.*([\d.]+).*?ms.*?([\d.]+)")
        match = re.search(reg, data)
        if match is None:
            continue

        jobs = float(match.group(1))
        total_tard = float(match.group(2))
        # group(3) is the max tardiness; it is parsed but not used.
        misses = float(match.group(4))
        period = float(t.period)

        if jobs == 0 or period == 0:
            # No meaningful per-job or per-period ratio can be computed.
            continue

        ratios.append(misses / jobs)
        tards.append((total_tard / jobs) / period)

    for (array, name) in ((tards, "rel-tard"), (ratios, "miss-ratio")):
        exp_point[name] = Measurement().from_array(array)
def extract_variance(data, exp_point):
    """Record the variation of per-task job times from COMPLETION records.

    For every task returned by get_tasks(data), all "<number>ms" values on
    that task's COMPLETION records are collected, zero values are dropped,
    and the coefficient of variation (std / mean) is computed and scaled by
    the small-sample correction factor (1 + 1/(4n)).

    Tasks with no non-zero job times are skipped rather than aborting the
    whole extraction, so one silent task no longer prevents 'var' from being
    recorded for the others.

    Args:
        data: Text to scan (a single string).
        exp_point: Mapping that receives the result under the key 'var', a
            Measurement built from the per-task corrected values via
            Measurement().from_array().

    Returns:
        None; the result is stored in exp_point.
    """
    varz = []

    for t in get_tasks(data):
        # The lookarounds keep pid "12" from matching records for "112"/"123".
        reg = (r"COMPLETION.*?(?<!\d)" + re.escape(t.pid) +
               r"(?!\d).*?([\d\.]+)ms")
        times = [float(m) for m in re.findall(reg, data)]

        # Build a real list: filter() is lazy on Python 3 and np.array would
        # wrap the iterator object instead of its values.
        job_times = np.array([x for x in times if x != 0], dtype=float)
        if job_times.size == 0:
            continue

        # Coefficient of variation
        cv = job_times.std() / job_times.mean()
        # Correction, assuming normal distributions.  1.0 forces true
        # division; integer division made this factor exactly 1 on Python 2.
        corrected = (1 + 1.0 / (4 * len(job_times))) * cv

        varz.append(corrected)

    exp_point['var'] = Measurement().from_array(varz)
def get_sched_data(data_file, result):
    """Read data_file and run every extractor over its contents.

    Args:
        data_file: Path of the text file to read in full.
        result: Mapping handed to extract_tardy_vals and extract_variance,
            which store their measurements in it.

    Returns:
        None; all output goes into result.
    """
    # NOTE: a disabled block comparing values against a BASE configuration
    # used to sit here; it referenced names not defined in this module.
    with open(data_file, 'r') as trace:
        contents = trace.read()

        for extractor in (extract_tardy_vals, extract_variance):
            extractor(contents, result)
author	Jonathan Herman <hermanjl@cs.unc.edu>	2012-09-27 19:03:22 -0400
committer	Jonathan Herman <hermanjl@cs.unc.edu>	2012-09-27 19:03:22 -0400
commit	7c09ec981c6e06af2e62d67a609eb53728267954 (patch)
tree	76a93db7cadc452ac70eabbd52fdd87ed5fd54c4 /parse/sched.py
parent	5554e053e9f3d5f7987d3f1d889802b211af8eab (diff)

diff --git a/parse/sched.py b/parse/sched.py new file mode 100644 index 0000000..ec4d917 --- /dev/null +++ b/parse/sched.py
@@ -0,0 +1,89 @@
	1	import config.config as conf
	2	import os
	3	import re
	4	import numpy as np
	5	import subprocess
	6
	7	from collections import namedtuple
	8	from point import Measurement
	9
	10	Task = namedtuple('Task', ['pid', 'period'])
	11
	12	def get_st_output(data_dir, out_dir):
	13	bin_files = conf.FILES['sched_data'].format(".*")
	14	bins = [f for f in os.listdir(data_dir) if re.match(bin_files, f)]
	15
	16	output_file = "%s/out-st" % out_dir
	17
	18	if os.path.isfile(output_file):
	19	return output_file
	20
	21	if len(bins) != 0:
	22	cmd_arr = [conf.BINS['st_show']]
	23	cmd_arr.extend(bins)
	24	with open(output_file, "w") as f:
	25	subprocess.call(cmd_arr, cwd=data_dir, stdout=f)
	26	else:
	27	return None
	28	return output_file
	29
	30	def get_tasks(data):
	31	reg = r"PARAM.?(\d+).?cost:\s+[\d\.]+ms.?period.?([\d.]+)"
	32	return [Task(x[0], x[1]) for x in re.findall(reg, data)]
	33
	34	def extract_tardy_vals(data, exp_point):
	35	ratios = []
	36	tards = []
	37
	38	for t in get_tasks(data):
	39	reg = r"TARDY.?" + t.pid + "/(\d+).?Tot.?([\d.]+).?ms.([\d.]+).?ms.*?([\d.]+)"
	40	matches = re.findall(reg, data)
	41	if len(matches) != 0:
	42	jobs = float(matches[0][0])
	43	total_tard = float(matches[0][1])
	44	# max_tard = float(matches[0][2])
	45	misses = float(matches[0][3])
	46	rel_tard = (total_tard / jobs) / float(t.period)
	47	if misses != 0:
	48	miss_ratio = (misses / jobs)
	49	else:
	50	miss_ratio = 0
	51
	52	ratios.append(miss_ratio)
	53	tards.append(rel_tard)
	54
	55	for (array, name) in ((tards, "rel-tard"), (ratios, "miss-ratio")):
	56	exp_point[name] = Measurement().from_array(array)
	57
	58	def extract_variance(data, exp_point):
	59	varz = []
	60	for t in get_tasks(data):
	61	reg = r"COMPLETION.?" + t.pid + r".?([\d\.]+)ms"
	62	matches = re.findall(reg, data)
	63
	64	if len(matches) == 0:
	65	return 0
	66
	67	job_times = np.array(filter(lambda x: float(x) != 0, matches), dtype=np.float)
	68
	69	# Coefficient of variation
	70	cv = job_times.std() / job_times.mean()
	71	# Correction, assuming normal distributions
	72	corrected = (1 + 1/(4 * len(job_times))) * cv
	73
	74	varz.append(corrected)
	75
	76	exp_point['var'] = Measurement().from_array(varz)
	77
	78	def get_sched_data(data_file, result):
	79	with open(data_file, 'r') as f:
	80	data = f.read()
	81
	82	# if conf != BASE:
	83	# (our_values, their_values) = extract_exec_vals(our_data, their_data)
	84	# conf_result = get_stats(our_values, their_values)
	85	# for key in conf_result.keys():
	86	# result[key][conf] = conf_result[key]
	87
	88	extract_tardy_vals(data, result)
	89	extract_variance(data, result)