Improved accuracy of sched_trace measurement parsing.

* Measurements from tasks missing more than 20% of their scheduling records are ignored. This threshold is configurable in config/config.py.
* Measurements which have only zero values are ignored.
* If either of these two situations is encountered, a message is printed the first time it occurs, using the common.log_once() method. See parse_exps.py for how this is used with multiple threads.
* Measurements from a task's last job are ignored.
* The miss ratio is calculated only as a fraction of the number of jobs whose matching release and completion records were both found, not just the release record.
author: Jonathan Herman <hermanjl@cs.unc.edu> 2013-04-23 14:01:35 -0400
committer: Jonathan Herman <hermanjl@cs.unc.edu> 2013-04-23 14:01:35 -0400
commit: 7545402506aa76261e18d85af585ff0ac1cf05c1 (patch)
tree: 6b5a6d2e819c10311f3b4cdc94174877bdfcfbde /parse
parent: 25ccdb0cbc6b959b1f96c89b8bce91963cb67b4c (diff)
3 files changed, 68 insertions, 24 deletions
diff --git a/parse/point.py b/parse/point.py
index ac47c70..b1d9d53 100644
--- a/parse/point.py
+++ b/parse/point.py
@@ -133,6 +133,10 @@ class ExpPoint(object):
    def get_stats(self):
        return self.stats.keys()
+    def __bool__(self):
+        return bool(self.stats)
+    __nonzero__ = __bool__
 class SummaryPoint(ExpPoint):
    def __init__(self, id="", points=[], typemap = default_typemap):
diff --git a/parse/sched.py b/parse/sched.py
index b56324b..4933037 100644
--- a/parse/sched.py
+++ b/parse/sched.py
@@ -5,35 +5,55 @@ import struct
 import subprocess
 from collections import defaultdict,namedtuple
-from common import recordtype
+from common import recordtype,log_once
 from point import Measurement
 from ctypes import *
 class TimeTracker:
    '''Store stats for durations of time demarcated by sched_trace records.'''
    def __init__(self):
-        self.begin = self.avg = self.max = self.num = self.job = 0
+        self.begin = self.avg = self.max = self.num = self.next_job = 0
-    def store_time(self, record):
+        # Count of times the job in start_time matched that in store_time
+        self.matches = 0
+        # And the times it didn't
+        self.disjoints = 0
+        # Measurements are recorded in store_ time using the previous matching
+        # record which was passed to store_time. This way, the last record for
+        # any task is always skipped
+        self.last_record = None
+    def store_time(self, next_record):
        '''End duration of time.'''
-        dur = record.when - self.begin
+        dur = (self.last_record.when - self.begin) if self.last_record else -1
-        if self.job == record.job and dur > 0:
+        if self.next_job == next_record.job:
-            self.max  = max(self.max, dur)
+            self.last_record = next_record
-            self.avg *= float(self.num / (self.num + 1))
-            self.num += 1
-            self.avg += dur / float(self.num)
-            self.begin = 0
+            if self.last_record:
-            self.job   = 0
+                self.matches += 1
+            if dur > 0:
+                self.max  = max(self.max, dur)
+                self.avg *= float(self.num / (self.num + 1))
+                self.num += 1
+                self.avg += dur / float(self.num)
+                self.begin = 0
+                self.next_job   = 0
+        else:
+            self.disjoints += 1
    def start_time(self, record, time = None):
        '''Start duration of time.'''
-        if not time:
+        if self.last_record:
-            self.begin = record.when
+            if not time:
-        else:
+                self.begin = self.last_record.when
-            self.begin = time
+            else:
-        self.job = record.job
+                self.begin = time
+        self.next_job = record.job
 # Data stored for each task
 TaskParams = namedtuple('TaskParams',  ['wcet', 'period', 'cpu'])
@@ -203,6 +223,12 @@ def create_task_dict(data_dir, work_dir = None):
    return task_dict
+LOSS_MSG = """Found task missing more than %d%% of its scheduling records.
+These won't be included in scheduling statistics!"""%(100*conf.MAX_RECORD_LOSS)
+SKIP_MSG = """Measurement '%s' has no non-zero values.
+Measurements like these are not included in scheduling statistics.
+If a measurement is missing, this is why."""
 def extract_sched_data(result, data_dir, work_dir):
    task_dict = create_task_dict(data_dir, work_dir)
    stat_data = defaultdict(list)
@@ -213,19 +239,29 @@ def extract_sched_data(result, data_dir, work_dir):
            # Currently unknown where these invalid tasks come from...
            continue
-        miss_ratio = float(tdata.misses.num) / tdata.jobs
+        miss = tdata.misses
-        stat_data["miss-ratio"].append(float(tdata.misses.num) / tdata.jobs)
+        record_loss = float(miss.disjoints)/(miss.matches + miss.disjoints)
+        stat_data["record-loss"].append(record_loss)
+        if record_loss > conf.MAX_RECORD_LOSS:
+            log_once(LOSS_MSG)
+            continue
+        miss_ratio = float(miss.num) / miss.matches
+        avg_tard = miss.avg * miss_ratio
+        stat_data["miss-ratio" ].append(miss_ratio)
-        stat_data["max-tard"  ].append(tdata.misses.max / tdata.params.wcet)
+        stat_data["max-tard"].append(miss.max / tdata.params.period)
-        # Scale average down to account for jobs with 0 tardiness
+        stat_data["avg-tard"].append(avg_tard / tdata.params.period)
-        avg_tard = tdata.misses.avg * miss_ratio
-        stat_data["avg-tard"  ].append(avg_tard / tdata.params.wcet)
-        stat_data["avg-block" ].append(tdata.blocks.avg / NSEC_PER_MSEC)
+        stat_data["avg-block"].append(tdata.blocks.avg / NSEC_PER_MSEC)
-        stat_data["max-block" ].append(tdata.blocks.max / NSEC_PER_MSEC)
+        stat_data["max-block"].append(tdata.blocks.max / NSEC_PER_MSEC)
    # Summarize value groups
    for name, data in stat_data.iteritems():
        if not data or not sum(data):
+            log_once(SKIP_MSG, SKIP_MSG % name)
            continue
        result[name] = Measurement(str(name)).from_array(data)
diff --git a/parse/tuple_table.py b/parse/tuple_table.py
index 47fb6b6..320d9dd 100644
--- a/parse/tuple_table.py
+++ b/parse/tuple_table.py
@@ -13,6 +13,10 @@ class TupleTable(object):
    def get_col_map(self):
        return self.col_map
+    def __bool__(self):
+        return bool(self.table)
+    __nonzero__ = __bool__
    def __getitem__(self, kv):
        key = self.col_map.get_key(kv)
        return self.table[key]
author	Jonathan Herman <hermanjl@cs.unc.edu>	2013-04-23 14:01:35 -0400
committer	Jonathan Herman <hermanjl@cs.unc.edu>	2013-04-23 14:01:35 -0400
commit	7545402506aa76261e18d85af585ff0ac1cf05c1 (patch)
tree	6b5a6d2e819c10311f3b4cdc94174877bdfcfbde /parse
parent	25ccdb0cbc6b959b1f96c89b8bce91963cb67b4c (diff)

diff --git a/parse/point.py b/parse/point.py index ac47c70..b1d9d53 100644 --- a/parse/point.py +++ b/parse/point.py
@@ -133,6 +133,10 @@ class ExpPoint(object):
133	def get_stats(self):	133	def get_stats(self):
134	return self.stats.keys()	134	return self.stats.keys()
135		135
		136	def __bool__(self):
		137	return bool(self.stats)
		138	__nonzero__ = __bool__
		139
136		140
137	class SummaryPoint(ExpPoint):	141	class SummaryPoint(ExpPoint):
138	def __init__(self, id="", points=[], typemap = default_typemap):	142	def __init__(self, id="", points=[], typemap = default_typemap):


diff --git a/parse/sched.py b/parse/sched.py index b56324b..4933037 100644 --- a/parse/sched.py +++ b/parse/sched.py
@@ -5,35 +5,55 @@ import struct
5	import subprocess	5	import subprocess
6		6
7	from collections import defaultdict,namedtuple	7	from collections import defaultdict,namedtuple
8	from common import recordtype	8	from common import recordtype,log_once
9	from point import Measurement	9	from point import Measurement
10	from ctypes import *	10	from ctypes import *
11		11
12	class TimeTracker:	12	class TimeTracker:
13	'''Store stats for durations of time demarcated by sched_trace records.'''	13	'''Store stats for durations of time demarcated by sched_trace records.'''
14	def __init__(self):	14	def __init__(self):
15	self.begin = self.avg = self.max = self.num = self.job = 0	15	self.begin = self.avg = self.max = self.num = self.next_job = 0
16		16
17	def store_time(self, record):	17	# Count of times the job in start_time matched that in store_time
		18	self.matches = 0
		19	# And the times it didn't
		20	self.disjoints = 0
		21
		22	# Measurements are recorded in store_ time using the previous matching
		23	# record which was passed to store_time. This way, the last record for
		24	# any task is always skipped
		25	self.last_record = None
		26
		27	def store_time(self, next_record):
18	'''End duration of time.'''	28	'''End duration of time.'''
19	dur = record.when - self.begin	29	dur = (self.last_record.when - self.begin) if self.last_record else -1
20		30
21	if self.job == record.job and dur > 0:	31	if self.next_job == next_record.job:
22	self.max = max(self.max, dur)	32	self.last_record = next_record
23	self.avg *= float(self.num / (self.num + 1))
24	self.num += 1
25	self.avg += dur / float(self.num)
26		33
27	self.begin = 0	34	if self.last_record:
28	self.job = 0	35	self.matches += 1
		36
		37	if dur > 0:
		38	self.max = max(self.max, dur)
		39	self.avg *= float(self.num / (self.num + 1))
		40	self.num += 1
		41	self.avg += dur / float(self.num)
		42
		43	self.begin = 0
		44	self.next_job = 0
		45	else:
		46	self.disjoints += 1
29		47
30	def start_time(self, record, time = None):	48	def start_time(self, record, time = None):
31	'''Start duration of time.'''	49	'''Start duration of time.'''
32	if not time:	50	if self.last_record:
33	self.begin = record.when	51	if not time:
34	else:	52	self.begin = self.last_record.when
35	self.begin = time	53	else:
36	self.job = record.job	54	self.begin = time
		55
		56	self.next_job = record.job
37		57
38	# Data stored for each task	58	# Data stored for each task
39	TaskParams = namedtuple('TaskParams', ['wcet', 'period', 'cpu'])	59	TaskParams = namedtuple('TaskParams', ['wcet', 'period', 'cpu'])
@@ -203,6 +223,12 @@ def create_task_dict(data_dir, work_dir = None):
203		223
204	return task_dict	224	return task_dict
205		225
		226	LOSS_MSG = """Found task missing more than %d%% of its scheduling records.
		227	These won't be included in scheduling statistics!"""%(100*conf.MAX_RECORD_LOSS)
		228	SKIP_MSG = """Measurement '%s' has no non-zero values.
		229	Measurements like these are not included in scheduling statistics.
		230	If a measurement is missing, this is why."""
		231
206	def extract_sched_data(result, data_dir, work_dir):	232	def extract_sched_data(result, data_dir, work_dir):
207	task_dict = create_task_dict(data_dir, work_dir)	233	task_dict = create_task_dict(data_dir, work_dir)
208	stat_data = defaultdict(list)	234	stat_data = defaultdict(list)
@@ -213,19 +239,29 @@ def extract_sched_data(result, data_dir, work_dir):
213	# Currently unknown where these invalid tasks come from...	239	# Currently unknown where these invalid tasks come from...
214	continue	240	continue
215		241
216	miss_ratio = float(tdata.misses.num) / tdata.jobs	242	miss = tdata.misses
217	stat_data["miss-ratio"].append(float(tdata.misses.num) / tdata.jobs)	243
		244	record_loss = float(miss.disjoints)/(miss.matches + miss.disjoints)
		245	stat_data["record-loss"].append(record_loss)
		246
		247	if record_loss > conf.MAX_RECORD_LOSS:
		248	log_once(LOSS_MSG)
		249	continue
		250
		251	miss_ratio = float(miss.num) / miss.matches
		252	avg_tard = miss.avg * miss_ratio
		253
		254	stat_data["miss-ratio" ].append(miss_ratio)
218		255
219	stat_data["max-tard" ].append(tdata.misses.max / tdata.params.wcet)	256	stat_data["max-tard"].append(miss.max / tdata.params.period)
220	# Scale average down to account for jobs with 0 tardiness	257	stat_data["avg-tard"].append(avg_tard / tdata.params.period)
221	avg_tard = tdata.misses.avg * miss_ratio
222	stat_data["avg-tard" ].append(avg_tard / tdata.params.wcet)
223		258
224	stat_data["avg-block" ].append(tdata.blocks.avg / NSEC_PER_MSEC)	259	stat_data["avg-block"].append(tdata.blocks.avg / NSEC_PER_MSEC)
225	stat_data["max-block" ].append(tdata.blocks.max / NSEC_PER_MSEC)	260	stat_data["max-block"].append(tdata.blocks.max / NSEC_PER_MSEC)
226		261
227	# Summarize value groups	262	# Summarize value groups
228	for name, data in stat_data.iteritems():	263	for name, data in stat_data.iteritems():
229	if not data or not sum(data):	264	if not data or not sum(data):
		265	log_once(SKIP_MSG, SKIP_MSG % name)
230	continue	266	continue
231	result[name] = Measurement(str(name)).from_array(data)	267	result[name] = Measurement(str(name)).from_array(data)


diff --git a/parse/tuple_table.py b/parse/tuple_table.py index 47fb6b6..320d9dd 100644 --- a/parse/tuple_table.py +++ b/parse/tuple_table.py
@@ -13,6 +13,10 @@ class TupleTable(object):
13	def get_col_map(self):	13	def get_col_map(self):
14	return self.col_map	14	return self.col_map
15		15
		16	def __bool__(self):
		17	return bool(self.table)
		18	__nonzero__ = __bool__
		19
16	def __getitem__(self, kv):	20	def __getitem__(self, kv):
17	key = self.col_map.get_key(kv)	21	key = self.col_map.get_key(kv)
18	return self.table[key]	22	return self.table[key]