From 7545402506aa76261e18d85af585ff0ac1cf05c1 Mon Sep 17 00:00:00 2001
From: Jonathan Herman <hermanjl@cs.unc.edu>
Date: Tue, 23 Apr 2013 14:01:35 -0400
Subject: Improved accuracy of sched_trace measurement parsing.

* Measurements from tasks missing more than 20% of their scheduling records
are ignored. This threshold is configurable in config/config.py.
* Measurements which only have zero values are ignored.
* If either of these two situations is encountered, a message is printed the
first time it occurs, using the common.log_once() method. See parse_exps.py
for how this is used with multiple threads.
* Measurements from a task's last job are ignored.
* Miss ratio is calculated only as a fraction of the number of jobs for which
both the release record and the matching completion record were found, not
of every job for which a release record was found.
---
 parse/sched.py | 84 +++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 60 insertions(+), 24 deletions(-)

(limited to 'parse/sched.py')

diff --git a/parse/sched.py b/parse/sched.py
index b56324b..4933037 100644
--- a/parse/sched.py
+++ b/parse/sched.py
@@ -5,35 +5,55 @@ import struct
 import subprocess
 
 from collections import defaultdict,namedtuple
-from common import recordtype
+from common import recordtype,log_once
 from point import Measurement
 from ctypes import *
 
 class TimeTracker:
     '''Store stats for durations of time demarcated by sched_trace records.'''
     def __init__(self):
-        self.begin = self.avg = self.max = self.num = self.job = 0
+        self.begin = self.avg = self.max = self.num = self.next_job = 0
 
-    def store_time(self, record):
+        # Count of times the job in start_time matched that in store_time
+        self.matches = 0
+        # And the times it didn't
+        self.disjoints = 0
+
+        # Measurements are recorded in store_time using the previous matching
+        # record which was passed to store_time. This way, the last record for
+        # any task is always skipped.
+        self.last_record = None
+
+    def store_time(self, next_record):
         '''End duration of time.'''
-        dur = record.when - self.begin
+        dur = (self.last_record.when - self.begin) if self.last_record else -1
 
-        if self.job == record.job and dur > 0:
-            self.max  = max(self.max, dur)
-            self.avg *= float(self.num / (self.num + 1))
-            self.num += 1
-            self.avg += dur / float(self.num)
+        if self.next_job == next_record.job:
+            self.last_record = next_record
 
-            self.begin = 0
-            self.job   = 0
+            if self.last_record:
+                self.matches += 1
+
+            if dur > 0:
+                self.max  = max(self.max, dur)
+                self.avg *= float(self.num / (self.num + 1))
+                self.num += 1
+                self.avg += dur / float(self.num)
+
+                self.begin = 0
+                self.next_job   = 0
+        else:
+            self.disjoints += 1
 
     def start_time(self, record, time = None):
         '''Start duration of time.'''
-        if not time:
-            self.begin = record.when
-        else:
-            self.begin = time
-        self.job = record.job
+        if self.last_record:
+            if not time:
+                self.begin = self.last_record.when
+            else:
+                self.begin = time
+
+        self.next_job = record.job
 
 # Data stored for each task
 TaskParams = namedtuple('TaskParams',  ['wcet', 'period', 'cpu'])
@@ -203,6 +223,12 @@ def create_task_dict(data_dir, work_dir = None):
 
     return task_dict
 
+LOSS_MSG = """Found task missing more than %d%% of its scheduling records.
+These won't be included in scheduling statistics!"""%(100*conf.MAX_RECORD_LOSS)
+SKIP_MSG = """Measurement '%s' has no non-zero values.
+Measurements like these are not included in scheduling statistics.
+If a measurement is missing, this is why."""
+
 def extract_sched_data(result, data_dir, work_dir):
     task_dict = create_task_dict(data_dir, work_dir)
     stat_data = defaultdict(list)
@@ -213,19 +239,29 @@ def extract_sched_data(result, data_dir, work_dir):
             # Currently unknown where these invalid tasks come from...
             continue
 
-        miss_ratio = float(tdata.misses.num) / tdata.jobs
-        stat_data["miss-ratio"].append(float(tdata.misses.num) / tdata.jobs)
+        miss = tdata.misses
+
+        record_loss = float(miss.disjoints)/(miss.matches + miss.disjoints)
+        stat_data["record-loss"].append(record_loss)
+
+        if record_loss > conf.MAX_RECORD_LOSS:
+            log_once(LOSS_MSG)
+            continue
+
+        miss_ratio = float(miss.num) / miss.matches
+        avg_tard = miss.avg * miss_ratio
+
+        stat_data["miss-ratio" ].append(miss_ratio)
 
-        stat_data["max-tard"  ].append(tdata.misses.max / tdata.params.wcet)
-        # Scale average down to account for jobs with 0 tardiness
-        avg_tard = tdata.misses.avg * miss_ratio
-        stat_data["avg-tard"  ].append(avg_tard / tdata.params.wcet)
+        stat_data["max-tard"].append(miss.max / tdata.params.period)
+        stat_data["avg-tard"].append(avg_tard / tdata.params.period)
 
-        stat_data["avg-block" ].append(tdata.blocks.avg / NSEC_PER_MSEC)
-        stat_data["max-block" ].append(tdata.blocks.max / NSEC_PER_MSEC)
+        stat_data["avg-block"].append(tdata.blocks.avg / NSEC_PER_MSEC)
+        stat_data["max-block"].append(tdata.blocks.max / NSEC_PER_MSEC)
 
     # Summarize value groups
     for name, data in stat_data.iteritems():
         if not data or not sum(data):
+            log_once(SKIP_MSG, SKIP_MSG % name)
             continue
         result[name] = Measurement(str(name)).from_array(data)
-- 
cgit v1.2.2