From 3bb7a40648683d15174b44cb68740d78adf29b86 Mon Sep 17 00:00:00 2001
From: Andrea Bastoni <bastoni@cs.unc.edu>
Date: Thu, 25 Mar 2010 15:40:20 -0400
Subject: Restructure preempt migration data analyzer

---
 pm_data_analysis/pm_data_analyzer.py | 507 ++++++++++++++++-------------------
 1 file changed, 234 insertions(+), 273 deletions(-)

(limited to 'pm_data_analysis/pm_data_analyzer.py')

diff --git a/pm_data_analysis/pm_data_analyzer.py b/pm_data_analysis/pm_data_analyzer.py
index 43de82a..02c5098 100755
--- a/pm_data_analysis/pm_data_analyzer.py
+++ b/pm_data_analysis/pm_data_analyzer.py
@@ -1,11 +1,19 @@
 #!/usr/bin/env python
-#
-# Preemption and migration overheads analysis.
-#    Take a single-task-set-size file and generate max / avg for the valid
-#    samples for preemption and different kinds of migration
-#
-#    Save computed valid overheads and print a file suitable for processing
-#    using Bjorn's gnuplot wrapper
+"""
+Usage: %prog [options] filename
+
+FILENAME is where the .raw overhead data are. Filename
+and the path to it also gives the base path and filename for the
+files that contains already processed overheads and the directory
+where to save the output data.
+FILENAME should be something like: "res_plugin=GSN-EDF_wss=WSS_tss=TSS.raw".
+Also, take a look at the "compact_results" script
+"""
+
+import defapp
+
+from optparse import make_option as o
+from os.path import splitext, basename, dirname
 
 import sys
 import numpy as np
@@ -15,8 +23,50 @@ import pm
 import pmserialize as pms
 import statanalyzer as pmstat
 
-from optparse import OptionParser
-from os.path import splitext, basename, dirname
+options = [
+    o("-l", "--cores-per-l2", dest="coresL2", action="store", type="int",
+        help="Number of cores per L2 cache; if all cores share the same \
+L2 (i.e., no L3) set this to 0 (default = 2)"),
+    o("-p", "--phys-cpu", dest="pcpu", action="store", type="int",
+        help="Number of physical sockets on this machine (default 4)"),
+    o(None, "--limit-preempt", dest="npreempt", action="store", type="int",
+        help="Limit the number of preemption sample used in statistics \
+to NPREEMPT"),
+    o(None, "--limit-l2", dest="nl2cache", action="store", type="int",
+        help="Limit the number of l2cache sample used in statistics \
+to NL2CACHE"),
+    o(None, "--limit-onchip", dest="nonchip", action="store", type="int",
+        help="Limit the number of onchip sample used in statistics \
+to NONCHIP"),
+    o(None, "--limit-offchip", dest="noffchip", action="store", type="int",
+        help="Limit the number of offchip sample used in statistics \
+to NOFFCHIP"),
+    o("-r", "--read-valid-data", dest="read_valid", action="store_true",
+        help="read already processed data from file"),
+    o("-v", "--verbose", dest="verbose", action="store_true"),
+    o("-d", "--debug", dest="debug", action="store_true"),
+    o("-u", "--microsec", dest="cpufreq", action="store", type="float",
+        help="Print overhead results in microseconds; \
+CPUFREQ is the cpu freq in MHz (cat /proc/cpuinfo)"),
+    ]
+# this cores per chip parameter implies a different topology model not fully
+# supported atm
+#    o("-c", "--cores-per-chip", dest="coresC",
+#                      action="store", type="int", default="6",
+#            help="number of cores per chip (default = 6)")
+
+defaults = {  # fallback values, keyed by the 'dest' name of each option above
+        'coresL2'   : 2,
+        'pcpu'      : 4,
+        'npreempt'  : 0,
+        'nl2cache'  : 0,
+        'nonchip'   : 0,
+        'noffchip'  : 0,
+        'read_valid': False,
+        'verbose'   : False,
+        'debug'     : False,
+        'cpufreq'   : 0,  # 0 = report in ticks; key must match dest="cpufreq"
+        }
 
 # from Bjoern's simple-gnuplot-wrapper
 def decode(name):
@@ -47,279 +97,190 @@ class Overhead:
     def add(self, ovd_vector, label):
         self.overheads.append([ovd_vector, label])
 
-# read previously saved overhead data
-def read_valid_data(filename, coresL2, valid_ovds):
-    nf = filename + '_preemption.vbin'
-    if debug:
-        print "Reading '%s'" % nf
-    valid_ovds.add(pms.unpickl_it(nf), 'preemtion')
-
-    nf = filename + '_onchip.vbin'
-    if debug:
-        print "Reading '%s'" % nf
-    valid_ovds.add(pms.unpickl_it(nf), 'onchip')
-
-    nf = filename + '_offchip.vbin'
-    if debug:
-        print "Reading '%s'" % nf
-    valid_ovds.add(pms.unpickl_it(nf), 'offchip')
-
-    if coresL2 != 0:
-        nf = filename + '_l2cache.vbin'
-        if debug:
+class Analyzer(defapp.App):
+    def __init__(self):
+        defapp.App.__init__(self, options, defaults, no_std_opts=True)
+        self.lsamples = {}
+        if self.options.npreempt:
+            self.lsamples['preemption'] = self.options.npreempt
+        if self.options.nl2cache:
+            self.lsamples['l2cache'] = self.options.nl2cache
+        if self.options.nonchip:
+            self.lsamples['onchip'] = self.options.nonchip
+        if self.options.noffchip:
+            self.lsamples['offchip'] = self.options.noffchip
+
+    # read previously saved overhead data
+    def read_valid_data(self, filename):
+        nf = filename + '_preemption.vbin'
+        if self.options.debug:
             print "Reading '%s'" % nf
-        valid_ovds.add(pms.unpickl_it(nf), 'l2cache')
-
-
-def process_raw_data(filename, coresL2, pcpu, valid_ovds):
-    # initialize pmmodule
-    pm.load(filename, coresL2, pcpu)
-
-    ovds = Overhead()
+        self.valid_ovds.add(pms.unpickl_it(nf), 'preemption')  # same label as raw path
 
-    # get overheads
-    ovds.add(pm.getPreemption(), 'preemption')
-    ovds.add(pm.getOnChipMigration(), 'onchip')
-    ovds.add(pm.getOffChipMigration(), 'offchip')
-    if coresL2 != 0:
-        ovds.add(pm.getL2Migration(), 'l2cache')
-
-    if debug:
-        for i in ovds:
-            print i[0], i[1]
-
-    # instance the statistical analizer to remove outliers
-    sd = pmstat.InterQuartileRange(25,75, True)
+        nf = filename + '_onchip.vbin'
+        if self.options.debug:
+            print "Reading '%s'" % nf
+        self.valid_ovds.add(pms.unpickl_it(nf), 'onchip')
 
-    for i in ovds:
-        if len(i[0]) != 0:
-            # just add overheads, "forget" preemption length
-            # FIXME: is really needed?
-            # valid_ovds.add(sd.remOutliers(i[0][:,0]), i[1])
-            valid_ovds.add(i[0][:,0], i[1])
-        else:
-            print "Warning: no valid data collected..."
-            valid_ovds.add([], i[1])
+        nf = filename + '_offchip.vbin'
+        if self.options.debug:  # the module-level 'debug' global is gone
+            print "Reading '%s'" % nf
+        self.valid_ovds.add(pms.unpickl_it(nf), 'offchip')
+
+        if self.options.coresL2 != 0:
+            nf = filename + '_l2cache.vbin'
+            if self.options.debug:
+                print "Reading '%s'" % nf
+            self.valid_ovds.add(pms.unpickl_it(nf), 'l2cache')
+
+    def process_raw_data(self, datafile):
+        coresL2 = self.options.coresL2
+        pcpu = self.options.pcpu
+        # initialize pmmodule
+        pm.load(datafile, coresL2, pcpu)
+        ovds = Overhead()
+        # get overheads
+        ovds.add(pm.getPreemption(), 'preemption')
+        ovds.add(pm.getOnChipMigration(), 'onchip')
+        ovds.add(pm.getOffChipMigration(), 'offchip')
+        if coresL2 != 0:
+            ovds.add(pm.getL2Migration(), 'l2cache')
+
+        if self.options.debug:
+            for i in ovds:
+                print i[0], i[1]
+
+        # instance the statistical analizer to remove outliers
+        sd = pmstat.InterQuartileRange(25,75, True)
 
-    if debug:
-        # check outliers removals
-        print "Before outliers removal"
         for i in ovds:
-            print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])}
-        print "After outliers removal"
-        for i in valid_ovds:
-            print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])}
-
-    if verbose:
-        for i in valid_ovds:
-            print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])}
-
-    # serialize valid overheads
-    for i in valid_ovds:
-        dname = dirname(filename)
-        fname, ext = splitext(basename(filename))
-
-        curf = dname + '/' + fname + '_' + i[1] + '.vbin'
-        pms.pickl_it(i[0], curf)
-
-    del ovds
-
-# The output is one csv WSS file per ovhd type, "tss, max_ovd, avg_ovd"
-# Filename output format:
-# hard_pm_plugin=GSN-EDF_dist=uni_light_wss=2048_ovd=preemption.csv
-# ovd: preemption, onchip, offchip, l2cache
-
-def analize_data(valid_ovds, dname, conf, samples_limits):
-
-    csvbname = dname + '/pm_plugin=' + conf['plugin'] + \
-            '_dist=uni_light_wss=' + conf['wss']
-
-    if verbose:
-        print "(WSS = %(0)s, TSS = %(1)s)" % {"0":conf['wss'], \
+            if len(i[0]) != 0:
+                # just add overheads, "forget" preemption length
+                # FIXME: is really needed?
+                # valid_ovds.add(sd.remOutliers(i[0][:,0]), i[1])
+                self.valid_ovds.add(i[0][:,0], i[1])
+            else:
+                print "Warning: no valid data collected..."
+                self.valid_ovds.add([], i[1])
+
+        if self.options.debug:
+            # check outliers removals
+            print "Before outliers removal"
+            for i in ovds:
+                print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])}
+            print "After outliers removal"
+            for i in self.valid_ovds:
+                print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])}
+
+        if self.options.verbose:
+            for i in self.valid_ovds:
+                print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])}
+
+        # serialize valid overheads
+        for i in self.valid_ovds:
+            dname = dirname(datafile)
+            fname, ext = splitext(basename(datafile))
+
+            curf = dname + '/' + fname + '_' + i[1] + '.vbin'
+            pms.pickl_it(i[0], curf)
+
+        del ovds
+
+    # The output is one csv WSS file per ovhd type, "tss, max_ovd, avg_ovd"
+    # Filename output format:
+    # pm_plugin=GSN-EDF_dist=uni_light_wss=2048_ovd=preemption.csv
+    # ovd: preemption, onchip, offchip, l2cache
+
+    def analyze_data(self, dname, conf):
+
+        csvbname = dname + '/pm_plugin=' + conf['plugin'] + \
+                '_dist=uni_light_wss=' + conf['wss']
+        if self.options.verbose:
+            print "(WSS = %(0)s, TSS = %(1)s)" % {"0":conf['wss'], \
                 "1":conf['tss']}
 
-    for i in valid_ovds:
+        for i in self.valid_ovds:
+            csvfname = csvbname + '_ovd=' + i[1] + '.csv'
+            if self.options.debug:
+                print "Saving csv '%s'" % csvfname
+
+            csvf = open(csvfname, 'a')
+            csvlist = [conf['tss']]
+
+            # data (valid_ovds already have only overheads, not length)
+            # vector = i[0][:,0]
+            #
+            # Check if we need to limit the number of samples
+            # that we use in the computation of max and avg.
+            # Statistically, this is more sound than other choices
+            limit = self.lsamples.get(i[1], 0)  # missing or <= 0: no limit
+            if limit > 0:
+                nsamples = min(limit, len(i[0]))
+                if self.options.verbose:
+                    print "Computing %(0)s stat only on %(1)d samples" % \
+                        {"0":i[1],
+                        "1":nsamples}
+
+                vector = i[0][0:nsamples]
+            else:
+                vector = i[0]
 
-        csvfname = csvbname + '_ovd=' + i[1] + '.csv'
-        if debug:
-            print "Saving csv '%s'" % csvfname
-
-        csvf = open(csvfname, 'a')
-        csvlist = [conf['tss']]
-
-        # data (valid_ovds already have only overheads, not length)
-        # vector = i[0][:,0]
-        #
-        # Check if we need to limit the number of samples
-        # that we use in the computation of max and avg.
-        # Statistically, this is more sound than other choices
-        if i[1] in samples_limits:
-           if verbose:
-               print "Computing %(0)s stat only on %(1)d samples" % \
-               {"0":i[1], "1":samples_limits[i[1]]}
-
-           if samples_limits[i[1]] != 0:
-               vector = i[0][0:samples_limits[i[1]]]
-           else:
-               vector = i[0]
-        else:
-            vector = i[0]
+            if len(vector) != 0:  # length test works for lists and numpy arrays
+                # FIXME if after disabling prefetching there are
+                # still negative value, they shouldn't be considered
+                max_vec = np.max(vector)
+                avg_vec = np.average(vector)
+            else:
+                max_vec = 0
+                avg_vec = 0
 
-        if vector != []:
-            # FIXME if after disabling prefetching there are
-            # still negative value, they shouldn't be considered
-            max_vec = np.max(vector)
-            avg_vec = np.average(vector)
+            if self.options.cpufreq == 0:
+                max_vec_str = "%5.5f" % max_vec
+                avg_vec_str = "%5.5f" % avg_vec
+            else:
+                max_vec_str = "%5.5f" % (max_vec / self.options.cpufreq)
+                avg_vec_str = "%5.5f" % (avg_vec / self.options.cpufreq)
+
+            csvlist.append(max_vec_str)
+            csvlist.append(avg_vec_str)
+            pms.csv_it(csvf, csvlist)
+            csvf.close()
+
+            if self.options.verbose:
+                if self.options.cpufreq == 0:
+                    print i[1] + " overheads (ticks)"
+                    print "Max = %5.5f" % max_vec
+                    print "Avg = %5.5f" % avg_vec
+                else:
+                    print i[1] + " overheads (us)"
+                    print "Max = %5.5f" % (max_vec / self.options.cpufreq)
+                    print "Avg = %5.5f" % (avg_vec / self.options.cpufreq)
+
+    def process_datafile(self, datafile):
+        dname = dirname(datafile)
+        bname = basename(datafile)
+        fname, ext = splitext(bname)
+        if ext != '.raw':
+            self.err("Warning: '%s' doesn't look like a .raw file"
+                    % bname)
+        if self.options.verbose:
+            print "\nProcessing: " + fname
+        conf = decode(fname)
+
+        self.valid_ovds = Overhead()
+        if self.options.read_valid:
+            # .vbin output should be in same directory as input filename
+            readf = dname + '/' + fname
+            self.read_valid_data(readf)
         else:
-            max_vec = 0
-            avg_vec = 0
+            self.process_raw_data(datafile)
 
-        if cpufreq == 0:
-            max_vec_str = "%5.5f" % max_vec
-            avg_vec_str = "%5.5f" % avg_vec
-        else:
-            max_vec_str = "%5.5f" % (max_vec / cpufreq)
-            avg_vec_str = "%5.5f" % (avg_vec / cpufreq)
-
-        csvlist.append(max_vec_str)
-        csvlist.append(avg_vec_str)
-        pms.csv_it(csvf, csvlist)
-        csvf.close()
-
-        if verbose:
-            if cpufreq == 0:
-                print i[1] + " overheads (ticks)"
-                print "Max = %5.5f" % max_vec
-                print "Avg = %5.5f" % avg_vec
-            else:
-                print i[1] + " overheads (us)"
-                print "Max = %5.5f" % (max_vec / cpufreq)
-                print "Avg = %5.5f" % (avg_vec / cpufreq)
-
-
-# filename-extension convention to get "pretty" output filenames
-# .raw for raw bin data
-# .vbin for valid overheads
-# .csv for processed final data
-def main():
-    usage = "Usage: %prog [options] filename"
-    description = """FILENAME is where the .raw overhead data are. Filename
-and the path to it also gives the base path and filename for the
-files that contains already processed overheads and the directory
-where to save the output data.
-FILENAME should be something like: "res_plugin=GSN-EDF_wss=WSS_tss=TSS.raw".
-Take a look at the "compact_results" script
-"""
-    parser = OptionParser(usage=usage, description=description)
-    parser.add_option("-l", "--cores-per-l2", dest="coresL2",
-                      action="store", type="int", default="2",
-            help="number of cores per L2 cache;  "
-                 "if all cores share the same L2 (i.e., no L3) set this to 0 "
-                 "(default = 2)")
-# this cores per chip parameter implies a different topology model not fully
-# supported atm
-#    parser.add_option("-c", "--cores-per-chip", dest="coresC",
-#                      action="store", type="int", default="6",
-#            help="number of cores per chip (default = 6)")
-    parser.add_option("-p", "--phys-cpu", dest="pcpu",
-                      action="store", type="int", default="4",
-            help="Number of physical sockets on this machine (default 4)")
-
-    parser.add_option("", "--limit-preempt", dest="npreempt",
-                      action="store", type="int", default="0",
-            help="Limit the number of preemption sample used in "
-                 "statistics to NPREEMPT")
-    parser.add_option("", "--limit-l2", dest="nl2cache",
-                      action="store", type="int", default="0",
-            help="Limit the number of l2cache sample used in "
-                 "statistics to NL2CACHE")
-    parser.add_option("", "--limit-onchip", dest="nonchip",
-                      action="store", type="int", default="0",
-            help="Limit the number of onchip sample used in "
-                 "statistics to NONCHIP")
-    parser.add_option("", "--limit-offchip", dest="noffchip",
-                      action="store", type="int", default="0",
-            help="Limit the number of offchip sample used in "
-                 "statistics to NOFFCHIP")
-
-    parser.add_option("-r", "--read-valid-data", dest="read_valid",
-                      action="store_true", default=False,
-                      help="read already processed data from file")
-
-    parser.add_option("-v", "--verbose", dest="verbose",
-                      action="store_true", default=False,
-                      help="Be verbose")
-    parser.add_option("-d", "--debug", dest="debug",
-                      action="store_true", default=False,
-                      help="Debugging information")
-
-    parser.add_option("-u", "--microsec", dest="cpufreq",
-                    action="store", type="float",
-                    help="Print overhead results in microseconds; \
-                          CPUFREQ is the cpu freq in MHz (cat /proc/cpuinfo)")
-
-    (options, args) = parser.parse_args()
-    if len(args) != 1:
-        parser.error("Argument missing")
-        sys.exit(-1)
-
-    valid_ovds = Overhead()
-
-    global verbose
-    global debug
-    global cpufreq
-    if options.verbose:
-        verbose = 1
-    else:
-        verbose = 0
-
-    if options.debug:
-        debug = 1
-    else:
-        debug = 0
-
-    if options.cpufreq:
-        cpufreq = options.cpufreq
-    else:
-        cpufreq = 0
-
-    # filename processing
-    dname = dirname(args[0])
-    bname = basename(args[0])
-    fname, ext = splitext(bname)
-    conf = decode(fname)
-
-    if ext != '.raw':
-        print "Warning: '%s' doesn't look like a .raw file" % bname
-        return -1
-
-    if verbose:
-        print "\nProcessing: " + fname
-
-    if options.read_valid:
-        # .vbin output should be in same directory as input filename
-        # TODO can be improved with custom directory and file reading
-        readf = dname + '/' + fname
-        read_valid_data(readf, options.coresL2, valid_ovds)
-    else:
-        ret = process_raw_data(args[0], options.coresL2, options.pcpu,
-            valid_ovds)
-        if ret == -1:
-            print "Cannot process raw data, quitting"
-            return None
-
-    lsamples = {}
-    if options.npreempt:
-        lsamples['preemption'] = options.npreempt
-    if options.nl2cache:
-        lsamples['l2cache'] = options.nl2cache
-    if options.nonchip:
-        lsamples['onchip'] = options.nonchip
-    if options.noffchip:
-        lsamples['offchip'] = options.noffchip
-
-    analize_data(valid_ovds, dname, conf, lsamples)
-
-if __name__ == '__main__':
-    main()
+        self.analyze_data(dname, conf)
+        del self.valid_ovds
+
+    def default(self, _):
+        for datafile in self.args:
+            self.process_datafile(datafile)
 
+if __name__ == "__main__":
+    Analyzer().launch()
-- 
cgit v1.2.2