author    Andrea Bastoni <bastoni@cs.unc.edu>  2010-04-12 23:26:13 -0400
committer Andrea Bastoni <bastoni@cs.unc.edu>  2010-04-12 23:26:13 -0400
commit    4538f7e4fcaa3bf199377b9b735562e53cd1c7d3 (patch)
tree      7f7129e661e4826352301f1b2d3bbd8a945c0d9d
parent    3b21f9d15822453117d1e908ab97cacd8f7f39be (diff)
Add schedule-sensitive PM samples analyzer
- main data analyzer for preemption and migration (schedule-sensitive method)
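
  Example invocation (hypothetical paths and CPU frequency; run with
  --help for the full option list):

      ./pm_data_analyzer.py -l 2 -p 4 -u 2000.0 \
          results/res_plugin=GSN-EDF_wss=1024_tss=25.raw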
-rw-r--r--  SConstruct                          17
-rw-r--r--  data_analysis/defapp.py             95
-rwxr-xr-x  data_analysis/pm_data_analyzer.py  386
3 files changed, 497 insertions(+), 1 deletion(-)
diff --git a/SConstruct b/SConstruct
index 85deeec..3241f68 100644
--- a/SConstruct
+++ b/SConstruct
@@ -52,7 +52,9 @@ INCLUDE_DIRS = [
     # Linux kernel headers
     '${LITMUS_KERNEL}/include/',
     # Linux architecture-specific kernel headers
-    '${LITMUS_KERNEL}/arch/${INCLUDE_ARCH}/include'
+    '${LITMUS_KERNEL}/arch/${INCLUDE_ARCH}/include',
+    # Python headers
+    '${PYTHON_HEADERS}'
 ]
 
 # #####################################################################
@@ -69,6 +71,10 @@ vars.AddVariables(
         'Where to find the LITMUS^RT kernel.',
         '../litmus2010'),
 
+    PathVariable('PYTHON_HEADERS',
+        'Where to find Python headers.',
+        '/usr/include/python2.5'),
+
     EnumVariable('ARCH',
         'Target architecture.',
         arch,
@@ -120,6 +126,7 @@ def dump_config(env):
     dump('CPPPATH')
     dump('CCFLAGS')
     dump('LINKFLAGS')
+    dump('PYTHON_HEADERS')
 
 if GetOption('dump'):
     print "\n"
@@ -161,6 +168,10 @@ if not (env.GetOption('clean') or env.GetOption('help')):
         abort("Cannot find liblitmus headers in '$LIBLITMUS'",
               "Please ensure that LIBLITMUS in .config is a valid path'")
 
+    conf.CheckCHeader('Python.h') or \
+        abort("Cannot find Python headers in '$PYTHON_HEADERS'",
+              "Please ensure that PYTHON_HEADERS in .config is set to a valid path.")
+
     env = conf.Finish()
 
 # #####################################################################
@@ -192,6 +203,10 @@ pmpy.Replace(LINKFLAGS = '')
 
 pmrt.Program('pm_task', ['bin/pm_task.c', 'bin/pm_common.c'])
 pmrt.Program('pm_polluter', ['bin/pm_polluter.c', 'bin/pm_common.c'])
+
+pmpy.SharedLibrary('pm', ['c2python/pmmodule.c', 'bin/pm_common.c'])
+
+Command("pm.so", "libpm.so", Move("$TARGET", "$SOURCE"))
 # #####################################################################
 # Additional Help
 
diff --git a/data_analysis/defapp.py b/data_analysis/defapp.py
new file mode 100644
index 0000000..c03f118
--- /dev/null
+++ b/data_analysis/defapp.py
@@ -0,0 +1,95 @@
#!/usr/bin/env python

"""
A basic Python application shell, for copy&paste development.
"""

import optparse
import cmd
import sys

o = optparse.make_option

class App(cmd.Cmd):
    def __init__(self, opts=None, defaults=None, no_std_opts=False,
                 stdout=sys.stdout, stderr=sys.stderr, default_cmd=None):
        cmd.Cmd.__init__(self, None, stdout, stderr)
        self.default_cmd = default_cmd
        if not opts:
            opts = []
        if not defaults:
            defaults = {}
        defaults["_App_file"] = None
        self.f = None
        if not no_std_opts:
            opts += [ o('-o', '--output', action='store', dest='_App_file',
                        help='store output in FILE', metavar='FILE')]
        (self.options, self.args) = self.__parse(opts, defaults)

    def __parse(self, opts, defaults):
        parser = optparse.OptionParser(option_list=opts)
        parser.set_defaults(**defaults)
        return parser.parse_args()

    def launch(self, args=None):
        if args:
            self.args = args
        try:
            if self.options._App_file:
                self.f = open(self.options._App_file, 'w')
            self.onecmd(' '.join(self.args))
        except IOError, msg:
            self.err("I/O Error:", msg)
        except KeyboardInterrupt:
            self.err("Interrupted.")
        if self.f:
            self.f.close()

    def outfile(self):
        if self.f:
            return self.f
        else:
            return sys.stdout

    def emptyline(self):
        if self.default_cmd:
            self.onecmd(self.default_cmd)

    def default(self, line):
        self.err("%s: Command not recognized." % line)

    def do_dump_config(self, key):
        """Display the configuration as parsed on the console."""
        def is_private(k): return k[0] == '_'
        def show(k): print "%20s : %10s" % (k, str(self.options.__dict__[k]))
        if not key:
            for x in sorted(self.options.__dict__.keys()):
                if not is_private(x):
                    show(x)
        elif not is_private(key) and key in self.options.__dict__:
            show(key)
        else:
            self.err("%s: unknown option." % key)

    @staticmethod
    def __write(stream, *args, **kargs):
        stream.write(" ".join([str(a) for a in args]))
        if not ('omit_newline' in kargs and kargs['omit_newline']):
            stream.write("\n")
        stream.flush()

    def err(self, *args, **kargs):
        self.__write(sys.stderr, *args, **kargs)

    def msg(self, *args, **kargs):
        self.__write(sys.stdout, *args, **kargs)

    def out(self, *args, **kargs):
        if self.f:
            self.__write(self.f, *args, **kargs)
        else:
            self.__write(sys.stdout, *args, **kargs)

if __name__ == "__main__":
    a = App()
    a.launch()
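
# A minimal subclass sketch (illustrative only; the names below are
# hypothetical and not part of this commit). A subclass defines do_*
# methods and launch() dispatches to them via cmd.Cmd.onecmd():
#
#   class Hello(App):
#       def do_greet(self, line):
#           self.msg("Hello,", line or "world")
#
#   Hello(default_cmd='greet').launch()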
diff --git a/data_analysis/pm_data_analyzer.py b/data_analysis/pm_data_analyzer.py
new file mode 100755
index 0000000..e730383
--- /dev/null
+++ b/data_analysis/pm_data_analyzer.py
@@ -0,0 +1,386 @@
#!/usr/bin/env python
"""
Usage: %prog [options] filename

FILENAME is the file that holds the .raw overhead data. Its name and
path also determine the base path and filename of the files that
contain the already-processed overheads, and the directory where the
output data are saved.
FILENAME should be something like: "res_plugin=GSN-EDF_wss=WSS_tss=TSS.raw".
Also, take a look at the "compact_results" script.
"""

import defapp

from optparse import make_option as o
from os.path import splitext, basename, dirname

import sys
import numpy as np

# preemption and migration C data exchanger
import pm
import pmserialize as pms
import statanalyzer as pmstat

options = [
    o("-l", "--cores-per-l2", dest="coresL2", action="store", type="int",
      help="Number of cores per L2 cache; if all cores share the same \
L2 (i.e., no L3) set this to 0 (default = 2)"),
    o("-p", "--phys-cpu", dest="pcpu", action="store", type="int",
      help="Number of physical sockets on this machine (default 4)"),
    o(None, "--limit-preempt", dest="npreempt", action="store", type="int",
      help="Limit the number of preemption samples used in statistics \
to NPREEMPT"),
    o(None, "--limit-l2", dest="nl2cache", action="store", type="int",
      help="Limit the number of l2cache samples used in statistics \
to NL2CACHE"),
    o(None, "--limit-onchip", dest="nonchip", action="store", type="int",
      help="Limit the number of onchip samples used in statistics \
to NONCHIP"),
    o(None, "--limit-offchip", dest="noffchip", action="store", type="int",
      help="Limit the number of offchip samples used in statistics \
to NOFFCHIP"),
    o("-a", "--autocap", dest="autocap", action="store_true",
      help="Autodetect the minimum number of samples to use for statistics"),
    o("-r", "--read-valid-data", dest="read_valid", action="store_true",
      help="Read already processed data from file"),
    o("-v", "--verbose", dest="verbose", action="store_true"),
    o("-d", "--debug", dest="debug", action="store_true"),
    o("-u", "--microsec", dest="cpufreq", action="store", type="float",
      help="Print overhead results in microseconds; \
CPUFREQ is the cpu freq in MHz (cat /proc/cpuinfo)"),
    ]
# this cores-per-chip parameter implies a different topology model not fully
# supported atm
# o("-c", "--cores-per-chip", dest="coresC",
#   action="store", type="int", default="6",
#   help="number of cores per chip (default = 6)")

defaults = {
        'coresL2'   : 2,
        'pcpu'      : 4,
        'npreempt'  : 0,
        'nl2cache'  : 0,
        'nonchip'   : 0,
        'noffchip'  : 0,
        'read_valid': False,
        'verbose'   : False,
        'debug'     : False,
        'cpufreq'   : 0,
        }

# from Bjoern's simple-gnuplot-wrapper
def decode(name):
    params = {}
    parts = name.split('_')
    for p in parts:
        kv = p.split('=')
        k = kv[0]
        v = kv[1] if len(kv) > 1 else None
        params[k] = v
    return params
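
# Illustrative example (hypothetical filename):
#   decode("res_plugin=GSN-EDF_wss=1024_tss=25")
#   => {'res': None, 'plugin': 'GSN-EDF', 'wss': '1024', 'tss': '25'}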

class Overhead:
    def __init__(self):
        self.overheads = []
        self.index = 0

    def __iter__(self):
        return self

    def next(self):
        if self.index == len(self.overheads):
            self.index = 0
            raise StopIteration
        self.index += 1
        return self.overheads[self.index - 1]

    def add(self, ovd_vector, label):
        self.overheads.append([ovd_vector, label])

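# Illustrative use (hypothetical data): an Overhead stores labeled sample
# vectors, and iterating over it yields [vector, label] pairs:
#   ovds = Overhead()
#   ovds.add(np.array([3.0, 5.0]), 'preemption')
#   for vec, label in ovds:
#       print label, len(vec)
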
class Analyzer(defapp.App):
    def __init__(self):
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
        self.last_conf = {}
        self.valid_ovds_list = {}
        self.min_sample_tss = {}
        self.lsamples = {}
        if self.options.npreempt:
            self.lsamples['preemption'] = self.options.npreempt
        if self.options.nl2cache:
            self.lsamples['l2cache'] = self.options.nl2cache
        if self.options.nonchip:
            self.lsamples['onchip'] = self.options.nonchip
        if self.options.noffchip:
            self.lsamples['offchip'] = self.options.noffchip

    # read previously saved overhead data
    def read_valid_data(self, filename):
        valid_ovds = Overhead()
        nf = filename + '_preemption.vbin'
        if self.options.debug:
            print "Reading '%s'" % nf
        valid_ovds.add(pms.unpickl_it(nf), 'preemption')

        nf = filename + '_onchip.vbin'
        if self.options.debug:
            print "Reading '%s'" % nf
        valid_ovds.add(pms.unpickl_it(nf), 'onchip')

        nf = filename + '_offchip.vbin'
        if self.options.debug:
            print "Reading '%s'" % nf
        valid_ovds.add(pms.unpickl_it(nf), 'offchip')

        if self.options.coresL2 != 0:
            nf = filename + '_l2cache.vbin'
            if self.options.debug:
                print "Reading '%s'" % nf
            valid_ovds.add(pms.unpickl_it(nf), 'l2cache')
        return valid_ovds

    def process_raw_data(self, datafile, conf):
        coresL2 = self.options.coresL2
        pcpu = self.options.pcpu
        # initialize pmmodule
        pm.load(datafile, coresL2, pcpu, int(conf['wss']), int(conf['tss']))
        # raw overheads
        ovds = Overhead()
        # valid overheads
        valid_ovds = Overhead()
        # get overheads
        ovds.add(pm.getPreemption(), 'preemption')
        ovds.add(pm.getOnChipMigration(), 'onchip')
        ovds.add(pm.getOffChipMigration(), 'offchip')
        if coresL2 != 0:
            ovds.add(pm.getL2Migration(), 'l2cache')

        if self.options.debug:
            for i in ovds:
                print i[0], i[1]

        # instantiate the statistical analyzer to remove outliers
        sd = pmstat.InterQuartileRange(25, 75, True)

        for i in ovds:
            if len(i[0]) != 0:
                # just add overheads, "forget" preemption length
                # FIXME: is this really needed?
                # valid_ovds.add(sd.remOutliers(i[0][:,0]), i[1])
                valid_ovds.add(i[0][:,0], i[1])
            else:
                print "Warning: no valid data collected..."
                valid_ovds.add([], i[1])

        if self.options.debug:
            # check outlier removal
            print "Before outliers removal"
            for i in ovds:
                print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])}
            print "After outliers removal"
            for i in valid_ovds:
                print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])}

        count_sample = {}
        if self.options.autocap or self.options.verbose:
            for i in valid_ovds:
                if self.options.verbose:
                    print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])}
                count_sample[i[1]] = len(i[0])

        if self.options.autocap:
            if self.min_sample_tss == {}:
                self.min_sample_tss = {
                        'preemption':count_sample['preemption'],
                        'onchip':count_sample['onchip'],
                        'offchip':count_sample['offchip'],
                        'l2cache':count_sample['l2cache']}
            else:
                # checking the number of preemption samples is normally
                # sufficient to find the tss with the fewest samples in a wss
                if self.min_sample_tss['preemption'] > \
                        count_sample['preemption']:
                    self.min_sample_tss = {
                            'preemption':count_sample['preemption'],
                            'onchip':count_sample['onchip'],
                            'offchip':count_sample['offchip'],
                            'l2cache':count_sample['l2cache']}

        # serialize valid overheads
        for i in valid_ovds:
            dname = dirname(datafile)
            fname, ext = splitext(basename(datafile))

            curf = dname + '/' + fname + '_' + i[1] + '.vbin'
            pms.pickl_it(i[0], curf)

        del ovds
        return valid_ovds

    # The output is one csv file per WSS and overhead type; each row is
    # "tss, max_ovd, avg_ovd, avg_ovd - std_ovd, avg_ovd + std_ovd".
    # Filename output format:
    # pm_wss=2048_ovd=preemption.csv
    # ovd: preemption, onchip, offchip, l2cache

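    # An illustrative row for pm_wss=2048_ovd=preemption.csv (hypothetical
    # values, in ticks): 25, 10234.00000, 1043.50000, 980.20000, 1106.80000
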
    def analyze_data(self, dname, conf):
        csvbname = dname + '/pm_wss=' + conf['wss']

        for tss in sorted(self.valid_ovds_list.keys(), key=int):
            vohs = self.valid_ovds_list[tss]

            if self.options.verbose:
                print "\n(WSS = %(0)s, TSS = %(1)s)" % {"0":conf['wss'], \
                        "1":tss}

            for i in vohs:
                csvfname = csvbname + '_ovd=' + i[1] + '.csv'
                if self.options.debug:
                    print "Saving csv '%s'" % csvfname

                csvf = open(csvfname, 'a')
                csvlist = [tss]

                # data (valid_ovds already have only overheads, not length)
                # vector = i[0][:,0]
                #
                # Check if we need to limit the number of samples used to
                # compute max and avg. Using the same number of samples for
                # each configuration keeps the statistics comparable, which
                # is sounder than comparing values computed over different
                # sample counts.
                if i[1] in self.lsamples:
                    if self.lsamples[i[1]] > 0:
                        nsamples = min(self.lsamples[i[1]], len(i[0]))
                        if self.options.verbose:
                            print "Computing %(0)s stat only on %(1)d samples" % \
                                    {"0":i[1],
                                     "1":nsamples}
                        vector = i[0][0:nsamples]
                elif self.options.autocap: # we can also autocompute the cap
                    nsamples = self.min_sample_tss[i[1]]
                    if self.options.verbose:
                        print "Computing %(0)s stat only on %(1)d samples" % \
                                {"0":i[1], "1":nsamples}
                    vector = i[0][0:nsamples]
                else:
                    vector = i[0]

                if len(vector) != 0:
                    # FIXME: if, after disabling prefetching, there are
                    # still negative values, they shouldn't be considered
                    max_vec = np.max(vector)
                    avg_vec = np.average(vector)
                    std_vec = np.std(vector)
                else:
                    max_vec = 0
                    avg_vec = 0
                    std_vec = 0

                if self.options.cpufreq == 0:
                    max_vec_str = "%5.5f" % max_vec
                    avg_vec_str = "%5.5f" % avg_vec
                    std_vec_up = "%5.5f" % (avg_vec + std_vec)
                    std_vec_down = "%5.5f" % (avg_vec - std_vec)

                else:
                    max_vec_str = "%5.5f" % (max_vec / self.options.cpufreq)
                    avg_vec_str = "%5.5f" % (avg_vec / self.options.cpufreq)
                    std_vec_up = "%5.5f" % ((avg_vec + std_vec) / self.options.cpufreq)
                    std_vec_down = "%5.5f" % ((avg_vec - std_vec) / self.options.cpufreq)

                csvlist.append(max_vec_str)
                csvlist.append(avg_vec_str)
                csvlist.append(std_vec_down)
                csvlist.append(std_vec_up)
                pms.csv_it(csvf, csvlist)
                csvf.close()

                if self.options.verbose:
                    if self.options.cpufreq == 0:
                        print i[1] + " overheads (ticks)"
                        print "Max = %5.5f" % max_vec
                        print "Avg = %5.5f" % avg_vec
                        print "Std = %5.5f" % std_vec
                    else:
                        print i[1] + " overheads (us)"
                        print "Max = %5.5f" % (max_vec / self.options.cpufreq)
                        print "Avg = %5.5f" % (avg_vec / self.options.cpufreq)
                        print "Std = %5.5f" % (std_vec / self.options.cpufreq)

                del vector
            del vohs

    def process_datafile(self, datafile, dname, fname, conf):
        if self.options.verbose:
            print "\nProcessing: " + fname
        if self.options.read_valid:
            # the .vbin output should be in the same directory as the input file
            readf = dname + '/' + fname
            self.valid_ovds_list[conf['tss']] = self.read_valid_data(readf)
        else:
            self.valid_ovds_list[conf['tss']] = \
                    self.process_raw_data(datafile, conf)

    def default(self, _):
        # TODO: to support this combination we should also store the min
        # number of samples in the .vbin file
        if self.options.read_valid and self.options.autocap:
            self.err("Read stored values + autocap not currently supported")
            return None

        for datafile in self.args:
            dname = dirname(datafile)
            bname = basename(datafile)
            fname, ext = splitext(bname)
            if ext != '.raw':
                self.err("Warning: '%s' doesn't look like a .raw file"
                        % bname)

            conf = decode(fname)

            if datafile == self.args[-1]:
                # manage single file / last of list
                if ('wss' in self.last_conf) and (conf['wss'] != \
                        self.last_conf['wss']):
                    # we have already analyzed at least one file,
                    # this is the first file of a new set of WSS,
                    # and it is also the last file of the list
                    self.analyze_data(dname, self.last_conf)
                    # reinit dictionaries
                    del self.valid_ovds_list
                    del self.min_sample_tss
                    self.valid_ovds_list = {}
                    self.min_sample_tss = {}
                    # analyze this file
                    self.process_datafile(datafile, dname, fname, conf)
                    self.analyze_data(dname, conf)
                    del self.valid_ovds_list
                    del self.min_sample_tss
                else:
                    # just the end of a list of wss files or 1 single file
                    self.process_datafile(datafile, dname, fname, conf)
                    if self.args[0] == self.args[-1]:
                        self.analyze_data(dname, conf)
                    else:
                        self.analyze_data(dname, self.last_conf)
                    del self.valid_ovds_list
            else:
                # assume WSS are analyzed in order (all 1024s, all 256s, etc.)
                if ('wss' in self.last_conf) and (conf['wss'] != \
                        self.last_conf['wss']):
                    # we have already analyzed at least one file,
                    # this is the first file of a new set of WSS,
                    # analyze tss for previous wss
                    self.analyze_data(dname, self.last_conf)
                    # reinit dictionaries
                    del self.valid_ovds_list
                    del self.min_sample_tss
                    self.valid_ovds_list = {}
                    self.min_sample_tss = {}

                # add tss to valid ovds list for this wss
                self.process_datafile(datafile, dname, fname, conf)
            # save previously analyzed configuration
            self.last_conf = conf

if __name__ == "__main__":
    Analyzer().launch()