From baee00899b87c09e2cfb08dc59cdf5862c7f9255 Mon Sep 17 00:00:00 2001
From: Andrea Bastoni
Date: Fri, 26 Mar 2010 13:55:47 -0400
Subject: Fix autocomputation of cap on data samples

---
 pm_data_analysis/pm_data_analyzer.py | 246 +++++++++++++++++++++--------------
 1 file changed, 151 insertions(+), 95 deletions(-)

(limited to 'pm_data_analysis/pm_data_analyzer.py')

diff --git a/pm_data_analysis/pm_data_analyzer.py b/pm_data_analysis/pm_data_analyzer.py
index 63baa27..cc6e0a5 100755
--- a/pm_data_analysis/pm_data_analyzer.py
+++ b/pm_data_analysis/pm_data_analyzer.py
@@ -102,7 +102,8 @@ class Overhead:
 class Analyzer(defapp.App):
     def __init__(self):
         defapp.App.__init__(self, options, defaults, no_std_opts=True)
-        self.min_sample_wss = {}
+        self.last_conf = {}
+        self.valid_ovds_list = {}
         self.lsamples = {}
         if self.options.npreempt:
             self.lsamples['preemption'] = self.options.npreempt
@@ -115,33 +116,38 @@ class Analyzer(defapp.App):

     # read previously saved overhead data
     def read_valid_data(self, filename):
+        valid_ovds = Overhead()
         nf = filename + '_preemption.vbin'
         if self.options.debug:
             print "Reading '%s'" % nf
-        self.valid_ovds.add(pms.unpickl_it(nf), 'preemtion')
+        valid_ovds.add(pms.unpickl_it(nf), 'preemption')

         nf = filename + '_onchip.vbin'
         if self.options.debug:
             print "Reading '%s'" % nf
-        self.valid_ovds.add(pms.unpickl_it(nf), 'onchip')
+        valid_ovds.add(pms.unpickl_it(nf), 'onchip')

         nf = filename + '_offchip.vbin'
-        if debug:
+        if self.options.debug:
             print "Reading '%s'" % nf
-        self.valid_ovds.add(pms.unpickl_it(nf), 'offchip')
+        valid_ovds.add(pms.unpickl_it(nf), 'offchip')

         if self.options.coresL2 != 0:
             nf = filename + '_l2cache.vbin'
             if self.options.debug:
                 print "Reading '%s'" % nf
-            self.valid_ovds.add(pms.unpickl_it(nf), 'l2cache')
+            valid_ovds.add(pms.unpickl_it(nf), 'l2cache')
+        return valid_ovds

     def process_raw_data(self, datafile, conf):
         coresL2 = self.options.coresL2
         pcpu = self.options.pcpu
         # initialize pmmodule
         pm.load(datafile, coresL2, pcpu, int(conf['wss']), int(conf['tss']))
+        # raw overheads
         ovds = Overhead()
+        # valid overheads
+        valid_ovds = Overhead()
         # get overheads
         ovds.add(pm.getPreemption(), 'preemption')
         ovds.add(pm.getOnChipMigration(), 'onchip')
@@ -161,10 +167,10 @@ class Analyzer(defapp.App):
                 # just add overheads, "forget" preemption length
                 # FIXME: is really needed?
                 # valid_ovds.add(sd.remOutliers(i[0][:,0]), i[1])
-                self.valid_ovds.add(i[0][:,0], i[1])
+                valid_ovds.add(i[0][:,0], i[1])
             else:
                 print "Warning: no valid data collected..."
-                self.valid_ovds.add([], i[1])
+                valid_ovds.add([], i[1])

         if self.options.debug:
             # check outliers removals
@@ -172,36 +178,36 @@ class Analyzer(defapp.App):
             for i in ovds:
                 print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])}
             print "After outliers removal"
-            for i in self.valid_ovds:
+            for i in valid_ovds:
                 print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])}

         count_sample = {}
         if self.options.autocap or self.options.verbose:
-            for i in self.valid_ovds:
+            for i in valid_ovds:
                 if self.options.verbose:
                     print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])}
                 count_sample[i[1]] = len(i[0])

         if self.options.autocap:
-            if conf['wss'] in self.min_sample_wss:
+            if 'min' in self.valid_ovds_list:
                 # it is normally sufficient to check num samples for
                 # preemptions to get tss with min num samples in wss
-                if self.min_sample_wss[conf['wss']]['preemption'] > \
+                if self.valid_ovds_list['min']['preemption'] > \
                         count_sample['preemption']:
-                    self.min_sample_wss[conf['wss']] = {'tss':conf['tss'],
+                    self.valid_ovds_list['min'] = {
+                            'preemption':count_sample['preemption'],
+                            'onchip':count_sample['onchip'],
+                            'offchip':count_sample['offchip'],
+                            'l2cache':count_sample['l2cache']}
+            else:
+                self.valid_ovds_list['min'] = {
                         'preemption':count_sample['preemption'],
                         'onchip':count_sample['onchip'],
                         'offchip':count_sample['offchip'],
                         'l2cache':count_sample['l2cache']}
-            else:
-                self.min_sample_wss[conf['wss']] = {'tss':conf['tss'],
-                        'preemption':count_sample['preemption'],
-                        'onchip':count_sample['onchip'],
-                        'offchip':count_sample['offchip'],
-                        'l2cache':count_sample['l2cache']}

         # serialize valid overheads
-        for i in self.valid_ovds:
+        for i in valid_ovds:
             dname = dirname(datafile)
             fname, ext = splitext(basename(datafile))
@@ -209,6 +215,7 @@ class Analyzer(defapp.App):
             pms.pickl_it(i[0], curf)

         del ovds
+        return valid_ovds

     # The output is one csv WSS file per ovhd type, "tss, max_ovd, avg_ovd"
     # Filename output format:
@@ -216,100 +223,149 @@ class Analyzer(defapp.App):
     # ovd: preemption, onchip, offchip, l2cache
     def analyze_data(self, dname, conf):
         csvbname = dname + '/pm_plugin=' + conf['plugin'] + \
                 '_dist=uni_light_wss=' + conf['wss']
-        if self.options.verbose:
-            print "(WSS = %(0)s, TSS = %(1)s)" % {"0":conf['wss'], \
-                    "1":conf['tss']}
-        for i in self.valid_ovds:
-            csvfname = csvbname + '_ovd=' + i[1] + '.csv'
-            if self.options.debug:
-                print "Saving csv '%s'" % csvfname
-
-            csvf = open(csvfname, 'a')
-            csvlist = [conf['tss']]
-
-            # data (valid_ovds already have only overheads, not length)
-            # vector = i[0][:,0]
-            #
-            # Check if we need to limit the number of samples
-            # that we use in the computation of max and avg.
-            # Statistically, this is more sound than other choices
-            if i[1] in self.lsamples:
-                if self.lsamples[i[1]] > 0:
-                    nsamples = min(self.lsamples[i[1]], len(i[0]))
+        for tss,vohs in self.valid_ovds_list.iteritems():
+            if tss == 'min':
+                # do not analyze fake 'min' tss
+                continue
+
+            if self.options.verbose:
+                print "\n(WSS = %(0)s, TSS = %(1)s)" % {"0":conf['wss'], \
+                        "1":tss}
+
+            for i in vohs:
+                csvfname = csvbname + '_ovd=' + i[1] + '.csv'
+                if self.options.debug:
+                    print "Saving csv '%s'" % csvfname
+
+                csvf = open(csvfname, 'a')
+                csvlist = [tss]
+
+                # data (valid_ovds already have only overheads, not length)
+                # vector = i[0][:,0]
+                #
+                # Check if we need to limit the number of samples
+                # that we use in the computation of max and avg.
+                # Statistically, this is more sound than other choices
+                if i[1] in self.lsamples:
+                    if self.lsamples[i[1]] > 0:
+                        nsamples = min(self.lsamples[i[1]], len(i[0]))
+                        if self.options.verbose:
+                            print "Computing %(0)s stat only on %(1)d samples" % \
+                                {"0":i[1],
+                                 "1":nsamples}
+                        vector = i[0][0:nsamples]
+                elif self.options.autocap: # we can also autocompute the cap
+                    nsamples = self.valid_ovds_list['min'][i[1]]
                     if self.options.verbose:
                         print "Computing %(0)s stat only on %(1)d samples" % \
-                            {"0":i[1],
-                             "1":nsamples}
+                            {"0":i[1], "1":nsamples}
                     vector = i[0][0:nsamples]
-            elif self.options.autocap: # we can also autocompute the cap
-                nsamples = self.min_sample_wss[conf['wss']][i[1]]
-                if self.options.verbose:
-                    print "Computing %(0)s stat only on %(1)d samples" % \
-                        {"0":i[1], "1":nsamples}
-                vector = i[0][0:nsamples]
-            else:
-                vector = i[0]
-
-            if vector != []:
-                # FIXME if after disabling prefetching there are
-                # still negative value, they shouldn't be considered
-                max_vec = np.max(vector)
-                avg_vec = np.average(vector)
-            else:
-                max_vec = 0
-                avg_vec = 0
-
-            if self.options.cpufreq == 0:
-                max_vec_str = "%5.5f" % max_vec
-                avg_vec_str = "%5.5f" % avg_vec
-            else:
-                max_vec_str = "%5.5f" % (max_vec / self.options.cpufreq)
-                avg_vec_str = "%5.5f" % (avg_vec / self.options.cpufreq)
+                else:
+                    vector = i[0]

-            csvlist.append(max_vec_str)
-            csvlist.append(avg_vec_str)
-            pms.csv_it(csvf, csvlist)
-            csvf.close()
+                if vector != []:
+                    # FIXME if after disabling prefetching there are
+                    # still negative values, they shouldn't be considered
+                    max_vec = np.max(vector)
+                    avg_vec = np.average(vector)
+                else:
+                    max_vec = 0
+                    avg_vec = 0

-            if self.options.verbose:
                 if self.options.cpufreq == 0:
-                    print i[1] + " overheads (ticks)"
-                    print "Max = %5.5f" % max_vec
-                    print "Avg = %5.5f" % avg_vec
+                    max_vec_str = "%5.5f" % max_vec
+                    avg_vec_str = "%5.5f" % avg_vec
                 else:
-                    print i[1] + " overheads (us)"
-                    print "Max = %5.5f" % (max_vec / self.options.cpufreq)
-                    print "Avg = %5.5f" % (avg_vec / self.options.cpufreq)
-
-    def process_datafile(self, datafile):
-        dname = dirname(datafile)
-        bname = basename(datafile)
-        fname, ext = splitext(bname)
-        if ext != '.raw':
-            self.err("Warning: '%s' doesn't look like a .raw file"
-                % bname)
+                    max_vec_str = "%5.5f" % (max_vec / self.options.cpufreq)
+                    avg_vec_str = "%5.5f" % (avg_vec / self.options.cpufreq)
+
+                csvlist.append(max_vec_str)
+                csvlist.append(avg_vec_str)
+                pms.csv_it(csvf, csvlist)
+                csvf.close()
+
+                if self.options.verbose:
+                    if self.options.cpufreq == 0:
+                        print i[1] + " overheads (ticks)"
+                        print "Max = %5.5f" % max_vec
+                        print "Avg = %5.5f" % avg_vec
+                    else:
+                        print i[1] + " overheads (us)"
+                        print "Max = %5.5f" % (max_vec / self.options.cpufreq)
+                        print "Avg = %5.5f" % (avg_vec / self.options.cpufreq)
+
+    def process_datafile(self, datafile, dname, fname, conf):
         if self.options.verbose:
             print "\nProcessing: " + fname
-        conf = decode(fname)
-
-        self.valid_ovds = Overhead()
         if self.options.read_valid:
             # .vbin output should be in same directory as input filename
             readf = dname + '/' + fname
-            self.read_valid_data(readf)
+            self.valid_ovds_list[conf['tss']] = self.read_valid_data(readf)
         else:
-            self.process_raw_data(datafile, conf)
-
-        self.analyze_data(dname, conf)
-        del self.valid_ovds
+            self.valid_ovds_list[conf['tss']] = \
+                self.process_raw_data(datafile, conf)

     def default(self, _):
+        # TODO: to support this combination we should store also the min
+        # number of samples in the .vbin file
+        if self.options.read_valid and self.options.autocap:
self.err("Read stored values + autocap not currently supported") + return None + for datafile in self.args: - self.process_datafile(datafile) + dname = dirname(datafile) + bname = basename(datafile) + fname, ext = splitext(bname) + if ext != '.raw': + self.err("Warning: '%s' doesn't look like a .raw file" + % bname) + + conf = decode(fname) + + if datafile == self.args[-1]: + # manage single file / last of list + if ('wss' in self.last_conf) and (conf['wss'] != \ + self.last_conf['wss']): + # we have already analyzed at least one file, + # this is the first file of a new set of WSS, + # and it is also the last file of the list + self.analyze_data(dname, self.last_conf) + # delete previously used dictionary + del self.valid_ovds_list + # reinit dictionary + self.valid_ovds_list = {} + # analyze this file + self.process_datafile(datafile, dname, fname, conf) + self.analyze_data(dname, conf) + del self.valid_ovds_list + else: + # just the end of a list of wss files or 1 single file + self.process_datafile(datafile, dname, fname, conf) + if self.args[0] == self.args[-1]: + self.analyze_data(dname, conf) + else: + self.analyze_data(dname, self.last_conf) + del self.valid_ovds_list + else: + # assume WSS are anayzed in order (all 1024s, all 256s, etc.) + if ('wss' in self.last_conf) and (conf['wss'] != \ + self.last_conf['wss']): + # we have already analyzed at least one file, + # this is the first file of a new set of WSS, + # analyze tss for previous wss + self.analyze_data(dname, self.last_conf) + # delete previously used dictionary + del self.valid_ovds_list + # reinit dictionary + self.valid_ovds_list = {} + + # add tss to valid ovds list for this wss + self.process_datafile(datafile, dname, fname, conf) + # save previously analyzed configuration + self.last_conf = conf if __name__ == "__main__": Analyzer().launch() -- cgit v1.2.2