From 3bb7a40648683d15174b44cb68740d78adf29b86 Mon Sep 17 00:00:00 2001 From: Andrea Bastoni Date: Thu, 25 Mar 2010 15:40:20 -0400 Subject: Restructure preempt migration data analyzer --- pm_data_analysis/pm_data_analyzer.py | 507 ++++++++++++++++------------------- 1 file changed, 234 insertions(+), 273 deletions(-) (limited to 'pm_data_analysis/pm_data_analyzer.py') diff --git a/pm_data_analysis/pm_data_analyzer.py b/pm_data_analysis/pm_data_analyzer.py index 43de82a..02c5098 100755 --- a/pm_data_analysis/pm_data_analyzer.py +++ b/pm_data_analysis/pm_data_analyzer.py @@ -1,11 +1,19 @@ #!/usr/bin/env python -# -# Preemption and migration overheads analysis. -# Take a single-task-set-size file and generate max / avg for the valid -# samples for preemption and different kinds of migration -# -# Save computed valid overheads and print a file suitable for processing -# using Bjorn's gnuplot wrapper +""" +Usage: %prog [options] filename + +FILENAME is where the .raw overhead data are. Filename +and the path to it also gives the base path and filename for the +files that contains already processed overheads and the directory +where to save the output data. +FILENAME should be something like: "res_plugin=GSN-EDF_wss=WSS_tss=TSS.raw". +Also, take a look at the "compact_results" script +""" + +import defapp + +from optparse import make_option as o +from os.path import splitext, basename, dirname import sys import numpy as np @@ -15,8 +23,50 @@ import pm import pmserialize as pms import statanalyzer as pmstat -from optparse import OptionParser -from os.path import splitext, basename, dirname +options = [ + o("-l", "--cores-per-l2", dest="coresL2", action="store", type="int", + help="Number of cores per L2 cache; if all cores share the same \ +L2 (i.e., no L3) set this to 0 (default = 2)"), + o("-p", "--phys-cpu", dest="pcpu", action="store", type="int", + help="Number of physical sockets on this machine (default 4)"), + o(None, "--limit-preempt", dest="npreempt", action="store", type="int", + help="Limit the number of preemption sample used in statistics \ +to NPREEMPT"), + o(None, "--limit-l2", dest="nl2cache", action="store", type="int", + help="Limit the number of l2cache sample used in statistics \ +to NL2CACHE"), + o(None, "--limit-onchip", dest="nonchip", action="store", type="int", + help="Limit the number of onchip sample used in statistics \ +to NONCHIP"), + o(None, "--limit-offchip", dest="noffchip", action="store", type="int", + help="Limit the number of offchip sample used in statistics \ +to NOFFCHIP"), + o("-r", "--read-valid-data", dest="read_valid", action="store_true", + help="read already processed data from file"), + o("-v", "--verbose", dest="verbose", action="store_true"), + o("-d", "--debug", dest="debug", action="store_true"), + o("-u", "--microsec", dest="cpufreq", action="store", type="float", + help="Print overhead results in microseconds; \ +CPUFREQ is the cpu freq in MHz (cat /proc/cpuinfo)"), + ] +# this cores per chip parameter implies a different topology model not fully +# supported atm +# o("-c", "--cores-per-chip", dest="coresC", +# action="store", type="int", default="6", +# help="number of cores per chip (default = 6)") + +defaults = { + 'coresL2' : 2, + 'pcpu' : 4, + 'npreempt' : 0, + 'nl2cache' : 0, + 'nonchip' : 0, + 'noffchip' : 0, + 'read_valid': False, + 'verbose' : False, + 'debug' : False, + 'cpufrequ' : 0, + } # from Bjoern's simple-gnuplot-wrapper def decode(name): @@ -47,279 +97,190 @@ class Overhead: def add(self, ovd_vector, label): self.overheads.append([ovd_vector, label]) -# read previously saved overhead data -def read_valid_data(filename, coresL2, valid_ovds): - nf = filename + '_preemption.vbin' - if debug: - print "Reading '%s'" % nf - valid_ovds.add(pms.unpickl_it(nf), 'preemtion') - - nf = filename + '_onchip.vbin' - if debug: - print "Reading '%s'" % nf - valid_ovds.add(pms.unpickl_it(nf), 'onchip') - - nf = filename + '_offchip.vbin' - if debug: - print "Reading '%s'" % nf - valid_ovds.add(pms.unpickl_it(nf), 'offchip') - - if coresL2 != 0: - nf = filename + '_l2cache.vbin' - if debug: +class Analyzer(defapp.App): + def __init__(self): + defapp.App.__init__(self, options, defaults, no_std_opts=True) + self.lsamples = {} + if self.options.npreempt: + self.lsamples['preemption'] = self.options.npreempt + if self.options.nl2cache: + self.lsamples['l2cache'] = self.options.nl2cache + if self.options.nonchip: + self.lsamples['onchip'] = self.options.nonchip + if self.options.noffchip: + self.lsamples['offchip'] = self.options.noffchip + + # read previously saved overhead data + def read_valid_data(self, filename): + nf = filename + '_preemption.vbin' + if self.options.debug: print "Reading '%s'" % nf - valid_ovds.add(pms.unpickl_it(nf), 'l2cache') - - -def process_raw_data(filename, coresL2, pcpu, valid_ovds): - # initialize pmmodule - pm.load(filename, coresL2, pcpu) - - ovds = Overhead() + self.valid_ovds.add(pms.unpickl_it(nf), 'preemtion') - # get overheads - ovds.add(pm.getPreemption(), 'preemption') - ovds.add(pm.getOnChipMigration(), 'onchip') - ovds.add(pm.getOffChipMigration(), 'offchip') - if coresL2 != 0: - ovds.add(pm.getL2Migration(), 'l2cache') - - if debug: - for i in ovds: - print i[0], i[1] - - # instance the statistical analizer to remove outliers - sd = pmstat.InterQuartileRange(25,75, True) + nf = filename + '_onchip.vbin' + if self.options.debug: + print "Reading '%s'" % nf + self.valid_ovds.add(pms.unpickl_it(nf), 'onchip') - for i in ovds: - if len(i[0]) != 0: - # just add overheads, "forget" preemption length - # FIXME: is really needed? - # valid_ovds.add(sd.remOutliers(i[0][:,0]), i[1]) - valid_ovds.add(i[0][:,0], i[1]) - else: - print "Warning: no valid data collected..." - valid_ovds.add([], i[1]) + nf = filename + '_offchip.vbin' + if debug: + print "Reading '%s'" % nf + self.valid_ovds.add(pms.unpickl_it(nf), 'offchip') + + if self.options.coresL2 != 0: + nf = filename + '_l2cache.vbin' + if self.options.debug: + print "Reading '%s'" % nf + self.valid_ovds.add(pms.unpickl_it(nf), 'l2cache') + + def process_raw_data(self, datafile): + coresL2 = self.options.coresL2 + pcpu = self.options.pcpu + # initialize pmmodule + pm.load(datafile, coresL2, pcpu) + ovds = Overhead() + # get overheads + ovds.add(pm.getPreemption(), 'preemption') + ovds.add(pm.getOnChipMigration(), 'onchip') + ovds.add(pm.getOffChipMigration(), 'offchip') + if coresL2 != 0: + ovds.add(pm.getL2Migration(), 'l2cache') + + if self.options.debug: + for i in ovds: + print i[0], i[1] + + # instance the statistical analizer to remove outliers + sd = pmstat.InterQuartileRange(25,75, True) - if debug: - # check outliers removals - print "Before outliers removal" for i in ovds: - print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])} - print "After outliers removal" - for i in valid_ovds: - print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])} - - if verbose: - for i in valid_ovds: - print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])} - - # serialize valid overheads - for i in valid_ovds: - dname = dirname(filename) - fname, ext = splitext(basename(filename)) - - curf = dname + '/' + fname + '_' + i[1] + '.vbin' - pms.pickl_it(i[0], curf) - - del ovds - -# The output is one csv WSS file per ovhd type, "tss, max_ovd, avg_ovd" -# Filename output format: -# hard_pm_plugin=GSN-EDF_dist=uni_light_wss=2048_ovd=preemption.csv -# ovd: preemption, onchip, offchip, l2cache - -def analize_data(valid_ovds, dname, conf, samples_limits): - - csvbname = dname + '/pm_plugin=' + conf['plugin'] + \ - '_dist=uni_light_wss=' + conf['wss'] - - if verbose: - print "(WSS = %(0)s, TSS = %(1)s)" % {"0":conf['wss'], \ + if len(i[0]) != 0: + # just add overheads, "forget" preemption length + # FIXME: is really needed? + # valid_ovds.add(sd.remOutliers(i[0][:,0]), i[1]) + self.valid_ovds.add(i[0][:,0], i[1]) + else: + print "Warning: no valid data collected..." + self.valid_ovds.add([], i[1]) + + if self.options.debug: + # check outliers removals + print "Before outliers removal" + for i in ovds: + print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])} + print "After outliers removal" + for i in self.valid_ovds: + print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])} + + if self.options.verbose: + for i in self.valid_ovds: + print "samples(%(0)s) = %(1)d" % {"0":i[1], "1":len(i[0])} + + # serialize valid overheads + for i in self.valid_ovds: + dname = dirname(datafile) + fname, ext = splitext(basename(datafile)) + + curf = dname + '/' + fname + '_' + i[1] + '.vbin' + pms.pickl_it(i[0], curf) + + del ovds + + # The output is one csv WSS file per ovhd type, "tss, max_ovd, avg_ovd" + # Filename output format: + # pm_plugin=GSN-EDF_dist=uni_light_wss=2048_ovd=preemption.csv + # ovd: preemption, onchip, offchip, l2cache + + def analyze_data(self, dname, conf): + + csvbname = dname + '/pm_plugin=' + conf['plugin'] + \ + '_dist=uni_light_wss=' + conf['wss'] + if self.options.verbose: + print "(WSS = %(0)s, TSS = %(1)s)" % {"0":conf['wss'], \ "1":conf['tss']} - for i in valid_ovds: + for i in self.valid_ovds: + csvfname = csvbname + '_ovd=' + i[1] + '.csv' + if self.options.debug: + print "Saving csv '%s'" % csvfname + + csvf = open(csvfname, 'a') + csvlist = [conf['tss']] + + # data (valid_ovds already have only overheads, not length) + # vector = i[0][:,0] + # + # Check if we need to limit the number of samples + # that we use in the computation of max and avg. + # Statistically, this is more sound than other choices + if i[1] in self.lsamples: + if self.lsamples[i[1]] > 0: + nsamples = min(self.lsamples[i[1]], len(i[0])) + if self.options.verbose: + print "Computing %(0)s stat only on %(1)d samples" % \ + {"0":i[1], + "1":nsamples} + + vector = i[0][0:nsamples] + else: + vector = i[0] - csvfname = csvbname + '_ovd=' + i[1] + '.csv' - if debug: - print "Saving csv '%s'" % csvfname - - csvf = open(csvfname, 'a') - csvlist = [conf['tss']] - - # data (valid_ovds already have only overheads, not length) - # vector = i[0][:,0] - # - # Check if we need to limit the number of samples - # that we use in the computation of max and avg. - # Statistically, this is more sound than other choices - if i[1] in samples_limits: - if verbose: - print "Computing %(0)s stat only on %(1)d samples" % \ - {"0":i[1], "1":samples_limits[i[1]]} - - if samples_limits[i[1]] != 0: - vector = i[0][0:samples_limits[i[1]]] - else: - vector = i[0] - else: - vector = i[0] + if vector != []: + # FIXME if after disabling prefetching there are + # still negative value, they shouldn't be considered + max_vec = np.max(vector) + avg_vec = np.average(vector) + else: + max_vec = 0 + avg_vec = 0 - if vector != []: - # FIXME if after disabling prefetching there are - # still negative value, they shouldn't be considered - max_vec = np.max(vector) - avg_vec = np.average(vector) + if self.options.cpufreq == 0: + max_vec_str = "%5.5f" % max_vec + avg_vec_str = "%5.5f" % avg_vec + else: + max_vec_str = "%5.5f" % (max_vec / self.options.cpufreq) + avg_vec_str = "%5.5f" % (avg_vec / self.options.cpufreq) + + csvlist.append(max_vec_str) + csvlist.append(avg_vec_str) + pms.csv_it(csvf, csvlist) + csvf.close() + + if self.options.verbose: + if self.options.cpufreq == 0: + print i[1] + " overheads (ticks)" + print "Max = %5.5f" % max_vec + print "Avg = %5.5f" % avg_vec + else: + print i[1] + " overheads (us)" + print "Max = %5.5f" % (max_vec / self.options.cpufreq) + print "Avg = %5.5f" % (avg_vec / self.options.cpufreq) + + def process_datafile(self, datafile): + dname = dirname(datafile) + bname = basename(datafile) + fname, ext = splitext(bname) + if ext != '.raw': + self.err("Warning: '%s' doesn't look like a .raw file" + % bname) + if self.options.verbose: + print "\nProcessing: " + fname + conf = decode(fname) + + self.valid_ovds = Overhead() + if self.options.read_valid: + # .vbin output should be in same directory as input filename + readf = dname + '/' + fname + self.read_valid_data(readf) else: - max_vec = 0 - avg_vec = 0 + self.process_raw_data(datafile) - if cpufreq == 0: - max_vec_str = "%5.5f" % max_vec - avg_vec_str = "%5.5f" % avg_vec - else: - max_vec_str = "%5.5f" % (max_vec / cpufreq) - avg_vec_str = "%5.5f" % (avg_vec / cpufreq) - - csvlist.append(max_vec_str) - csvlist.append(avg_vec_str) - pms.csv_it(csvf, csvlist) - csvf.close() - - if verbose: - if cpufreq == 0: - print i[1] + " overheads (ticks)" - print "Max = %5.5f" % max_vec - print "Avg = %5.5f" % avg_vec - else: - print i[1] + " overheads (us)" - print "Max = %5.5f" % (max_vec / cpufreq) - print "Avg = %5.5f" % (avg_vec / cpufreq) - - -# filename-extension convention to get "pretty" output filenames -# .raw for raw bin data -# .vbin for valid overheads -# .csv for processed final data -def main(): - usage = "Usage: %prog [options] filename" - description = """FILENAME is where the .raw overhead data are. Filename -and the path to it also gives the base path and filename for the -files that contains already processed overheads and the directory -where to save the output data. -FILENAME should be something like: "res_plugin=GSN-EDF_wss=WSS_tss=TSS.raw". -Take a look at the "compact_results" script -""" - parser = OptionParser(usage=usage, description=description) - parser.add_option("-l", "--cores-per-l2", dest="coresL2", - action="store", type="int", default="2", - help="number of cores per L2 cache; " - "if all cores share the same L2 (i.e., no L3) set this to 0 " - "(default = 2)") -# this cores per chip parameter implies a different topology model not fully -# supported atm -# parser.add_option("-c", "--cores-per-chip", dest="coresC", -# action="store", type="int", default="6", -# help="number of cores per chip (default = 6)") - parser.add_option("-p", "--phys-cpu", dest="pcpu", - action="store", type="int", default="4", - help="Number of physical sockets on this machine (default 4)") - - parser.add_option("", "--limit-preempt", dest="npreempt", - action="store", type="int", default="0", - help="Limit the number of preemption sample used in " - "statistics to NPREEMPT") - parser.add_option("", "--limit-l2", dest="nl2cache", - action="store", type="int", default="0", - help="Limit the number of l2cache sample used in " - "statistics to NL2CACHE") - parser.add_option("", "--limit-onchip", dest="nonchip", - action="store", type="int", default="0", - help="Limit the number of onchip sample used in " - "statistics to NONCHIP") - parser.add_option("", "--limit-offchip", dest="noffchip", - action="store", type="int", default="0", - help="Limit the number of offchip sample used in " - "statistics to NOFFCHIP") - - parser.add_option("-r", "--read-valid-data", dest="read_valid", - action="store_true", default=False, - help="read already processed data from file") - - parser.add_option("-v", "--verbose", dest="verbose", - action="store_true", default=False, - help="Be verbose") - parser.add_option("-d", "--debug", dest="debug", - action="store_true", default=False, - help="Debugging information") - - parser.add_option("-u", "--microsec", dest="cpufreq", - action="store", type="float", - help="Print overhead results in microseconds; \ - CPUFREQ is the cpu freq in MHz (cat /proc/cpuinfo)") - - (options, args) = parser.parse_args() - if len(args) != 1: - parser.error("Argument missing") - sys.exit(-1) - - valid_ovds = Overhead() - - global verbose - global debug - global cpufreq - if options.verbose: - verbose = 1 - else: - verbose = 0 - - if options.debug: - debug = 1 - else: - debug = 0 - - if options.cpufreq: - cpufreq = options.cpufreq - else: - cpufreq = 0 - - # filename processing - dname = dirname(args[0]) - bname = basename(args[0]) - fname, ext = splitext(bname) - conf = decode(fname) - - if ext != '.raw': - print "Warning: '%s' doesn't look like a .raw file" % bname - return -1 - - if verbose: - print "\nProcessing: " + fname - - if options.read_valid: - # .vbin output should be in same directory as input filename - # TODO can be improved with custom directory and file reading - readf = dname + '/' + fname - read_valid_data(readf, options.coresL2, valid_ovds) - else: - ret = process_raw_data(args[0], options.coresL2, options.pcpu, - valid_ovds) - if ret == -1: - print "Cannot process raw data, quitting" - return None - - lsamples = {} - if options.npreempt: - lsamples['preemption'] = options.npreempt - if options.nl2cache: - lsamples['l2cache'] = options.nl2cache - if options.nonchip: - lsamples['onchip'] = options.nonchip - if options.noffchip: - lsamples['offchip'] = options.noffchip - - analize_data(valid_ovds, dname, conf, lsamples) - -if __name__ == '__main__': - main() + self.analyze_data(dname, conf) + del self.valid_ovds + + def default(self, _): + for datafile in self.args: + self.process_datafile(datafile) +if __name__ == "__main__": + Analyzer().launch() -- cgit v1.2.2