def load_outliers(fname):
    """Read the manual outlier list from the CSV file *fname*.

    Each non-empty row must have at least three columns:

      1. scheduler name (used as the dictionary key),
      2. an integer that the caller compares against the experiment's
         ``n`` value,
      3. an integer giving how many outlier samples to discard for that
         scheduler/``n`` combination.

    Returns a plain dict mapping each scheduler name to a list of
    ``(n, cut)`` tuples in file order.

    Raises IOError/OSError if the file cannot be opened, ValueError if
    column 2 or 3 is not an integer, and IndexError if a non-empty row
    has fewer than three columns.
    """
    outliers = {}
    # Use a context manager so the file handle is released even when a
    # malformed row makes the parsing below raise.
    with open(fname, "r") as f:
        for row in csv.reader(f):
            # csv.reader yields [] for blank lines (e.g. a trailing
            # newline in a hand-edited list); there is nothing to record.
            if not row:
                continue
            sched = row[0]
            n = int(row[1])
            cut = int(row[2])
            outliers.setdefault(sched, []).append((n, cut))
    return outliers
== n: + take_off = t + break + data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile, scale, extent=self.options.extent, cutoff=self.options.cutoff, - stdev=self.options.use_std) + stdev=self.options.use_std, + manual=take_off) samples = data[min_idx:max_idx] discarded = (len(data) - len(samples)) / float(len(data)) * 100 @@ -187,9 +220,13 @@ class OverheadPlotter(defapp.App): samples_label = "samples: total=%d filtered=%d (%.2f%%)" % \ (len(data), len(data) - len(samples), discarded) + if self.options.extent: iqr_label = "IQR: extent=%d threshold=%.2fus" % \ (self.options.extent, iqr_max) + elif take_off: + iqr_label = "%s outlier%s manually removed" % \ + (take_off, '' if take_off == 1 else 's') elif discarded > 0: iqr_label = "manual threshold=1000us [IQR not applied]" else: @@ -336,6 +373,9 @@ class OverheadPlotter(defapp.App): self.tmpfiles = [] def default(self, _): + if self.options.outlier_file: + self.options.outliers = load_outliers(self.options.outlier_file) + for i, datafile in enumerate(self.args): self.out("[%d/%d] Processing %s ..." % (i + 1, len(self.args), datafile)) self.plot_file(datafile) -- cgit v1.2.2