aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bjoern B. Brandenburg <bbb@cs.unc.edu> 2011-02-22 17:12:40 -0500
committer Bjoern B. Brandenburg <bbb@cs.unc.edu> 2011-02-22 17:14:32 -0500
commit 05a5ab2cd71e9c68a7002e1e8a89b887afc4240f (patch)
tree 0e5f02468bfc960956d7d2cbbe93dfd03f530625
parent 22e1c3b33984da853d1403843d9d2ce7a596c335 (diff)
support manual outlier filtering
-rw-r--r-- binary_data.py 5
-rwxr-xr-x oplot.py 46
2 files changed, 47 insertions, 4 deletions
diff --git a/binary_data.py b/binary_data.py
index 152afb7..39d6e05 100644
--- a/binary_data.py
+++ b/binary_data.py
@@ -5,7 +5,7 @@ from stats import iqr_remove_outliers, iqr_cutoff
5 5
6 6
7def get_data(fname, scale, extent, cutoff=None, maxval=1000.0, 7def get_data(fname, scale, extent, cutoff=None, maxval=1000.0,
8 stdev=False): 8 stdev=False, manual=None):
9 data = load_binary_file(fname) 9 data = load_binary_file(fname)
10 10
11 if cutoff and len(data) > cutoff: 11 if cutoff and len(data) > cutoff:
@@ -31,6 +31,9 @@ def get_data(fname, scale, extent, cutoff=None, maxval=1000.0,
31 31
32 min_idx, max_idx = numpy.searchsorted(data, [lower, upper]) 32 min_idx, max_idx = numpy.searchsorted(data, [lower, upper])
33 33
34 if manual:
35 max_idx -= manual
36
34 return [data, max_idx, min_idx, upper, lower] 37 return [data, max_idx, min_idx, upper, lower]
35 38
36 39
diff --git a/oplot.py b/oplot.py
index 63589b8..249d079 100755
--- a/oplot.py
+++ b/oplot.py
@@ -10,7 +10,7 @@ from binary_data import get_data
10from math import ceil 10from math import ceil
11 11
12import numpy 12import numpy
13 13import csv
14from os.path import splitext, basename 14from os.path import splitext, basename
15from optparse import make_option as o 15from optparse import make_option as o
16 16
@@ -35,6 +35,12 @@ options = [
35 o('-c', '--cut-off', action='store', dest='cutoff', type='int', 35 o('-c', '--cut-off', action='store', dest='cutoff', type='int',
36 help='max number of samples to use'), 36 help='max number of samples to use'),
37 37
38 o('-t', '--take-off', action='store', dest='take_off', type='int',
39 help='manual number of outlier samples to discard'),
40
41 o('-o', '--outlier-list', action='store', dest='outlier_file',
42 help='list of outliers to remove'),
43
38 o('-x', '--xmax', action='store', dest='xmax', type='int', 44 o('-x', '--xmax', action='store', dest='xmax', type='int',
39 help='determines x-axis range'), 45 help='determines x-axis range'),
40 46
@@ -57,11 +63,16 @@ defaults = {
57 63
58 # data processing 64 # data processing
59 'cycles' : 2128, # per usec 65 'cycles' : 2128, # per usec
60 'extent' : 3, 66 'extent' : 0,
61 'cutoff' : None, 67 'cutoff' : None,
68 'take_off' : None,
62 'normalize' : False, 69 'normalize' : False,
63 'use_std' : False, 70 'use_std' : False,
64 71
72 # manual outlier removal
73 'outlier_file' : None,
74 'outliers' : {},
75
65 # formatting options 76 # formatting options
66 'binsize' : 0.25, 77 'binsize' : 0.25,
67 78
@@ -86,6 +97,19 @@ HOST_CPUS = {
86 'ludwig' : 24, 97 'ludwig' : 24,
87} 98}
88 99
100
def load_outliers(fname):
    """Load a manual outlier list from a CSV file.

    Each non-empty row must hold at least three columns: a scheduler
    name, an integer ``n``, and an integer ``cut`` (the number of samples
    to discard for that scheduler/``n`` combination).

    Returns a dict mapping each scheduler name to a list of ``(n, cut)``
    tuples, in the order the rows appear in the file.

    Raises IndexError if a non-empty row has fewer than three columns and
    ValueError if the second or third column is not an integer.
    """
    outliers = {}
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(fname, "r") as f:
        for row in csv.reader(f):
            # csv.reader yields [] for blank lines (e.g. a stray empty
            # line in the file); skip them rather than crash on row[0].
            if not row:
                continue
            sched = row[0]
            n = int(row[1])
            cut = int(row[2])
            outliers.setdefault(sched, []).append((n, cut))
    return outliers
111
112
89def get_stats_label(samples): 113def get_stats_label(samples):
90 avg = numpy.mean(samples) 114 avg = numpy.mean(samples)
91 med = numpy.median(samples) 115 med = numpy.median(samples)
@@ -172,11 +196,20 @@ class OverheadPlotter(defapp.App):
172 else: 196 else:
173 scale = 1.0 / self.options.cycles 197 scale = 1.0 / self.options.cycles
174 198
199 take_off = self.options.take_off
200 if conf['scheduler'] in self.options.outliers:
201 n = int(conf['n'])
202 for (i, t) in self.options.outliers[conf['scheduler']]:
203 if i == n:
204 take_off = t
205 break
206
175 data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile, 207 data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile,
176 scale, 208 scale,
177 extent=self.options.extent, 209 extent=self.options.extent,
178 cutoff=self.options.cutoff, 210 cutoff=self.options.cutoff,
179 stdev=self.options.use_std) 211 stdev=self.options.use_std,
212 manual=take_off)
180 213
181 samples = data[min_idx:max_idx] 214 samples = data[min_idx:max_idx]
182 discarded = (len(data) - len(samples)) / float(len(data)) * 100 215 discarded = (len(data) - len(samples)) / float(len(data)) * 100
@@ -187,9 +220,13 @@ class OverheadPlotter(defapp.App):
187 samples_label = "samples: total=%d filtered=%d (%.2f%%)" % \ 220 samples_label = "samples: total=%d filtered=%d (%.2f%%)" % \
188 (len(data), len(data) - len(samples), discarded) 221 (len(data), len(data) - len(samples), discarded)
189 222
223
190 if self.options.extent: 224 if self.options.extent:
191 iqr_label = "IQR: extent=%d threshold=%.2fus" % \ 225 iqr_label = "IQR: extent=%d threshold=%.2fus" % \
192 (self.options.extent, iqr_max) 226 (self.options.extent, iqr_max)
227 elif take_off:
228 iqr_label = "%s outlier%s manually removed" % \
229 (take_off, '' if take_off == 1 else 's')
193 elif discarded > 0: 230 elif discarded > 0:
194 iqr_label = "manual threshold=1000us [IQR not applied]" 231 iqr_label = "manual threshold=1000us [IQR not applied]"
195 else: 232 else:
@@ -336,6 +373,9 @@ class OverheadPlotter(defapp.App):
336 self.tmpfiles = [] 373 self.tmpfiles = []
337 374
338 def default(self, _): 375 def default(self, _):
376 if self.options.outlier_file:
377 self.options.outliers = load_outliers(self.options.outlier_file)
378
339 for i, datafile in enumerate(self.args): 379 for i, datafile in enumerate(self.args):
340 self.out("[%d/%d] Processing %s ..." % (i + 1, len(self.args), datafile)) 380 self.out("[%d/%d] Processing %s ..." % (i + 1, len(self.args), datafile))
341 self.plot_file(datafile) 381 self.plot_file(datafile)