diff options
-rw-r--r-- | binary_data.py | 5 | ||||
-rwxr-xr-x | oplot.py | 46 |
2 files changed, 47 insertions, 4 deletions
diff --git a/binary_data.py b/binary_data.py index 152afb7..39d6e05 100644 --- a/binary_data.py +++ b/binary_data.py | |||
@@ -5,7 +5,7 @@ from stats import iqr_remove_outliers, iqr_cutoff | |||
5 | 5 | ||
6 | 6 | ||
7 | def get_data(fname, scale, extent, cutoff=None, maxval=1000.0, | 7 | def get_data(fname, scale, extent, cutoff=None, maxval=1000.0, |
8 | stdev=False): | 8 | stdev=False, manual=None): |
9 | data = load_binary_file(fname) | 9 | data = load_binary_file(fname) |
10 | 10 | ||
11 | if cutoff and len(data) > cutoff: | 11 | if cutoff and len(data) > cutoff: |
@@ -31,6 +31,9 @@ def get_data(fname, scale, extent, cutoff=None, maxval=1000.0, | |||
31 | 31 | ||
32 | min_idx, max_idx = numpy.searchsorted(data, [lower, upper]) | 32 | min_idx, max_idx = numpy.searchsorted(data, [lower, upper]) |
33 | 33 | ||
34 | if manual: | ||
35 | max_idx -= manual | ||
36 | |||
34 | return [data, max_idx, min_idx, upper, lower] | 37 | return [data, max_idx, min_idx, upper, lower] |
35 | 38 | ||
36 | 39 | ||
@@ -10,7 +10,7 @@ from binary_data import get_data | |||
10 | from math import ceil | 10 | from math import ceil |
11 | 11 | ||
12 | import numpy | 12 | import numpy |
13 | 13 | import csv | |
14 | from os.path import splitext, basename | 14 | from os.path import splitext, basename |
15 | from optparse import make_option as o | 15 | from optparse import make_option as o |
16 | 16 | ||
@@ -35,6 +35,12 @@ options = [ | |||
35 | o('-c', '--cut-off', action='store', dest='cutoff', type='int', | 35 | o('-c', '--cut-off', action='store', dest='cutoff', type='int', |
36 | help='max number of samples to use'), | 36 | help='max number of samples to use'), |
37 | 37 | ||
38 | o('-t', '--take-off', action='store', dest='take_off', type='int', | ||
39 | help='manual number of outlier samples to discard'), | ||
40 | |||
41 | o('-o', '--outlier-list', action='store', dest='outlier_file', | ||
42 | help='list of outliers to remove'), | ||
43 | |||
38 | o('-x', '--xmax', action='store', dest='xmax', type='int', | 44 | o('-x', '--xmax', action='store', dest='xmax', type='int', |
39 | help='determines x-axis range'), | 45 | help='determines x-axis range'), |
40 | 46 | ||
@@ -57,11 +63,16 @@ defaults = { | |||
57 | 63 | ||
58 | # data processing | 64 | # data processing |
59 | 'cycles' : 2128, # per usec | 65 | 'cycles' : 2128, # per usec |
60 | 'extent' : 3, | 66 | 'extent' : 0, |
61 | 'cutoff' : None, | 67 | 'cutoff' : None, |
68 | 'take_off' : None, | ||
62 | 'normalize' : False, | 69 | 'normalize' : False, |
63 | 'use_std' : False, | 70 | 'use_std' : False, |
64 | 71 | ||
72 | # manual outlier removal | ||
73 | 'outlier_file' : None, | ||
74 | 'outliers' : {}, | ||
75 | |||
65 | # formatting options | 76 | # formatting options |
66 | 'binsize' : 0.25, | 77 | 'binsize' : 0.25, |
67 | 78 | ||
@@ -86,6 +97,19 @@ HOST_CPUS = { | |||
86 | 'ludwig' : 24, | 97 | 'ludwig' : 24, |
87 | } | 98 | } |
88 | 99 | ||
100 | |||
def load_outliers(fname):
    """Load a manual outlier-removal table from a CSV file.

    Each non-empty row must have at least three columns:

        scheduler, n, cut

    where ``scheduler`` is kept as a string and ``n`` and ``cut`` are
    parsed as integers.

    Args:
        fname: path of the CSV file to read.

    Returns:
        A dict mapping each scheduler name to a list of ``(n, cut)``
        tuples, in the order the rows appear in the file.

    Raises:
        IOError/OSError: if the file cannot be opened.
        IndexError: if a non-empty row has fewer than three columns.
        ValueError: if ``n`` or ``cut`` is not an integer.
    """
    outliers = {}
    # Use a context manager so the file handle is closed even when a
    # malformed row raises part-way through parsing.
    with open(fname, "r") as csv_file:
        for row in csv.reader(csv_file):
            # csv.reader yields [] for blank lines (e.g. a trailing
            # newline at the end of the file); skip those instead of
            # failing on row[0].
            if not row:
                continue
            sched = row[0]
            n = int(row[1])
            cut = int(row[2])
            outliers.setdefault(sched, []).append((n, cut))
    return outliers
111 | |||
112 | |||
89 | def get_stats_label(samples): | 113 | def get_stats_label(samples): |
90 | avg = numpy.mean(samples) | 114 | avg = numpy.mean(samples) |
91 | med = numpy.median(samples) | 115 | med = numpy.median(samples) |
@@ -172,11 +196,20 @@ class OverheadPlotter(defapp.App): | |||
172 | else: | 196 | else: |
173 | scale = 1.0 / self.options.cycles | 197 | scale = 1.0 / self.options.cycles |
174 | 198 | ||
199 | take_off = self.options.take_off | ||
200 | if conf['scheduler'] in self.options.outliers: | ||
201 | n = int(conf['n']) | ||
202 | for (i, t) in self.options.outliers[conf['scheduler']]: | ||
203 | if i == n: | ||
204 | take_off = t | ||
205 | break | ||
206 | |||
175 | data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile, | 207 | data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile, |
176 | scale, | 208 | scale, |
177 | extent=self.options.extent, | 209 | extent=self.options.extent, |
178 | cutoff=self.options.cutoff, | 210 | cutoff=self.options.cutoff, |
179 | stdev=self.options.use_std) | 211 | stdev=self.options.use_std, |
212 | manual=take_off) | ||
180 | 213 | ||
181 | samples = data[min_idx:max_idx] | 214 | samples = data[min_idx:max_idx] |
182 | discarded = (len(data) - len(samples)) / float(len(data)) * 100 | 215 | discarded = (len(data) - len(samples)) / float(len(data)) * 100 |
@@ -187,9 +220,13 @@ class OverheadPlotter(defapp.App): | |||
187 | samples_label = "samples: total=%d filtered=%d (%.2f%%)" % \ | 220 | samples_label = "samples: total=%d filtered=%d (%.2f%%)" % \ |
188 | (len(data), len(data) - len(samples), discarded) | 221 | (len(data), len(data) - len(samples), discarded) |
189 | 222 | ||
223 | |||
190 | if self.options.extent: | 224 | if self.options.extent: |
191 | iqr_label = "IQR: extent=%d threshold=%.2fus" % \ | 225 | iqr_label = "IQR: extent=%d threshold=%.2fus" % \ |
192 | (self.options.extent, iqr_max) | 226 | (self.options.extent, iqr_max) |
227 | elif take_off: | ||
228 | iqr_label = "%s outlier%s manually removed" % \ | ||
229 | (take_off, '' if take_off == 1 else 's') | ||
193 | elif discarded > 0: | 230 | elif discarded > 0: |
194 | iqr_label = "manual threshold=1000us [IQR not applied]" | 231 | iqr_label = "manual threshold=1000us [IQR not applied]" |
195 | else: | 232 | else: |
@@ -336,6 +373,9 @@ class OverheadPlotter(defapp.App): | |||
336 | self.tmpfiles = [] | 373 | self.tmpfiles = [] |
337 | 374 | ||
338 | def default(self, _): | 375 | def default(self, _): |
376 | if self.options.outlier_file: | ||
377 | self.options.outliers = load_outliers(self.options.outlier_file) | ||
378 | |||
339 | for i, datafile in enumerate(self.args): | 379 | for i, datafile in enumerate(self.args): |
340 | self.out("[%d/%d] Processing %s ..." % (i + 1, len(self.args), datafile)) | 380 | self.out("[%d/%d] Processing %s ..." % (i + 1, len(self.args), datafile)) |
341 | self.plot_file(datafile) | 381 | self.plot_file(datafile) |