diff options
| -rw-r--r-- | binary_data.py | 5 | ||||
| -rwxr-xr-x | oplot.py | 46 |
2 files changed, 47 insertions, 4 deletions
diff --git a/binary_data.py b/binary_data.py index 152afb7..39d6e05 100644 --- a/binary_data.py +++ b/binary_data.py | |||
| @@ -5,7 +5,7 @@ from stats import iqr_remove_outliers, iqr_cutoff | |||
| 5 | 5 | ||
| 6 | 6 | ||
| 7 | def get_data(fname, scale, extent, cutoff=None, maxval=1000.0, | 7 | def get_data(fname, scale, extent, cutoff=None, maxval=1000.0, |
| 8 | stdev=False): | 8 | stdev=False, manual=None): |
| 9 | data = load_binary_file(fname) | 9 | data = load_binary_file(fname) |
| 10 | 10 | ||
| 11 | if cutoff and len(data) > cutoff: | 11 | if cutoff and len(data) > cutoff: |
| @@ -31,6 +31,9 @@ def get_data(fname, scale, extent, cutoff=None, maxval=1000.0, | |||
| 31 | 31 | ||
| 32 | min_idx, max_idx = numpy.searchsorted(data, [lower, upper]) | 32 | min_idx, max_idx = numpy.searchsorted(data, [lower, upper]) |
| 33 | 33 | ||
| 34 | if manual: | ||
| 35 | max_idx -= manual | ||
| 36 | |||
| 34 | return [data, max_idx, min_idx, upper, lower] | 37 | return [data, max_idx, min_idx, upper, lower] |
| 35 | 38 | ||
| 36 | 39 | ||
diff --git a/oplot.py b/oplot.py --- a/oplot.py +++ b/oplot.py | |||
| @@ -10,7 +10,7 @@ from binary_data import get_data | |||
| 10 | from math import ceil | 10 | from math import ceil |
| 11 | 11 | ||
| 12 | import numpy | 12 | import numpy |
| 13 | 13 | import csv | |
| 14 | from os.path import splitext, basename | 14 | from os.path import splitext, basename |
| 15 | from optparse import make_option as o | 15 | from optparse import make_option as o |
| 16 | 16 | ||
| @@ -35,6 +35,12 @@ options = [ | |||
| 35 | o('-c', '--cut-off', action='store', dest='cutoff', type='int', | 35 | o('-c', '--cut-off', action='store', dest='cutoff', type='int', |
| 36 | help='max number of samples to use'), | 36 | help='max number of samples to use'), |
| 37 | 37 | ||
| 38 | o('-t', '--take-off', action='store', dest='take_off', type='int', | ||
| 39 | help='manual number of outlier samples to discard'), | ||
| 40 | |||
| 41 | o('-o', '--outlier-list', action='store', dest='outlier_file', | ||
| 42 | help='list of outliers to remove'), | ||
| 43 | |||
| 38 | o('-x', '--xmax', action='store', dest='xmax', type='int', | 44 | o('-x', '--xmax', action='store', dest='xmax', type='int', |
| 39 | help='determines x-axis range'), | 45 | help='determines x-axis range'), |
| 40 | 46 | ||
| @@ -57,11 +63,16 @@ defaults = { | |||
| 57 | 63 | ||
| 58 | # data processing | 64 | # data processing |
| 59 | 'cycles' : 2128, # per usec | 65 | 'cycles' : 2128, # per usec |
| 60 | 'extent' : 3, | 66 | 'extent' : 0, |
| 61 | 'cutoff' : None, | 67 | 'cutoff' : None, |
| 68 | 'take_off' : None, | ||
| 62 | 'normalize' : False, | 69 | 'normalize' : False, |
| 63 | 'use_std' : False, | 70 | 'use_std' : False, |
| 64 | 71 | ||
| 72 | # manual outlier removal | ||
| 73 | 'outlier_file' : None, | ||
| 74 | 'outliers' : {}, | ||
| 75 | |||
| 65 | # formatting options | 76 | # formatting options |
| 66 | 'binsize' : 0.25, | 77 | 'binsize' : 0.25, |
| 67 | 78 | ||
| @@ -86,6 +97,19 @@ HOST_CPUS = { | |||
| 86 | 'ludwig' : 24, | 97 | 'ludwig' : 24, |
| 87 | } | 98 | } |
| 88 | 99 | ||
| 100 | |||
| 101 | def load_outliers(fname): | ||
| 102 | outliers = {} | ||
| 103 | for row in csv.reader(open(fname, "r")): | ||
| 104 | sched = row[0] | ||
| 105 | n = int(row[1]) | ||
| 106 | cut = int(row[2]) | ||
| 107 | if not sched in outliers: | ||
| 108 | outliers[sched] = [] | ||
| 109 | outliers[sched].append((n, cut)) | ||
| 110 | return outliers | ||
| 111 | |||
| 112 | |||
| 89 | def get_stats_label(samples): | 113 | def get_stats_label(samples): |
| 90 | avg = numpy.mean(samples) | 114 | avg = numpy.mean(samples) |
| 91 | med = numpy.median(samples) | 115 | med = numpy.median(samples) |
| @@ -172,11 +196,20 @@ class OverheadPlotter(defapp.App): | |||
| 172 | else: | 196 | else: |
| 173 | scale = 1.0 / self.options.cycles | 197 | scale = 1.0 / self.options.cycles |
| 174 | 198 | ||
| 199 | take_off = self.options.take_off | ||
| 200 | if conf['scheduler'] in self.options.outliers: | ||
| 201 | n = int(conf['n']) | ||
| 202 | for (i, t) in self.options.outliers[conf['scheduler']]: | ||
| 203 | if i == n: | ||
| 204 | take_off = t | ||
| 205 | break | ||
| 206 | |||
| 175 | data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile, | 207 | data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile, |
| 176 | scale, | 208 | scale, |
| 177 | extent=self.options.extent, | 209 | extent=self.options.extent, |
| 178 | cutoff=self.options.cutoff, | 210 | cutoff=self.options.cutoff, |
| 179 | stdev=self.options.use_std) | 211 | stdev=self.options.use_std, |
| 212 | manual=take_off) | ||
| 180 | 213 | ||
| 181 | samples = data[min_idx:max_idx] | 214 | samples = data[min_idx:max_idx] |
| 182 | discarded = (len(data) - len(samples)) / float(len(data)) * 100 | 215 | discarded = (len(data) - len(samples)) / float(len(data)) * 100 |
| @@ -187,9 +220,13 @@ class OverheadPlotter(defapp.App): | |||
| 187 | samples_label = "samples: total=%d filtered=%d (%.2f%%)" % \ | 220 | samples_label = "samples: total=%d filtered=%d (%.2f%%)" % \ |
| 188 | (len(data), len(data) - len(samples), discarded) | 221 | (len(data), len(data) - len(samples), discarded) |
| 189 | 222 | ||
| 223 | |||
| 190 | if self.options.extent: | 224 | if self.options.extent: |
| 191 | iqr_label = "IQR: extent=%d threshold=%.2fus" % \ | 225 | iqr_label = "IQR: extent=%d threshold=%.2fus" % \ |
| 192 | (self.options.extent, iqr_max) | 226 | (self.options.extent, iqr_max) |
| 227 | elif take_off: | ||
| 228 | iqr_label = "%s outlier%s manually removed" % \ | ||
| 229 | (take_off, '' if take_off == 1 else 's') | ||
| 193 | elif discarded > 0: | 230 | elif discarded > 0: |
| 194 | iqr_label = "manual threshold=1000us [IQR not applied]" | 231 | iqr_label = "manual threshold=1000us [IQR not applied]" |
| 195 | else: | 232 | else: |
| @@ -336,6 +373,9 @@ class OverheadPlotter(defapp.App): | |||
| 336 | self.tmpfiles = [] | 373 | self.tmpfiles = [] |
| 337 | 374 | ||
| 338 | def default(self, _): | 375 | def default(self, _): |
| 376 | if self.options.outlier_file: | ||
| 377 | self.options.outliers = load_outliers(self.options.outlier_file) | ||
| 378 | |||
| 339 | for i, datafile in enumerate(self.args): | 379 | for i, datafile in enumerate(self.args): |
| 340 | self.out("[%d/%d] Processing %s ..." % (i + 1, len(self.args), datafile)) | 380 | self.out("[%d/%d] Processing %s ..." % (i + 1, len(self.args), datafile)) |
| 341 | self.plot_file(datafile) | 381 | self.plot_file(datafile) |
