diff options
| author | Bjoern B. Brandenburg <bbb@cs.unc.edu> | 2011-02-19 21:45:54 -0500 |
|---|---|---|
| committer | Bjoern B. Brandenburg <bbb@cs.unc.edu> | 2011-02-19 21:45:54 -0500 |
| commit | 5e5fc221d60f46951a360f3e4f637e0edc084daf (patch) | |
| tree | 1b622220d08c64a776ea4bd1d50d0dc39ece4c6b | |
| parent | c69e0012f6845fcfe8d89bc980c14b7993d99cde (diff) | |
improve data and visualization mangling in oplot.py
Also, avoid duplicating binary data parsing code.
| -rw-r--r-- | binary_data.py | 18 | ||||
| -rwxr-xr-x | oplot.py | 60 |
2 files changed, 53 insertions, 25 deletions
diff --git a/binary_data.py b/binary_data.py
index d4be159..6108816 100644
--- a/binary_data.py
+++ b/binary_data.py
| @@ -3,18 +3,32 @@ import numpy | |||
| 3 | from util import load_binary_file | 3 | from util import load_binary_file |
| 4 | from stats import iqr_remove_outliers, iqr_cutoff | 4 | from stats import iqr_remove_outliers, iqr_cutoff |
| 5 | 5 | ||
| 6 | def compact_file(fname, scale=None, extend=1.5): | 6 | |
| 7 | def get_data(fname, scale, extent, cutoff=None, maxval=1000.0): | ||
| 7 | data = load_binary_file(fname) | 8 | data = load_binary_file(fname) |
| 8 | 9 | ||
| 10 | if cutoff and len(data) > cutoff: | ||
| 11 | data = data[:cutoff] | ||
| 12 | |||
| 9 | if not scale is None: | 13 | if not scale is None: |
| 10 | data *= scale | 14 | data *= scale |
| 11 | 15 | ||
| 12 | data.sort() | 16 | data.sort() |
| 13 | 17 | ||
| 14 | iqr_min, iqr_max = iqr_cutoff(data, extend) | 18 | if extent: |
| 19 | iqr_min, iqr_max = iqr_cutoff(data, extent) | ||
| 20 | else: | ||
| 21 | iqr_min = 0 | ||
| 22 | iqr_max = maxval | ||
| 15 | 23 | ||
| 16 | min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max]) | 24 | min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max]) |
| 17 | 25 | ||
| 26 | return [data, max_idx, min_idx, iqr_max, iqr_min] | ||
| 27 | |||
| 28 | |||
| 29 | def compact_file(*args, **kargs): | ||
| 30 | data, max_idx, min_idx, iqr_max, iqr_min = get_data(*args, **kargs) | ||
| 31 | |||
| 18 | samples = data[min_idx:max_idx] | 32 | samples = data[min_idx:max_idx] |
| 19 | 33 | ||
| 20 | filtered = len(data) - len(samples) | 34 | filtered = len(data) - len(samples) |
| @@ -5,6 +5,8 @@ from plot import decode | |||
| 5 | from util import load_csv_file, load_binary_file, write_csv_file | 5 | from util import load_csv_file, load_binary_file, write_csv_file |
| 6 | from stats import iqr_cutoff | 6 | from stats import iqr_cutoff |
| 7 | 7 | ||
| 8 | from binary_data import get_data | ||
| 9 | |||
| 8 | from math import ceil | 10 | from math import ceil |
| 9 | 11 | ||
| 10 | import numpy | 12 | import numpy |
| @@ -24,9 +26,21 @@ options = [ | |||
| 24 | o('-i', '--iqr-extent', action='store', dest='extent', type='float', | 26 | o('-i', '--iqr-extent', action='store', dest='extent', type='float', |
| 25 | help='what extent to use for outlier removal'), | 27 | help='what extent to use for outlier removal'), |
| 26 | 28 | ||
| 29 | o('-n', '--normalize', action='store_true', dest='normalize', | ||
| 30 | help='use normalize counts'), | ||
| 31 | |||
| 27 | o('-c', '--cut-off', action='store', dest='cutoff', type='int', | 32 | o('-c', '--cut-off', action='store', dest='cutoff', type='int', |
| 28 | help='max number of samples to use'), | 33 | help='max number of samples to use'), |
| 29 | 34 | ||
| 35 | o('-x', '--xmax', action='store', dest='xmax', type='int', | ||
| 36 | help='determines x-axis range'), | ||
| 37 | |||
| 38 | o('-y', '--ymax', action='store', dest='ymax', type='float', | ||
| 39 | help='determines y-axis range'), | ||
| 40 | |||
| 41 | o('-b', '--binsize', action='store', dest='binsize', type='float', | ||
| 42 | help='set binsize of histogram'), | ||
| 43 | |||
| 30 | ] | 44 | ] |
| 31 | 45 | ||
| 32 | defaults = { | 46 | defaults = { |
| @@ -39,9 +53,14 @@ defaults = { | |||
| 39 | 'cycles' : 2128, # per usec | 53 | 'cycles' : 2128, # per usec |
| 40 | 'extent' : 3, | 54 | 'extent' : 3, |
| 41 | 'cutoff' : None, | 55 | 'cutoff' : None, |
| 56 | 'normalize' : False, | ||
| 42 | 57 | ||
| 43 | # formatting options | 58 | # formatting options |
| 44 | 'binsize' : 0.25, | 59 | 'binsize' : 0.25, |
| 60 | |||
| 61 | 'xmax' : None, | ||
| 62 | 'ymax' : None, | ||
| 63 | |||
| 45 | } | 64 | } |
| 46 | 65 | ||
| 47 | 66 | ||
| @@ -55,19 +74,6 @@ TXT = { | |||
| 55 | 'TICK' : 'timer tick overhead', | 74 | 'TICK' : 'timer tick overhead', |
| 56 | } | 75 | } |
| 57 | 76 | ||
| 58 | def get_data(fname, scale, extend): | ||
| 59 | data = load_binary_file(fname) | ||
| 60 | |||
| 61 | if not scale is None: | ||
| 62 | data *= scale | ||
| 63 | |||
| 64 | data.sort() | ||
| 65 | |||
| 66 | iqr_min, iqr_max = iqr_cutoff(data, extend) | ||
| 67 | min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max]) | ||
| 68 | |||
| 69 | return [data, max_idx, min_idx, iqr_max, iqr_min] | ||
| 70 | |||
| 71 | 77 | ||
| 72 | def get_stats_label(samples): | 78 | def get_stats_label(samples): |
| 73 | avg = numpy.mean(samples) | 79 | avg = numpy.mean(samples) |
| @@ -114,6 +120,8 @@ class OverheadPlotter(defapp.App): | |||
| 114 | 120 | ||
| 115 | def write_histogram(self, samples, name, labels=10): | 121 | def write_histogram(self, samples, name, labels=10): |
| 116 | max = ceil(numpy.amax(samples)) | 122 | max = ceil(numpy.amax(samples)) |
| 123 | if self.options.xmax: | ||
| 124 | max = self.options.xmax | ||
| 117 | bin_size = self.options.binsize | 125 | bin_size = self.options.binsize |
| 118 | num_bins = int(max / bin_size) | 126 | num_bins = int(max / bin_size) |
| 119 | (bins, edges) = numpy.histogram(samples, bins=num_bins, | 127 | (bins, edges) = numpy.histogram(samples, bins=num_bins, |
| @@ -127,6 +135,10 @@ class OverheadPlotter(defapp.App): | |||
| 127 | cumulative += bins[i] | 135 | cumulative += bins[i] |
| 128 | data[i, 2] = cumulative | 136 | data[i, 2] = cumulative |
| 129 | 137 | ||
| 138 | if self.options.normalize: | ||
| 139 | data[:, 1] /= len(samples) | ||
| 140 | data[:, 2] /= len(samples) | ||
| 141 | |||
| 130 | label_rate = len(bins) / labels | 142 | label_rate = len(bins) / labels |
| 131 | if not label_rate: | 143 | if not label_rate: |
| 132 | label_rate = 1 | 144 | label_rate = 1 |
| @@ -135,7 +147,7 @@ class OverheadPlotter(defapp.App): | |||
| 135 | label = '%.2f' % row[0] if i % label_rate == 0 else '' | 147 | label = '%.2f' % row[0] if i % label_rate == 0 else '' |
| 136 | for_file.append([row[0], row[1], row[2], label]) | 148 | for_file.append([row[0], row[1], row[2], label]) |
| 137 | 149 | ||
| 138 | return (data, self.write(for_file, name, ext='hist')) | 150 | return (data, self.write(for_file, name, ext='hist'), edges) |
| 139 | 151 | ||
| 140 | def render(self, p): | 152 | def render(self, p): |
| 141 | if self.options.save_script: | 153 | if self.options.save_script: |
| @@ -151,10 +163,8 @@ class OverheadPlotter(defapp.App): | |||
| 151 | 163 | ||
| 152 | data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile, | 164 | data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile, |
| 153 | scale, | 165 | scale, |
| 154 | self.options.extent) | 166 | self.options.extent, |
| 155 | 167 | self.options.cutoff) | |
| 156 | if self.options.cutoff and len(data) > self.options.cutoff: | ||
| 157 | data = data[:self.options.cutoff] | ||
| 158 | 168 | ||
| 159 | samples = data[min_idx:max_idx] | 169 | samples = data[min_idx:max_idx] |
| 160 | discarded = (len(data) - len(samples)) / float(len(data)) * 100 | 170 | discarded = (len(data) - len(samples)) / float(len(data)) * 100 |
| @@ -175,20 +185,24 @@ class OverheadPlotter(defapp.App): | |||
| 175 | label(0.98, 0.95, iqr_label, | 185 | label(0.98, 0.95, iqr_label, |
| 176 | coord=['graph', 'graph'], align='right')] | 186 | coord=['graph', 'graph'], align='right')] |
| 177 | 187 | ||
| 178 | (hist, fname) = self.write_histogram(samples, name) | 188 | (hist, fname, edges) = self.write_histogram(samples, name) |
| 179 | 189 | ||
| 180 | p.setup_histogram(gap=1, boxwidth=1.0) | 190 | p.setup_histogram(gap=1, boxwidth=1.0) |
| 181 | 191 | ||
| 182 | p.title = "%s: measured %s for %s tasks per processor (host=%s)" \ | 192 | p.title = "%s: measured %s for %s tasks per processor (host=%s)" \ |
| 183 | % (conf['scheduler'], TXT[conf['overhead']], conf['n'], conf['host']) | 193 | % (conf['scheduler'], TXT[conf['overhead']], conf['n'], conf['host']) |
| 184 | 194 | ||
| 185 | p.ylabel = "number of samples" | 195 | if self.options.normalize: |
| 196 | p.ylabel = "fraction of samples" | ||
| 197 | else: | ||
| 198 | p.ylabel = "number of samples" | ||
| 186 | p.xlabel = "overhead in microseconds (bin size = %.2fus)" \ | 199 | p.xlabel = "overhead in microseconds (bin size = %.2fus)" \ |
| 187 | % self.options.binsize | 200 | % self.options.binsize |
| 188 | # p.xrange = (0, ceil(max_cost)) | 201 | |
| 202 | if self.options.ymax: | ||
| 203 | p.yrange = (0, self.options.ymax) | ||
| 204 | # p.yrange = (0, (ceil(numpy.amax(hist[:,1]) / 100.0) * 100)) | ||
| 189 | p.xticks = (0, 10) | 205 | p.xticks = (0, 10) |
| 190 | # p.yticks = (0, 1) | ||
| 191 | p.yrange = (0, (ceil(numpy.amax(hist[:,1]) / 100.0) * 100)) | ||
| 192 | p.curves = [curve(histogram=fname, col=2, labels_col=4)] | 206 | p.curves = [curve(histogram=fname, col=2, labels_col=4)] |
| 193 | 207 | ||
| 194 | #### Styling. | 208 | #### Styling. |
