From 5e5fc221d60f46951a360f3e4f637e0edc084daf Mon Sep 17 00:00:00 2001 From: "Bjoern B. Brandenburg" Date: Sat, 19 Feb 2011 21:45:54 -0500 Subject: improve data and visualization mangling in oplot.py Also, avoid duplicating binary data parsing code. --- binary_data.py | 18 ++++++++++++++++-- oplot.py | 60 ++++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 53 insertions(+), 25 deletions(-) diff --git a/binary_data.py b/binary_data.py index d4be159..6108816 100644 --- a/binary_data.py +++ b/binary_data.py @@ -3,18 +3,32 @@ import numpy from util import load_binary_file from stats import iqr_remove_outliers, iqr_cutoff -def compact_file(fname, scale=None, extend=1.5): + +def get_data(fname, scale, extent, cutoff=None, maxval=1000.0): data = load_binary_file(fname) + if cutoff and len(data) > cutoff: + data = data[:cutoff] + if not scale is None: data *= scale data.sort() - iqr_min, iqr_max = iqr_cutoff(data, extend) + if extent: + iqr_min, iqr_max = iqr_cutoff(data, extent) + else: + iqr_min = 0 + iqr_max = maxval min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max]) + return [data, max_idx, min_idx, iqr_max, iqr_min] + + +def compact_file(*args, **kargs): + data, max_idx, min_idx, iqr_max, iqr_min = get_data(*args, **kargs) + samples = data[min_idx:max_idx] filtered = len(data) - len(samples) diff --git a/oplot.py b/oplot.py index e0b4c69..89e0ef0 100755 --- a/oplot.py +++ b/oplot.py @@ -5,6 +5,8 @@ from plot import decode from util import load_csv_file, load_binary_file, write_csv_file from stats import iqr_cutoff +from binary_data import get_data + from math import ceil import numpy @@ -24,9 +26,21 @@ options = [ o('-i', '--iqr-extent', action='store', dest='extent', type='float', help='what extent to use for outlier removal'), + o('-n', '--normalize', action='store_true', dest='normalize', + help='use normalize counts'), + o('-c', '--cut-off', action='store', dest='cutoff', type='int', help='max number of samples to use'), + o('-x', '--xmax', action='store', dest='xmax', type='int', + help='determines x-axis range'), + + o('-y', '--ymax', action='store', dest='ymax', type='float', + help='determines y-axis range'), + + o('-b', '--binsize', action='store', dest='binsize', type='float', + help='set binsize of histogram'), + ] defaults = { @@ -39,9 +53,14 @@ defaults = { 'cycles' : 2128, # per usec 'extent' : 3, 'cutoff' : None, + 'normalize' : False, # formatting options 'binsize' : 0.25, + + 'xmax' : None, + 'ymax' : None, + } @@ -55,19 +74,6 @@ TXT = { 'TICK' : 'timer tick overhead', } -def get_data(fname, scale, extend): - data = load_binary_file(fname) - - if not scale is None: - data *= scale - - data.sort() - - iqr_min, iqr_max = iqr_cutoff(data, extend) - min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max]) - - return [data, max_idx, min_idx, iqr_max, iqr_min] - def get_stats_label(samples): avg = numpy.mean(samples) @@ -114,6 +120,8 @@ class OverheadPlotter(defapp.App): def write_histogram(self, samples, name, labels=10): max = ceil(numpy.amax(samples)) + if self.options.xmax: + max = self.options.xmax bin_size = self.options.binsize num_bins = int(max / bin_size) (bins, edges) = numpy.histogram(samples, bins=num_bins, @@ -127,6 +135,10 @@ class OverheadPlotter(defapp.App): cumulative += bins[i] data[i, 2] = cumulative + if self.options.normalize: + data[:, 1] /= len(samples) + data[:, 2] /= len(samples) + label_rate = len(bins) / labels if not label_rate: label_rate = 1 @@ -135,7 +147,7 @@ class OverheadPlotter(defapp.App): label = '%.2f' % row[0] if i % label_rate == 0 else '' for_file.append([row[0], row[1], row[2], label]) - return (data, self.write(for_file, name, ext='hist')) + return (data, self.write(for_file, name, ext='hist'), edges) def render(self, p): if self.options.save_script: @@ -151,10 +163,8 @@ class OverheadPlotter(defapp.App): data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile, scale, - self.options.extent) - - if self.options.cutoff and len(data) > self.options.cutoff: - data = data[:self.options.cutoff] + self.options.extent, + self.options.cutoff) samples = data[min_idx:max_idx] discarded = (len(data) - len(samples)) / float(len(data)) * 100 @@ -175,20 +185,24 @@ class OverheadPlotter(defapp.App): label(0.98, 0.95, iqr_label, coord=['graph', 'graph'], align='right')] - (hist, fname) = self.write_histogram(samples, name) + (hist, fname, edges) = self.write_histogram(samples, name) p.setup_histogram(gap=1, boxwidth=1.0) p.title = "%s: measured %s for %s tasks per processor (host=%s)" \ % (conf['scheduler'], TXT[conf['overhead']], conf['n'], conf['host']) - p.ylabel = "number of samples" + if self.options.normalize: + p.ylabel = "fraction of samples" + else: + p.ylabel = "number of samples" p.xlabel = "overhead in microseconds (bin size = %.2fus)" \ % self.options.binsize -# p.xrange = (0, ceil(max_cost)) + + if self.options.ymax: + p.yrange = (0, self.options.ymax) +# p.yrange = (0, (ceil(numpy.amax(hist[:,1]) / 100.0) * 100)) p.xticks = (0, 10) -# p.yticks = (0, 1) - p.yrange = (0, (ceil(numpy.amax(hist[:,1]) / 100.0) * 100)) p.curves = [curve(histogram=fname, col=2, labels_col=4)] #### Styling. -- cgit v1.2.2