#!/usr/bin/env python
"""Plot raw overhead distributions as gnuplot histograms.

Each input file is decoded from its name, its samples are loaded and scaled
to microseconds, outliers are removed with an IQR-based cutoff, and the
remaining samples are rendered as a histogram.

Provenance: new file oplot.py (mode 100755) from commit
6f06d564aaf93c89406cd3ff52bae03432cce2ae, "add tool for plotting raw
overhead distributions", Bjoern B. Brandenburg, Fri, 18 Feb 2011
16:41:26 -0500.
"""
import defapp

from plot import decode
from util import load_csv_file, load_binary_file, write_csv_file
from stats import iqr_cutoff

from math import ceil

import numpy

from os.path import splitext, basename
from optparse import make_option as o

from gnuplot import gnuplot, FORMATS, Plot, label, curve

options = [
    # output options
    o('-f', '--format', action='store', dest='format', type='choice',
      choices=FORMATS, help='output format'),
    o(None, '--save-script', action='store_true', dest='save_script'),
    o('-p', '--prefix', action='store', dest='prefix'),

    o('-i', '--iqr-extent', action='store', dest='extent', type='float',
      help='what extent to use for outlier removal'),

    ]

defaults = {
    # output options
    'format'      : 'pdf',
    'save_script' : False,
    'prefix'      : '',

    # data processing
    'cycles'      : 2128, # per usec
    'extent'      : 3,

    # formatting options
    'binsize'     : 0.25,
    }


def get_data(fname, scale, extend):
    """Load the samples in *fname*, scale and sort them, and find the cutoffs.

    fname  -- path handed to load_binary_file()
    scale  -- factor applied in place to every sample; None skips scaling
    extend -- extent argument forwarded to iqr_cutoff()

    Returns the list [data, max_idx, min_idx, iqr_max, iqr_min], where the
    indices are the positions of the two cutoff values in the sorted data
    (note the max-before-min order, which callers unpack positionally).
    """
    data = load_binary_file(fname)

    if scale is not None:
        data *= scale

    # searchsorted() below requires ascending order
    data.sort()

    # presumably the lower/upper outlier thresholds -- see stats.iqr_cutoff
    iqr_min, iqr_max = iqr_cutoff(data, extend)
    min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max])

    return [data, max_idx, min_idx, iqr_max, iqr_min]


def get_stats_label(samples):
    """Return a one-line summary (min, max, avg, median, stdev) of *samples*.

    *samples* must be non-empty and sorted in ascending order: the minimum
    and maximum are read from the first and last element.
    """
    lowest = samples[0]
    highest = samples[-1]
    avg = numpy.mean(samples)
    med = numpy.median(samples)
    dev = numpy.std(samples)
    return "min=%.2fus max=%.2fus avg=%.2fus median=%.2fus stdev=%.2fus" \
        % (lowest, highest, avg, med, dev)


class OverheadPlotter(defapp.App):
    """Command-line application that plots one histogram per input file."""

    def __init__(self):
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
        # references to temporary data files of the plot in progress
        self.tmpfiles = []

    def make_plot(self, fname=None):
        """Create a Plot whose output is '<prefix><fname>.<format>'."""
        p = Plot()
        p.output = "%s%s.%s" % (self.options.prefix, fname, self.options.format)
        p.format = self.options.format
        return p

    def setup_png(self, plot):
        """Apply PNG styling to *plot*; return True iff the format is 'png'."""
        # standard png options; usually correct; never tweaked for paper
        if self.options.format == 'png':
            plot.font_size = 'large'
            plot.size = (1024, 768)
            plot.xticks = (0, 1)
            plot.yticks = (0, 0.1)
            plot.default_style = "linespoints"
            return True
        else:
            return False

    def write(self, data, name, ext='data'):
        """Write *data* as CSV and return the name of the file written.

        With --save-script the file is '<name>.<ext>'; otherwise
        write_csv_file() is called with None and the returned temporary
        file object is remembered in self.tmpfiles.
        """
        if self.options.save_script:
            fname = "%s.%s" % (name, ext)
            write_csv_file(fname, data)
            return fname
        else:
            tmp = write_csv_file(None, data)
            # keep a reference so that it isn't deleted
            self.tmpfiles.append(tmp)
            return tmp.name

    def write_histogram(self, samples, name, labels=10):
        """Bin *samples*, write the histogram to a file, and return both.

        samples -- non-empty array of sample values
        name    -- base name passed on to write() (extension 'hist')
        labels  -- approximate number of bins that get a text label

        Returns (data, fname): *data* has one row per bin holding the bin
        center, the bin count, and the cumulative count; *fname* is the
        file written, whose rows carry an extra label column.
        """
        bin_size = self.options.binsize
        upper = ceil(numpy.amax(samples))
        # at least one bin, even if the largest sample rounds up to zero
        num_bins = max(1, int(upper / bin_size))
        # The range is shifted by half a bin; numpy.histogram ignores
        # values outside of it, so samples below bin_size / 2 are not
        # counted.
        (bins, edges) = numpy.histogram(samples, bins=num_bins,
                                        range=(bin_size / 2,
                                               upper + bin_size / 2))
        data = numpy.zeros((num_bins, 3))
        data[:, 0] = (edges[:-1] + edges[1:]) / 2.0
        data[:, 1] = bins
        data[:, 2] = numpy.cumsum(bins)

        # label every label_rate-th bin; floor division keeps this an
        # integer under both Python 2 and Python 3
        label_rate = max(1, len(bins) // labels)
        for_file = []
        for i, row in enumerate(data):
            tick = '%.2f' % row[0] if i % label_rate == 0 else ''
            for_file.append([row[0], row[1], row[2], tick])

        return (data, self.write(for_file, name, ext='hist'))

    def render(self, p):
        """Save the gnuplot script of *p* (--save-script) or execute it."""
        if self.options.save_script:
            p.gnuplot_save(p.output + '.plot')
        else:
            p.gnuplot_exec()

    def plot_samples(self, datafile, name, conf):
        """Plot the histogram of the IQR-filtered samples in *datafile*.

        datafile -- path of the raw sample file
        name     -- base name for the plot output and the data file
        conf     -- decoded file name; the keys 'overhead', 'scheduler',
                    and 'host' are read
        """
        if conf['overhead'] == 'RELEASE-LATENCY':
            # presumably recorded in nanoseconds -- TODO confirm
            scale = 1.0 / 1000.0
        else:
            # cycles -> microseconds ('cycles' is given per usec)
            scale = 1.0 / self.options.cycles

        data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile,
                                                            scale,
                                                            self.options.extent)
        samples = data[min_idx:max_idx]
        discarded = (len(data) - len(samples)) / float(len(data)) * 100

        p = self.make_plot(name)

        # %g rather than %d: the extent option is a float and must not be
        # truncated in the label
        iqr_label = "IQR: extent=%g threshold=%.2fus filtered=%.2f%%" % \
            (self.options.extent, iqr_max, discarded)

        p.labels = [label(0.5, 0.9,
                          get_stats_label(samples),
                          coord=['graph', 'screen'], align='center'),
                    label(0.98, 0.95, iqr_label,
                          coord=['graph', 'graph'], align='right')]

        (hist, fname) = self.write_histogram(samples, name)

        p.setup_histogram(gap=1, boxwidth=1.0)

        p.title = "measured overheads scheduler=%s; overhead=%s; host=%s" \
            % (conf['scheduler'], conf['overhead'], conf['host'])

        p.ylabel = "number of samples"
        p.xlabel = "overhead in microseconds (bin size = %.2fus)" \
            % self.options.binsize
        p.xticks = (0, 10)
        # round the largest bin count up to the next multiple of 100
        p.yrange = (0, (ceil(numpy.amax(hist[:, 1]) / 100.0) * 100))
        p.curves = [curve(histogram=fname, col=2, labels_col=4)]

        #### Styling.

        if not self.setup_png(p):
            p.rounded_caps = True
            p.font = 'Helvetica'

            p.font_size = '10'
            p.size = ('20cm', '10cm')
            p.monochrome = False
            p.dashed_lines = False
            p.key = 'off'
            p.default_style = 'points lw 1'

        self.render(p)

    def plot_file(self, datafile):
        """Decode the name of *datafile* and plot it with a matching plotter.

        Files whose decoded name matches no known experiment type, and
        files that raise IOError while being plotted, are reported via
        self.err() and skipped.
        """
        bname = basename(datafile)
        name, ext = splitext(bname)
        conf = decode(name)
        plotters = {
            'taskset' : self.plot_samples,
            }

        for plot_type in plotters:
            if plot_type in conf:
                try:
                    plotters[plot_type](datafile, name, conf)
                except IOError as err:
                    self.err("Skipped '%s' (%s)." % (bname, err))
                break
        else:
            self.err("Skipped '%s'; unkown experiment type."
                     % bname)
        # release all tmp files
        self.tmpfiles = []

    def default(self, _):
        """Process every file given on the command line, reporting progress."""
        for i, datafile in enumerate(self.args):
            self.out("[%d/%d] Processing %s ..."
                     % (i + 1, len(self.args), datafile))
            self.plot_file(datafile)


if __name__ == "__main__":
    OverheadPlotter().launch()