From 6f06d564aaf93c89406cd3ff52bae03432cce2ae Mon Sep 17 00:00:00 2001
From: "Bjoern B. Brandenburg" <bbb@cs.unc.edu>
Date: Fri, 18 Feb 2011 16:41:26 -0500
Subject: add tool for plotting raw overhead distributions

---
 oplot.py | 215 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 215 insertions(+)
 create mode 100755 oplot.py

(limited to 'oplot.py')

diff --git a/oplot.py b/oplot.py
new file mode 100755
index 0000000..2f7f0cb
--- /dev/null
+++ b/oplot.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python
+import defapp
+
+from plot import decode
+from util import load_csv_file, load_binary_file, write_csv_file
+from stats import iqr_cutoff
+
+from math import ceil
+
+import numpy
+
+from os.path  import splitext, basename
+from optparse import make_option as o
+
+from gnuplot  import gnuplot, FORMATS, Plot, label, curve
+
+# optparse options handed to defapp.App by OverheadPlotter.
+options = [
+    # output options
+    o('-f', '--format', dest='format', action='store',
+      type='choice', choices=FORMATS, help='output format'),
+    o(None, '--save-script', dest='save_script', action='store_true'),
+    o('-p', '--prefix', dest='prefix', action='store'),
+    # data processing
+    o('-i', '--iqr-extent', dest='extent', action='store', type='float',
+      help='what extent to use for outlier removal'),
+]
+
+# Defaults handed to defapp.App alongside the options above; note that
+# 'cycles' and 'binsize' have no corresponding command-line switch here.
+defaults = dict(
+    # output options
+    format='pdf',
+    save_script=False,
+    prefix='',
+    # data processing
+    cycles=2128,  # per usec
+    extent=3,
+    # formatting options
+    binsize=0.25,
+)
+
+
+def get_data(fname, scale, extend):
+    """Load the samples in fname, scale them (if scale is given), and sort.
+
+    Returns [samples, max_idx, min_idx, iqr_max, iqr_min] (IQR cutoffs/indices).
+    """
+    samples = load_binary_file(fname)
+    if scale is not None:
+        samples *= scale
+    samples.sort()
+    cutoff_lo, cutoff_hi = iqr_cutoff(samples, extend)
+    idx_lo, idx_hi = numpy.searchsorted(samples, [cutoff_lo, cutoff_hi])
+    return [samples, idx_hi, idx_lo, cutoff_hi, cutoff_lo]
+
+
+def get_stats_label(samples):
+    """Return a min/max/avg/median/stdev summary line (in us) for samples."""
+    # numpy.amin/amax instead of samples[0]/samples[-1]: no builtin names are
+    # shadowed and the result is right even if samples is not sorted.
+    stats = (numpy.amin(samples), numpy.amax(samples), numpy.mean(samples),
+             numpy.median(samples), numpy.std(samples))
+    return "min=%.2fus  max=%.2fus  avg=%.2fus  median=%.2fus  stdev=%.2fus" \
+        % stats
+
+class OverheadPlotter(defapp.App):
+    """Plot a histogram of raw overhead samples for each input file."""
+
+    def __init__(self):
+        defapp.App.__init__(self, options, defaults, no_std_opts=True)
+        self.tmpfiles = []
+
+    def make_plot(self, fname=None):
+        """Return a Plot that outputs to <prefix><fname>.<format>."""
+        p = Plot()
+        p.output = "%s%s.%s" % (self.options.prefix, fname, self.options.format)
+        p.format = self.options.format
+        return p
+
+    def setup_png(self, plot):
+        """Apply the png settings to plot; return True iff format is png."""
+        # standard png options; usually correct; never tweaked for paper
+        if self.options.format != 'png':
+            return False
+        plot.font_size = 'large'
+        plot.size   = (1024, 768)
+        plot.xticks = (0, 1)
+        plot.yticks = (0, 0.1)
+        plot.default_style = "linespoints"
+        return True
+
+    def write(self, data, name, ext='data'):
+        """Write data with write_csv_file; return the name of the file."""
+        if self.options.save_script:
+            fname = "%s.%s" % (name, ext)
+            write_csv_file(fname, data)
+            return fname
+        tmp = write_csv_file(None, data)
+        # keep a reference so that it isn't deleted
+        self.tmpfiles.append(tmp)
+        return tmp.name
+
+    def write_histogram(self, samples, name, labels=10):
+        """Bin samples and write the histogram rows to a data file.
+
+        Rows are [bin center, count, cumulative count, tick label]; the tick
+        label is blank except on every (len(bins) // labels)-th row.  Returns
+        (data, fname): the numeric (num_bins x 3) array and the file name.
+        """
+        bin_size = self.options.binsize
+        upper = ceil(numpy.amax(samples))
+        # numpy.histogram rejects bins=0, which int() yields if upper < bin_size.
+        num_bins = max(1, int(upper / bin_size))
+        half = bin_size / 2.0
+        (bins, edges) = numpy.histogram(samples, bins=num_bins,
+                                        range=(half, upper + half))
+        data = numpy.zeros((num_bins, 3))
+        data[:, 0] = (edges[:-1] + edges[1:]) / 2.0
+        data[:, 1] = bins
+        data[:, 2] = numpy.cumsum(bins)
+        # Explicit floor division keeps label_rate an integer; 0 becomes 1.
+        label_rate = len(bins) // labels or 1
+        for_file = []
+        for i, (center, count, total) in enumerate(data):
+            tick = '%.2f' % center if i % label_rate == 0 else ''
+            for_file.append([center, count, total, tick])
+        return (data, self.write(for_file, name, ext='hist'))
+
+    def render(self, p):
+        """Save p as a script if save_script is set; otherwise execute it."""
+        if self.options.save_script:
+            p.gnuplot_save(p.output + '.plot')
+        else:
+            p.gnuplot_exec()
+
+    def plot_samples(self, datafile, name, conf):
+        """Plot the distribution of the overhead samples in datafile.
+
+        conf is the decoded file name; its 'overhead', 'scheduler', and
+        'host' entries are read.  Only data[min_idx:max_idx], as delimited
+        by get_data(), is plotted; empty inputs are reported and skipped.
+        """
+        if conf['overhead'] == 'RELEASE-LATENCY':
+            scale = 1.0 / 1000.0
+        else:
+            scale = 1.0 / self.options.cycles  # cycles -> microseconds
+        data, max_idx, min_idx, iqr_max, iqr_min = get_data(
+            datafile, scale, self.options.extent)
+        samples = data[min_idx:max_idx]
+        if len(samples) == 0:
+            # Nothing to plot; also avoids dividing by len(data) == 0 below.
+            self.err("Skipped '%s' (no samples)." % datafile)
+            return
+        discarded = (len(data) - len(samples)) / float(len(data)) * 100
+
+        p = self.make_plot(name)
+        # %g, not %d: extent is a float option and %d would truncate it.
+        iqr_label = "IQR: extent=%g threshold=%.2fus filtered=%.2f%%" % \
+            (self.options.extent, iqr_max, discarded)
+        p.labels = [label(0.5, 0.9,
+                          get_stats_label(samples),
+                          coord=['graph', 'screen'], align='center'),
+                    label(0.98, 0.95, iqr_label,
+                          coord=['graph', 'graph'], align='right')]
+        (hist, fname) = self.write_histogram(samples, name)
+        p.setup_histogram(gap=1, boxwidth=1.0)
+        p.title = "measured overheads scheduler=%s; overhead=%s;  host=%s" \
+            % (conf['scheduler'], conf['overhead'], conf['host'])
+        p.ylabel = "number of samples"
+        p.xlabel = "overhead in microseconds (bin size = %.2fus)" \
+            % self.options.binsize
+        p.xticks = (0, 10)
+        p.yrange = (0, (ceil(numpy.amax(hist[:,1]) / 100.0) * 100))
+        p.curves = [curve(histogram=fname, col=2, labels_col=4)]
+
+        #### Styling.
+        if not self.setup_png(p):
+            p.rounded_caps = True
+            p.font = 'Helvetica'
+            p.font_size = '10'
+            p.size = ('20cm', '10cm')
+            p.monochrome   = False
+            p.dashed_lines = False
+            p.key = 'off'
+            p.default_style = 'points lw 1'
+        self.render(p)
+
+    def plot_file(self, datafile):
+        """Decode datafile's name and run the matching plotter on it."""
+        bname     = basename(datafile)
+        name, ext = splitext(bname)
+        conf      = decode(name)
+        plotters = {'taskset': self.plot_samples}
+        for plot_type in plotters:
+            if plot_type in conf:
+                try:
+                    plotters[plot_type](datafile, name, conf)
+                except IOError as err:
+                    # Two %s need a 2-tuple; '% err' alone raised TypeError.
+                    self.err("Skipped '%s' (%s)." % (datafile, err))
+                break
+        else:
+            self.err("Skipped '%s'; unknown experiment type." % bname)
+        # release all tmp files
+        self.tmpfiles = []
+
+    def default(self, _):
+        """Process each file in self.args in turn, reporting progress."""
+        for i, datafile in enumerate(self.args):
+            self.out("[%d/%d] Processing %s ..." % (i + 1, len(self.args), datafile))
+            self.plot_file(datafile)
+
+if __name__ == "__main__":  # only when executed as a script, not on import
+    OverheadPlotter().launch()
-- 
cgit v1.2.2