#!/usr/bin/env python
"""Plot measured overhead samples, overhead trends, and scheduler comparisons.

Each input file name is decoded into a configuration dictionary (see
``plot.decode``).  Files whose configuration contains the key 'taskset' are
rendered as sample histograms, files containing 'otrend' as trend curves.
With --compare, all given trend files are combined into one comparison plot.
"""

import csv
from math import ceil
from optparse import make_option as o
from os.path import splitext, basename

import numpy

import defapp
from plot import decode
from dplot import sched_name
from util import load_csv_file, load_binary_file, write_csv_file
from stats import iqr_cutoff
from binary_data import get_data
from gnuplot import gnuplot, FORMATS, Plot, label, curve

options = [
    # output options
    o('-f', '--format', action='store', dest='format', type='choice',
      choices=FORMATS, help='output format'),
    o(None, '--save-script', action='store_true', dest='save_script'),
    o('-p', '--prefix', action='store', dest='prefix'),

    o('-i', '--iqr-extent', action='store', dest='extent', type='float',
      help='what extent to use for outlier removal'),
    o('-n', '--normalize', action='store_true', dest='normalize',
      help='use normalize counts'),
    o('-d', '--stdev', action='store_true', dest='use_std',
      help='use standard deviation filter'),
    o(None, '--not-per-proc', action='store_false', dest='per_proc',
      help='do not show per-processor task counts'),
    o('-c', '--cut-off', action='store', dest='cutoff', type='int',
      help='max number of samples to use'),
    o('-t', '--take-off', action='store', dest='take_off', type='int',
      help='manual number of outlier samples to discard'),
    o('-o', '--outlier-list', action='store', dest='outlier_file',
      help='list of outliers to remove'),

    o('-x', '--xmax', action='store', dest='xmax', type='int',
      help='determines x-axis range'),
    o('-y', '--ymax', action='store', dest='ymax', type='float',
      help='determines y-axis range'),
    o('-b', '--binsize', action='store', dest='binsize', type='float',
      help='set binsize of histogram'),
    o(None, '--ylog', action='store_true', dest='ylog',
      help='use logarithmic y-axis'),

    o(None, '--cycles', action='store', dest='cycles', type='int',
      help='how many cycles per usec'),

    o(None, '--compare', action='store', dest='compare',
      help='plot overhead comparison', choices=['max', 'avg']),
]

defaults = {
    # output options
    'format' : 'pdf',
    'save_script' : False,
    'prefix' : '',

    # data processing
    'cycles' : 2128,  # per usec
    'extent' : 0,
    'cutoff' : None,
    'take_off' : None,
    'normalize' : False,
    'use_std' : False,

    # manual outlier removal
    'outlier_file' : None,
    'outliers' : {},

    # formatting options
    'binsize' : 0.25,
    'per_proc' : True,
    'xmax' : None,
    'ymax' : None,
    'ylog' : False,

    'compare' : None,
}

# Human-readable description of each overhead type, used in plot titles.
TXT = {
    'RELEASE-LATENCY' : 'event latency',
    'RELEASE' : 'release interrupt overhead',
    'SCHED' : 'scheduling overhead',
    'SCHED2' : 'post-scheduling overhead',
    'CXS' : 'context-switch overhead',
    'SEND-RESCHED' : 'IPI latency',
    'TICK' : 'timer tick overhead',
}

# Number of CPUs per known host; used to scale per-processor task counts.
HOST_CPUS = {
    'ludwig' : 24,
}


def load_outliers(fname):
    """Load the manual outlier list from the CSV file `fname`.

    Each row is read as ``scheduler, n, cut``; the second and third columns
    are converted to integers.

    Returns a dict mapping each scheduler name to a list of ``(n, cut)``
    tuples in file order.
    """
    outliers = {}
    # Use a context manager so the file handle is closed deterministically.
    with open(fname, "r") as f:
        for row in csv.reader(f):
            sched = row[0]
            n = int(row[1])
            cut = int(row[2])
            outliers.setdefault(sched, []).append((n, cut))
    return outliers


def get_stats_label(samples):
    """Return a one-line summary (min/max/avg/median/stdev) of `samples`.

    The minimum and maximum are taken from the first and last element, so
    `samples` is presumably sorted in ascending order -- confirm against the
    caller if this is reused elsewhere.
    """
    avg = numpy.mean(samples)
    med = numpy.median(samples)
    dev = numpy.std(samples)
    smallest = samples[0]
    largest = samples[-1]
    return "min=%.2fus max=%.2fus avg=%.2fus median=%.2fus stdev=%.2fus" \
        % (smallest, largest, avg, med, dev)


class OverheadPlotter(defapp.App):
    """Command-line application that renders overhead plots via gnuplot."""

    def __init__(self):
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
        # References to temporary data files; see write().
        self.tmpfiles = []

    def make_plot(self, fname=None):
        """Create a Plot whose output file is ``<prefix><fname>.<format>``."""
        p = Plot()
        p.output = "%s%s.%s" % (self.options.prefix, fname,
                                self.options.format)
        p.format = self.options.format
        return p

    def setup_png(self, plot):
        """Apply the PNG defaults to `plot` if the output format is 'png'.

        Returns True if the PNG settings were applied, False otherwise.
        """
        # standard png options; usually correct; never tweaked for paper
        if self.options.format != 'png':
            return False
        plot.font_size = 'large'
        plot.size = (1024, 768)
        plot.xticks = (0, 1)
        plot.yticks = (0, 0.1)
        plot.default_style = "linespoints"
        return True

    def write(self, data, name, ext='data'):
        """Write `data` as CSV and return the name of the resulting file.

        With --save-script the data goes to ``<name>.<ext>`` so that the
        saved gnuplot script can refer to it; otherwise the file object
        returned by ``write_csv_file(None, data)`` is used and its name is
        returned.
        """
        if self.options.save_script:
            fname = "%s.%s" % (name, ext)
            write_csv_file(fname, data)
            return fname
        tmp = write_csv_file(None, data)
        # keep a reference so that it isn't deleted
        self.tmpfiles.append(tmp)
        return tmp.name

    def write_histogram(self, samples, name, labels=10):
        """Bin `samples` and write the histogram to a data file.

        The x-range extends to --xmax if given, otherwise to the rounded-up
        largest sample.  Bins are --binsize wide and the range is shifted by
        half a bin.  Each output row is
        ``[bin centre, count, cumulative count, tick label]``; roughly
        `labels` rows carry a non-empty tick label.  With --normalize, the
        counts are divided by the number of samples.

        Returns ``(data, filename, edges)`` where `data` is the
        ``(num_bins, 3)`` array without the label column and `edges` are the
        bin edges reported by ``numpy.histogram``.
        """
        upper = ceil(numpy.amax(samples))
        if self.options.xmax:
            upper = self.options.xmax

        bin_size = self.options.binsize
        num_bins = int(upper / bin_size)
        (bins, edges) = numpy.histogram(
            samples, bins=num_bins,
            range=(bin_size / 2, upper + bin_size / 2))

        data = numpy.zeros((num_bins, 3))
        data[:, 0] = (edges[:-1] + edges[1:]) / 2.0
        data[:, 1] = bins
        data[:, 2] = numpy.cumsum(bins)

        if self.options.normalize:
            data[:, 1] /= len(samples)
            data[:, 2] /= len(samples)

        # Explicit floor division: the rate is used as a modulus below and
        # must stay an integer regardless of the interpreter version.
        label_rate = (len(bins) // labels) or 1

        for_file = []
        for i, row in enumerate(data):
            tick_label = '%.2f' % row[0] if i % label_rate == 0 else ''
            for_file.append([row[0], row[1], row[2], tick_label])

        return (data, self.write(for_file, name, ext='hist'), edges)

    def render(self, p):
        """Save the gnuplot script (--save-script) or execute gnuplot."""
        if self.options.save_script:
            p.gnuplot_save(p.output + '.plot')
        else:
            p.gnuplot_exec()

    def _apply_default_style(self, p, key, default_style):
        """Apply the non-PNG styling shared by sample and trend plots.

        Nothing besides setup_png() is applied when the output format is PNG.
        """
        if self.setup_png(p):
            return
        p.rounded_caps = True
        p.font = 'Helvetica'
        p.font_size = '10'
        p.size = ('20cm', '10cm')
        p.monochrome = False
        p.dashed_lines = False
        p.key = key
        p.default_style = default_style

    def _setup_trend_axes(self, p, cpus, max_y, y_step):
        """Set the axis labels, ticks and ranges of a trend/comparison plot.

        Without --ymax, the y-range ends at `max_y` rounded up to the next
        multiple of `y_step`.
        """
        p.ylabel = "overhead in microseconds"

        if self.options.per_proc:
            p.xlabel = "number of tasks per processor"
        else:
            p.xlabel = "number of tasks"
            p.xticks = (0, max(cpus, 10))

        if self.options.xmax:
            p.xrange = (0, self.options.xmax)
        elif self.options.per_proc:
            p.xrange = (0.5, 20.5)
            p.xticks = (0, 1)
        else:
            p.xrange = (0, ceil(cpus * 20 / 100.0) * 100)

        if self.options.ymax:
            p.yrange = (0, self.options.ymax)
        else:
            p.yrange = (0, ceil(max_y / float(y_step)) * y_step)

    def plot_samples(self, datafile, name, conf):
        """Plot a histogram of the overhead samples stored in `datafile`.

        `conf` is the configuration decoded from the file name; the keys
        'overhead', 'scheduler', 'n' and 'host' are read here.
        """
        # RELEASE-LATENCY samples are divided by 1000 (presumably recorded
        # in nanoseconds -- TODO confirm); all other samples are divided by
        # the number of cycles per microsecond.
        if conf['overhead'] == 'RELEASE-LATENCY':
            scale = 1.0 / 1000.0
        else:
            scale = 1.0 / self.options.cycles

        # An entry in the outlier list that matches this scheduler and task
        # count overrides the --take-off value.
        take_off = self.options.take_off
        if conf['scheduler'] in self.options.outliers:
            task_count = int(conf['n'])
            for (n, cut) in self.options.outliers[conf['scheduler']]:
                if n == task_count:
                    take_off = cut
                    break

        data, max_idx, min_idx, iqr_max, iqr_min = get_data(
            datafile, scale,
            extent=self.options.extent,
            cutoff=self.options.cutoff,
            stdev=self.options.use_std,
            manual=take_off)

        samples = data[min_idx:max_idx]
        discarded = (len(data) - len(samples)) / float(len(data)) * 100

        p = self.make_plot(name)

        samples_label = "samples: total=%d filtered=%d (%.2f%%)" % \
            (len(data), len(data) - len(samples), discarded)

        if self.options.extent:
            iqr_label = "IQR: extent=%d threshold=%.2fus" % \
                (self.options.extent, iqr_max)
        elif take_off:
            iqr_label = "%s outlier%s manually removed" % \
                (take_off, '' if take_off == 1 else 's')
        elif discarded > 0:
            # NOTE(review): the 1000us value is hard-coded in this label;
            # presumably it mirrors a fixed threshold inside get_data().
            iqr_label = "manual threshold=1000us [IQR not applied]"
        else:
            iqr_label = "[IQR filter not applied]"
            # Nothing was filtered, so omit the filter statistics.
            samples_label = "samples: total=%d" % len(data)

        # The doubled backslash passes a literal "\n" on to gnuplot.
        data_label = "%s\\n%s" % (samples_label, iqr_label)

        p.labels = [
            label(0.5, 0.9, get_stats_label(samples),
                  coord=['graph', 'screen'], align='center'),
            label(0.98, 0.95, data_label,
                  coord=['graph', 'graph'], align='right'),
        ]

        (_hist, fname, _edges) = self.write_histogram(samples, name)

        p.setup_histogram(gap=1, boxwidth=1.0)

        p.title = "%s: measured %s for %s task%s per processor (host=%s)" \
            % (conf['scheduler'], TXT[conf['overhead']], conf['n'],
               's' if conf['n'] != '1' else '', conf['host'])

        if self.options.normalize:
            p.ylabel = "fraction of samples"
        else:
            p.ylabel = "number of samples"
        p.xlabel = "overhead in microseconds (bin size = %.2fus)" \
            % self.options.binsize

        if self.options.ymax:
            # A logarithmic axis cannot start at zero.
            if self.options.ylog:
                ymin = 0.5 if not self.options.normalize else 0.0001
            else:
                ymin = 0
            p.yrange = (ymin, self.options.ymax)

        p.xticks = (0, 10)
        p.curves = [curve(histogram=fname, col=2, labels_col=4)]
        p.ylog = self.options.ylog

        #### Styling.
        self._apply_default_style(p, key='off',
                                  default_style='points lw 1')

        self.render(p)

    def prepare_trends(self, datafile, name, conf, want_avg_ymax=False):
        """Extract the trend columns of `datafile` into a plot data file.

        Each output row is ``[n * cpus, worst case, average, std. dev.]``.
        `cpus` is 1 unless --not-per-proc was given and the host is listed
        in HOST_CPUS; schedulers whose name ends in '-RM' are counted with
        one CPU less.

        Returns ``(filename, cpus, max_y)`` where `max_y` is the largest
        average (if `want_avg_ymax`) or the largest worst-case value.
        """
        data = load_csv_file(datafile)

        if not self.options.per_proc and \
                'host' in conf and conf['host'] in HOST_CPUS:
            cpus = HOST_CPUS[conf['host']]
            if conf['scheduler'].endswith('-RM'):
                cpus -= 1
        else:
            cpus = 1

        # column layout of the trend file
        n_idx = 2
        wc_idx = 5
        avg_idx = 6
        std_idx = 9

        rows = [[r[n_idx] * cpus, r[wc_idx], r[avg_idx], r[std_idx]]
                for r in data]

        y_idx = avg_idx if want_avg_ymax else wc_idx
        max_y = numpy.amax(data[:, y_idx])

        return (self.write(rows, name), cpus, max_y)

    def plot_trends(self, datafile, name, conf):
        """Plot maximum, average and standard deviation of one trend file."""
        fname, cpus, max_y = self.prepare_trends(datafile, name, conf)

        p = self.make_plot(name)
        p.title = "measured %s under %s scheduling" \
            % (TXT[conf['overhead']], conf['scheduler'])

        self._setup_trend_axes(p, cpus, max_y, y_step=50)

        p.curves = [
            curve(fname, xcol=1, ycol=2, title="maximum"),
            curve(fname, xcol=1, ycol=3, style="lines", title='average'),
            curve(fname, xcol=1, ycol=3, error=4, title="std. deviation"),
        ]

        #### Styling.
        self._apply_default_style(p, key='left top',
                                  default_style='linespoints lw 1')

        self.render(p)

    def plot_comparison(self, datafiles):
        """Plot the maximum or average overhead of several trend files.

        --compare selects the statistic ('max' or 'avg').  All files must
        decode to the same overhead type.
        """
        if self.options.compare == 'max':
            stat = 'maximum'
            want_avg_ymax = False
            ycol = 2
        else:
            stat = 'average'
            want_avg_ymax = True
            ycol = 3

        plots = []
        max_y = 0
        cpus = 0
        overheads = set()

        for i, datafile in enumerate(datafiles):
            self.out("[%d/%d] Processing %s ..."
                     % (i + 1, len(datafiles), datafile))
            bname = basename(datafile)
            name, ext = splitext(bname)
            conf = decode(name)
            overheads.add(conf['overhead'])
            fname, _cpus, _max_y = self.prepare_trends(datafile, name, conf,
                                                       want_avg_ymax)
            max_y = max(max_y, _max_y)
            cpus = max(cpus, _cpus)
            plots.append((fname, conf))

        assert len(overheads) == 1, \
            "comparison requires files of exactly one overhead type " \
            "(got %d)" % len(overheads)
        overhead = overheads.pop()

        schedulers = '_'.join([c['scheduler'] for (_, c) in plots])
        name = 'compare_%s-%s_%s' % (stat, overhead, schedulers)

        p = self.make_plot(name)
        p.title = "%s measured %s" % (stat, TXT[overhead])

        self._setup_trend_axes(p, cpus, max_y, y_step=10)

        for (fname, conf) in plots:
            p.curves += [curve(fname, xcol=1, ycol=ycol,
                               title=sched_name(conf['scheduler']))]

        #### Styling.
        marker = 'lines'
        if len(p.curves) > 2:
            p.curves[2].style = marker + " ls 4"
        if len(p.curves) > 3:
            p.curves[3].style = marker + " ls 6"

        p.font_size = '7'
        p.size = ('6in', '2.50in')
        p.monochrome = True
        p.dashed_lines = True
        p.key = 'top left'
        p.default_style = marker + ' lw 1'
        p.pointsize = 2

        self.render(p)

    def plot_file(self, datafile):
        """Plot one data file according to the type decoded from its name."""
        bname = basename(datafile)
        name, ext = splitext(bname)
        conf = decode(name)

        # Ordered so that the choice is deterministic should a configuration
        # ever contain both keys.
        plotters = (
            ('taskset', self.plot_samples),
            ('otrend', self.plot_trends),
        )

        for plot_type, plotter in plotters:
            if plot_type in conf:
                try:
                    plotter(datafile, name, conf)
                except IOError as err:
                    self.err("Skipped '%s' (%s)." % (datafile, err))
                break
        else:
            self.err("Skipped '%s'; unknown experiment type." % bname)

        # release all tmp files
        self.tmpfiles = []

    def default(self, _):
        """Entry point: load the outlier list, then plot all arguments."""
        if self.options.outlier_file:
            self.options.outliers = load_outliers(self.options.outlier_file)

        if self.options.compare is not None:
            self.plot_comparison(self.args)
        else:
            for i, datafile in enumerate(self.args):
                self.out("[%d/%d] Processing %s ..."
                         % (i + 1, len(self.args), datafile))
                self.plot_file(datafile)


if __name__ == "__main__":
    OverheadPlotter().launch()