From 4c52a0fd68f117669f79cdd89848db3581349c0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20B=2E=20Brandenburg?= Date: Fri, 26 Mar 2010 14:56:12 -0400 Subject: Switch to numpy-based loading and plot outlier thresholds. --- plot_pm2.py | 50 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 18 deletions(-) (limited to 'plot_pm2.py') diff --git a/plot_pm2.py b/plot_pm2.py index d7d6536..866b4b2 100755 --- a/plot_pm2.py +++ b/plot_pm2.py @@ -1,10 +1,13 @@ #!/usr/bin/env python -import defapp from os.path import splitext, basename from optparse import make_option as o from tempfile import NamedTemporaryFile as Tmp -import csv +import numpy as np +from util import load_csv_file, select + +import stats +import defapp from plot import decode from gnuplot import gnuplot, FORMATS @@ -80,6 +83,7 @@ options = [ o(None, '--paper', action='store_true', dest='paper'), o(None, '--wide', action='store_true', dest='wide'), o(None, '--split', action='store_true', dest='split'), + o(None, '--extend', action='store', type='float', dest='extend'), ] defaults = { @@ -87,17 +91,16 @@ defaults = { 'paper' : False, 'split' : False, 'wide' : False, + 'extend' : 1.5, } def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True): - for row in data: - fcpu = int(row[PMO_FROM_CPU]) - tcpu = int(row[PMO_TO_CPU]) - if cpu_filter(fcpu, tcpu): - if ycol2 is None: - yield (row[xcol], cast(row[ycol1])) - else: - yield (row[xcol], cast(row[ycol1]) - cast(row[ycol2])) + def matching_cpus(row): + return cpu_filter(row[PMO_FROM_CPU], row[PMO_TO_CPU]) + rows = select(matching_cpus, data) + if not (ycol2 is None): + rows[:,ycol1] -= rows[:,ycol2] + return rows[:,(xcol, ycol1)] class CyclePlotter(defapp.App): def __init__(self): @@ -108,19 +111,20 @@ class CyclePlotter(defapp.App): if host in MACHINE_TOPOLOGY: (cpus, hier) = MACHINE_TOPOLOGY[host] plots = [] - data = list(csv.reader(open(datafile))) + data = load_csv_file(datafile, dtype=int) for (xcol, ycol, yminus, by_mem_hierarchy) in PMO_SUBPLOTS: sub = [('all', lambda x, y: True)] if by_mem_hierarchy: sub += hier for tag, test in sub: tmp = Tmp() - for row in extract_cols(data, - xcol, ycol, yminus, - cpu_filter=test): - tmp.write("%s, %s\n" % row) + rows = extract_cols(data, + xcol, ycol, yminus, + cpu_filter=test) + for row in rows: + tmp.write("%s, %s\n" % (row[0], row[1])) tmp.flush() - plots.append((tmp, xcol, ycol, yminus, tag)) + plots.append((tmp, xcol, ycol, yminus, tag, rows)) return plots else: self.err('Unkown host: %s' % host) @@ -131,7 +135,7 @@ class CyclePlotter(defapp.App): if plots is None: print "Skipping %s..." % datafile return - for (tmp, xcol, ycol, yminus, tag) in plots: + for (tmp, xcol, ycol, yminus, tag, rows) in plots: xtag = PMO_COL_LABEL[xcol][1] ytag = PMO_COL_LABEL[ycol][1] dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" @@ -143,10 +147,20 @@ class CyclePlotter(defapp.App): title = "%s" % ylabel if ycol == 10: title += " from %s" % PMO_MEM[tag] + title += "\\n" for key in conf: if key in PMO_PARAM: title += " %s=%s" % (PMO_PARAM[key], conf[key]) - gnuplot([(tmp.name, 1, 2, ylabel)], + graphs = [(tmp.name, 1, 2, ylabel)] + # plot cutoff + (s, lo, hi) = stats.iqr(rows[:,1]) + lo -= s * self.options.extend + hi += s * self.options.extend + m99 = stats.cutoff_max(rows[:, 1]) + graphs += [(lo, 'IQR cutoff (%d)' % lo, 'line'), + (hi, 'IQR cutoff (%d)' % hi, 'line'), + (m99,'99%% cutoff (%d)' % m99, 'line lw 2')] + gnuplot(graphs, xlabel="%s (%s)" % (xlabel, xunit), ylabel="%s (%s)" % ("access cost" if yminus is None else "delta to %s" % PMO_COL_LABEL[yminus][0], -- cgit v1.2.2