From 031d41687127b7eb074229dbc114eb52340472c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20B=2E=20Brandenburg?= Date: Sat, 27 Mar 2010 20:10:00 -0400 Subject: More work on aggregate plotting. --- plot_pm2.py | 196 +++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 175 insertions(+), 21 deletions(-) (limited to 'plot_pm2.py') diff --git a/plot_pm2.py b/plot_pm2.py index 3c0174d..1332825 100755 --- a/plot_pm2.py +++ b/plot_pm2.py @@ -4,9 +4,10 @@ from optparse import make_option as o from tempfile import NamedTemporaryFile as Tmp from collections import defaultdict +from itertools import izip import numpy as np -from util import load_csv_file, select +from util import * import stats import defapp @@ -70,10 +71,14 @@ PMO_AGGR_SUBPLOTS = [ (0, 8, None, False), (0, 9, None, False), (0, 10, None, True), - (0, 10, 6, True), - (0, 10, 7, True), +# (0, 10, 6, True), +# (0, 10, 7, True), +# (0, 10, 8, True), (0, 10, 9, True), - (0, 10, 8, True), +] + +PMO_AGGR_COMBINE = [ + [(6, 'all'), (7, 'all'), (8, 'all'), (9, 'all')] ] PMO_COL_LABEL = [('measurement', 'sample', 'index'), @@ -100,6 +105,7 @@ options = [ o(None, '--split', action='store_true', dest='split'), o(None, '--extend', action='store', type='float', dest='extend'), o(None, '--aggregate', action='store_true', dest='aggregate'), + o('-c', '--cycles-per-usec', action='store', type='float', dest='cycles_per_usec'), ] defaults = { @@ -109,6 +115,7 @@ defaults = { 'wide' : False, 'aggregate' : False, 'extend' : 1.5, + 'cycles_per_usec' : None, } def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True): @@ -149,6 +156,8 @@ class CyclePlotter(defapp.App): # by tag -> by wcycle -> list of data points) by_tag = defaultdict(lambda: defaultdict(list)) + host = None + for i, datafile in enumerate(datafiles): print '[%d/%d] Processing %s...' % (i + 1, len(datafiles), datafile) bname = basename(datafile) @@ -162,9 +171,14 @@ class CyclePlotter(defapp.App): if plots is None: print "Skipping %s..." % datafile return + if not host: + host = conf['host'] + if host != conf['host']: + self.err('Mixing data from two hosts! (%s, %s)' % (host, conf['host'])) + self.err('Aborting.') + return wss = int(conf['wss']) wcycle = int(conf['wcycle']) - host = conf['host'] for (rows, xcol, ycol, yminus, tag) in plots: clean = stats.iqr_remove_outliers(rows, extend=self.options.extend) vals = clean[:,1] @@ -173,28 +187,64 @@ class CyclePlotter(defapp.App): wc = np.max(vals) n = len(vals) - xtag = PMO_COL_LABEL[xcol][1] - ytag = PMO_COL_LABEL[ycol][1] - dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" - code = "code=%s-%s-%s-%s" % \ - (xcol, ycol, yminus, tag) - figname = "host=%s_%s%s-vs-%s_%s_%s" % \ - (host, ytag, dtag, xtag, tag, code) - by_tag[figname][wcycle].append((wss, avg, std, wc, n, len(rows) - n)) + key = (xcol, ycol, yminus, tag) + by_tag[key][wcycle].append((wss, avg, std, wc, n, len(rows) - n)) del plots else: self.err("Warning: '%s' is not a PMO experiment; skipping." % bname) - for figname in by_tag: - for wcycle in by_tag[figname]: - data = by_tag[figname][wcycle] + all_wss = set() + all_wcycle = set() + + for key in by_tag: + for wcycle in by_tag[key]: + all_wcycle.add(wcycle) + + data = by_tag[key][wcycle] # sort by increasing WSS data.sort(key=lambda row: row[0]) - f = open('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), 'w') for row in data: - f.write(", ".join([str(x) for x in row])) - f.write('\n') - f.close() + all_wss.add(row[0]) + + (xcol, ycol, yminus, tag) = key + + xtag = PMO_COL_LABEL[xcol][1] + ytag = PMO_COL_LABEL[ycol][1] + dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" + code = "code=%s-%s-%s-%s" % key + figname = "host=%s_%s%s-vs-%s_%s_%s" % \ + (host, ytag, dtag, xtag, tag, code) + + write_csv_file('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), data) + + + mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]] + + for wcycle in all_wcycle: + try: + rows = [[wss] for wss in sorted(all_wss)] + header = ['wss'] + for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS: + tags = ['all'] + if split: + tags += mems + for tag in tags: + col_name = "%s %s" % (PMO_COL_LABEL[ycol][1], tag) + if not yminus is None: + col_name += ' - ' + PMO_COL_LABEL[yminus][1] + header += [col_name + " avg", col_name + " std", col_name + " wc"] + key = (x, y, yminus, tag) + data = by_tag[key][wcycle] + for r, d in izip(rows, data): + if r[0] != d[0]: + print "mismatch", r[0], d[0], key, wcycle + assert r[0] == d[0] # working set size must match + r += d[1:4] # (average, std, wc) + write_csv_file('pmo-all_wcycle=%d_host=%s.csv' % (wcycle, host), + rows, header, width=max([len(h) for h in header])) + except AssertionError: + self.err("Data missing for wcycle=%d!" % wcycle) + def plot_preempt_migrate(self, datafile, name, conf): plots = self.setup_pmo_graphs(datafile, conf) @@ -259,7 +309,7 @@ class CyclePlotter(defapp.App): xtag = PMO_COL_LABEL[xcol][1] ytag = PMO_COL_LABEL[ycol][1] dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" - figname = "%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag) + figname = name #"%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag) xunit = PMO_COL_LABEL[xcol][2] yunit = PMO_COL_LABEL[ycol][2] ylabel = PMO_COL_LABEL[ycol][0] @@ -282,10 +332,112 @@ class CyclePlotter(defapp.App): ] xlabel = "working set size (kilobytes)" + yrange = (4096, 2**26) if yminus is None else None + gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=figname, + yrange=yrange, logscale="xy 2" if yminus is None else "x 2", format=self.options.format) + def plot_pmo_all(self, datafile, name, conf): + host = conf['host'] + mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]] + columns = [] + idx = 2 + for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS: + tags = ['all'] + if split: + tags += mems + for tag in tags: + columns.append((x, y, yminus, tag, idx)) + idx += 3 + + data = load_csv_file(datafile) + if self.options.cycles_per_usec: + yunit = "(us)" + data[:, 1:] /= self.options.cycles_per_usec + else: + yunit = "(cycles)" + tmp = write_csv_file(None, data) + + rw = int(conf['wcycle']) + rw = 1.0 / rw * 100 if rw != 0 else 0 + + # raw measures + for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]: + graphs = [] + for (x, y, yminus, tag, idx) in columns: + if yminus is None: + label = PMO_COL_LABEL[y][0] + if y == 10: + label += " from %s" % PMO_MEM[tag] + graphs += [ + (tmp.name, 1, idx + offset, label), + ] + xlabel = "working set size (kilobytes)" + ylabel = "time to complete access " + yunit + title = "measured %s WSS access time (%.2f%% writes)" % (long, rw) + yrange = None #(4096, 2**26) + + fname = "%s_full_%s" % (name, kind) + gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname, + yrange=yrange, logscale=("x 2", "y 10"), format=self.options.format) + + # per-sample delta measures + for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]: + graphs = [] + for (x, y, yminus, tag, idx) in columns: + if not (yminus is None) and tag != 'all': + label = "%s" % PMO_MEM[tag] + graphs += [ + (tmp.name, 1, idx + offset, label), + ] + xlabel = "working set size (kilobytes)" + ylabel = "per-sample delta to hot access " + yunit + title = "measured %s overhead (%.2f%% writes)" % (long, rw) + yrange = None + + fname = "%s_delta_%s" % (name, kind) + gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname, + yrange=yrange, logscale=("x 2", "y 10"), format=self.options.format) + del tmp + + # stats delta + # find hot column + col = None + for (x, y, yminus, tag, idx) in columns: + if x == 0 and y == 9 and yminus is None and tag == 'all': + col = idx + break + # normalize based on third hot access + # +1/-1 to get zero-based indices; Gnuplot wants 1-based indices + hot_avg = data[:,col - 1].copy() + hot_wc = data[:,col + 1].copy() + for (x, y, yminus, tag, idx) in columns: + data[:,idx - 1] -= hot_avg + data[:,idx + 1] -= hot_wc + + tmp = write_csv_file(None, data) + + for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]: + graphs = [] + for (x, y, yminus, tag, idx) in columns: + if yminus is None and tag != 'all': + label = PMO_COL_LABEL[y][0] + label = PMO_MEM[tag] + graphs += [ + (tmp.name, 1, idx + offset, label), + ] + xlabel = "working set size (kilobytes)" + ylabel = "delta to third hot access " + yunit + title = "difference of %s access costs (%.2f%% writes)" % (long, rw) + yrange = None + + fname = "%s_diff_%s" % (name, kind) + gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname, + yrange=yrange, logscale=("x 2", "y 10"), format=self.options.format) + del tmp + def plot_file(self, datafile): bname = basename(datafile) name, ext = splitext(bname) @@ -297,6 +449,8 @@ class CyclePlotter(defapp.App): self.plot_preempt_migrate(datafile, name, conf) elif 'pmo-aggr' in conf: self.plot_pmo_aggr(datafile, name, conf) + elif 'pmo-all' in conf: + self.plot_pmo_all(datafile, name, conf) else: self.err("Skipped '%s'; unkown experiment type." % bname) -- cgit v1.2.2