#!/usr/bin/env python
"""Plot and aggregate preemption/migration overhead (PMO) measurements.

Each input is a CSV file whose base name encodes the experiment parameters
(decoded with ``plot.decode``).  Three kinds of files are handled, selected by
the keys present in the decoded configuration:

* ``pmo``      -- raw samples; plotted per file by ``plot_preempt_migrate``
                  and condensed into summary tables by ``write_aggregate``.
* ``pmo-aggr`` -- one summary series written by ``write_aggregate``.
* ``pmo-all``  -- the combined per-write-cycle table written by
                  ``write_aggregate``.

The code targets Python 2 (like the project-local helper modules it imports).
``print`` is only ever called with a single parenthesised argument and
``izip`` has a fallback, so the module itself parses on either major version.
"""

from os.path import splitext, basename, dirname, join
from optparse import make_option as o
from tempfile import NamedTemporaryFile as Tmp
from collections import defaultdict

try:
    from itertools import izip
except ImportError:  # no izip on Python 3; the built-in zip is already lazy
    izip = zip

import numpy as np

# NOTE(review): the star import presumably provides select(), load_csv_file()
# and write_csv_file(), which are used below -- confirm against util.py.
from util import *

import stats
import defapp
from plot import decode
from gnuplot import gnuplot, FileGraph, FORMATS


def ludwig_l2(x, y):
    """Return True if CPUs `x` and `y` are a pair sharing an L2 on 'ludwig'.

    Per the topology diagram next to MACHINE_TOPOLOGY, a pair consists of a
    "left column" CPU (id % 8 < 4) and the CPU whose id is larger by four.
    The test is symmetric in `x` and `y`.
    """
    # x left column, y right column, or
    # y left column, x right column
    return (x % 8 < 4 and x + 4 == y) or \
           (y % 8 < 4 and x - 4 == y)


def ludwig_l3(x, y):
    """Return True if CPUs `x` and `y` are related only through the L3.

    That is: same socket (equal id % 4), not an L2 pair, and not the same CPU.
    """
    return (y % 4) == (x % 4) and \
        not ludwig_l2(x, y) and \
        x != y


# host name -> (number of CPUs, [(tag, predicate(from_cpu, to_cpu)), ...])
# The predicates classify a (preempted-on, resumed-on) CPU pair by the level
# of the memory hierarchy the job migrated through.
MACHINE_TOPOLOGY = {
    'jupiter-cs': (4, [('preempt', lambda x, y: x == y),
                       ('mem', lambda x, y: x != y)]),

    # Socket0  Socket1  Socket2  Socket3
    # ------   -------  -------  -------
    # | 0, 4|  | 1, 5|  | 2, 6|  | 3, 7|
    # | 8,12|  | 9,13|  |10,14|  |11,15|
    # |16,20|  |17,21|  |18,22|  |19,23|
    # -------  -------  -------  -------
    'ludwig.cs.unc.edu': (24, [('preempt', lambda x, y: x == y),
                               ('l2', ludwig_l2),
                               ('l3', ludwig_l3),
                               # different socket <=> ids differ modulo 4
                               ('mem', lambda x, y: abs(y - x) % 4 != 0)]),
}

# configuration key -> name shown in plot titles
PMO_PARAM = {
    'wss': 'WSS',
    'host': 'host',
    'wcycle': 'write-cycle',
}

# memory-hierarchy tag -> phrase used in titles and legends
PMO_MEM = {
    'mem': 'a migration through main memory',
    'l3': 'a migration through a shared L3 cache',
    'l2': 'a migration through a shared L2 cache',
    'preempt': 'a preemption',
    'all': 'either a migration or preemption',
}

# Sub-plots generated for every raw PMO file.
PMO_SUBPLOTS = [
    # x, y, y-delta, split according to mem-hierarchy?
    (0, 6, None, False),
    (0, 7, None, False),
    (0, 8, None, False),
    (0, 9, None, False),
    (0, 10, None, True),
    (3, 10, None, True),
    (0, 10, 9, True),
    (3, 10, 9, True),
]

# Series computed by write_aggregate(); the order defines the column order of
# the combined 'pmo-all' table and must match what plot_pmo_all() expects.
PMO_AGGR_SUBPLOTS = [
    # x, y, y-delta, split according to mem-hierarchy?
    (0, 6, None, False),
    (0, 7, None, False),
    (0, 8, None, False),
    (0, 9, None, False),
    (0, 10, None, True),
    (0, 10, 9, True),
    (0, 8, 7, False),  # difference of second to first hot access
    (0, 9, 8, False),  # difference of third to second hot access
]

PMO_AGGR_COMBINE = [
    [(6, 'all'), (7, 'all'), (8, 'all'), (9, 'all')]
]

# Per CSV column: (long label, short tag used in file names, unit).
PMO_COL_LABEL = [
    ('measurement', 'sample', 'index'),               # 0
    ('write cycles', 'wcycle', 'every nth access'),   # 1
    ('WSS', 'wcc', 'kilobytes'),                      # 2
    ('suspension length', 'delay', 'microseconds'),   # 3
    ('CPU (preempted on)', 'from', 'processor'),      # 4
    ('CPU (resumed on)', 'to', 'processor'),          # 5
    ('cold access', 'cold', 'cycles'),                # 6
    ('first hot access', 'hot1', 'cycles'),           # 7
    ('second hot access', 'hot2', 'cycles'),          # 8
    ('third hot access', 'hot3', 'cycles'),           # 9
    ('access after resuming', 'after', 'cycles'),     # 10
]

PMO_FROM_CPU = 4
PMO_TO_CPU = 5
PMO_HOT3_COL = 9    # 'third hot access': baseline for the "diff" plots
PMO_AFTER_COL = 10  # 'access after resuming': the only column split by tag

options = [
    o('-f', '--format', action='store', dest='format', type='choice',
      choices=FORMATS, help='output format'),
    o(None, '--paper', action='store_true', dest='paper'),
    o(None, '--wide', action='store_true', dest='wide'),
    o(None, '--split', action='store_true', dest='split'),
    o(None, '--log-y', action='store_true', dest='logy'),
    o(None, '--errorbar', action='store_true', dest='errbar'),
    o(None, '--extend', action='store', type='float', dest='extend'),
    o(None, '--aggregate', action='store_true', dest='aggregate'),
    o('-c', '--cycles-per-usec', action='store', type='float',
      dest='cycles_per_usec'),
]

defaults = {
    'format': 'show',
    'paper': False,
    'split': False,
    'wide': False,
    'aggregate': False,
    'extend': 1.5,
    'cycles_per_usec': None,
    'logy': False,
    'errbar': False,
}


def _delta_suffix(yminus):
    """Return the file-name fragment naming the subtracted column, or ''."""
    if yminus is None:
        return ""
    return "-delta-%s" % PMO_COL_LABEL[yminus][1]


def _cost_label(yminus, yunit):
    """Return the y-axis label for a raw (`yminus` is None) or delta series."""
    if yminus is None:
        what = "access cost"
    else:
        what = "delta to %s" % PMO_COL_LABEL[yminus][0]
    return "%s (%s)" % (what, yunit)


def _pmo_title(ycol, tag, conf):
    """Build a plot title from the y column, hierarchy tag and parameters."""
    title = "%s" % PMO_COL_LABEL[ycol][0]
    if ycol == PMO_AFTER_COL:
        title += " from %s" % PMO_MEM[tag]
    for key in conf:
        if key in PMO_PARAM:
            title += " %s=%s" % (PMO_PARAM[key], conf[key])
    return title


def _aggr_columns(mems):
    """Yield (x, y, yminus, tag, column name) for each aggregated series.

    Series are produced in PMO_AGGR_SUBPLOTS order; a sub-plot that is split
    by memory hierarchy contributes 'all' followed by every tag in `mems`.
    This is the single definition of the 'pmo-all' column layout, shared by
    the writer (write_aggregate) and the reader (plot_pmo_all).
    """
    for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS:
        tags = ['all']
        if split:
            tags += mems
        for tag in tags:
            col_name = "%s %s" % (PMO_COL_LABEL[y][1], tag)
            if yminus is not None:
                col_name += ' - ' + PMO_COL_LABEL[yminus][1]
            yield (x, y, yminus, tag, col_name)


def extract_cols(data, xcol, ycol1, ycol2, cast=int,
                 cpu_filter=lambda x, y: True):
    """Return a two-column array (x, y) of the samples accepted by `cpu_filter`.

    data       -- 2-D sample array, one row per measurement.
    xcol       -- index of the column to return as x.
    ycol1      -- index of the column to return as y.
    ycol2      -- if not None, this column is subtracted from `ycol1` first.
    cast       -- unused; kept for interface compatibility.
    cpu_filter -- predicate over (preempted-on CPU, resumed-on CPU).
    """
    def matching_cpus(row):
        return cpu_filter(row[PMO_FROM_CPU], row[PMO_TO_CPU])

    rows = select(matching_cpus, data)
    if ycol2 is not None:
        # NOTE(review): modifies `rows` in place; this presumes select()
        # returns a fresh array rather than a view of `data` -- confirm.
        rows[:, ycol1] -= rows[:, ycol2]
    return rows[:, [xcol, ycol1]]


class CyclePlotter(defapp.App):
    """Command-line application that plots or aggregates PMO sample files."""

    def __init__(self):
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
        self.aggregate_data = []

    def setup_pmo_graphs(self, datafile, conf, subplots=PMO_SUBPLOTS):
        """Load `datafile` and extract the series for every sub-plot.

        Returns a list of (rows, xcol, ycol, yminus, tag) tuples, or None
        (after reporting an error) if conf['host'] has no known topology.
        """
        host = conf['host']
        if host not in MACHINE_TOPOLOGY:
            self.err('Unkown host: %s' % host)
            return None

        (_, hier) = MACHINE_TOPOLOGY[host]
        data = load_csv_file(datafile, dtype=int)
        plots = []
        for (xcol, ycol, yminus, by_mem_hierarchy) in subplots:
            sub = [('all', lambda x, y: True)]
            if by_mem_hierarchy:
                sub += hier
            for tag, test in sub:
                rows = extract_cols(data, xcol, ycol, yminus,
                                    cpu_filter=test)
                plots.append((rows, xcol, ycol, yminus, tag))
        return plots

    def _combine_for_wcycle(self, by_tag, wcycle, all_wss, mems):
        """Assemble the combined table for one write-cycle setting.

        Returns (header, rows) with one row per working-set size, or None if
        some series lacks a data point for one of the working-set sizes.
        """
        rows = [[wss] for wss in sorted(all_wss)]
        header = ['wss']
        for (x, y, yminus, tag, col_name) in _aggr_columns(mems):
            header += [col_name + " avg",
                       col_name + " std",
                       col_name + " wc"]
            key = (x, y, yminus, tag)
            data = by_tag[key][wcycle]
            # izip() stops at the shorter input, so a short series would
            # otherwise go unnoticed and leave ragged rows behind.
            if len(data) != len(rows):
                print("mismatch: %d data points for %d working set sizes "
                      "%s %s" % (len(data), len(rows), key, wcycle))
                return None
            for r, d in izip(rows, data):
                if r[0] != d[0]:  # working set size must match
                    print("mismatch %s %s %s %s" % (r[0], d[0], key, wcycle))
                    return None
                r.extend(d[1:4])  # (average, std, wc)
        return (header, rows)

    def write_aggregate(self, datafiles):
        """Condense raw PMO files into per-series and combined CSV tables.

        For every series in PMO_AGGR_SUBPLOTS and every write-cycle setting a
        file 'pmo-aggr_wcycle=..._host=..._....csv' is written with one row
        (wss, avg, std, wc, #samples kept, #outliers removed) per working-set
        size.  In addition one 'pmo-all_wcycle=..._host=....csv' per
        write-cycle setting combines avg/std/wc of all series.

        All files must stem from the same host; otherwise the aggregation is
        aborted.  Files that are not usable PMO data are skipped.
        """
        # by tag -> by wcycle -> list of data points
        by_tag = defaultdict(lambda: defaultdict(list))
        host = None

        for i, datafile in enumerate(datafiles):
            print('[%d/%d] Processing %s...'
                  % (i + 1, len(datafiles), datafile))
            bname = basename(datafile)
            name, ext = splitext(bname)
            if ext != '.csv':
                self.err("Warning: '%s' doesn't look like a CSV file." % bname)
            conf = decode(name)
            if 'pmo' not in conf:
                self.err("Warning: '%s' is not a PMO experiment; skipping."
                         % bname)
                continue

            plots = self.setup_pmo_graphs(datafile, conf, PMO_AGGR_SUBPLOTS)
            if plots is None:
                # Skip just this file, as the message says, instead of
                # silently abandoning the whole aggregation.
                print("Skipping %s..." % datafile)
                continue
            if not host:
                host = conf['host']
            if host != conf['host']:
                self.err('Mixing data from two hosts! (%s, %s)'
                         % (host, conf['host']))
                self.err('Aborting.')
                return

            wss = int(conf['wss'])
            wcycle = int(conf['wcycle'])
            for (rows, xcol, ycol, yminus, tag) in plots:
                clean = stats.iqr_remove_outliers(rows,
                                                  extend=self.options.extend)
                vals = clean[:, 1]
                n = len(vals)
                key = (xcol, ycol, yminus, tag)
                by_tag[key][wcycle].append(
                    (wss, np.mean(vals), np.std(vals, ddof=1), np.max(vals),
                     n, len(rows) - n))
            del plots  # release the raw samples before loading the next file

        if host is None:
            # Nothing usable was read; MACHINE_TOPOLOGY[None] would fail below.
            self.err('No PMO data found; nothing to aggregate.')
            return

        all_wss = set()
        all_wcycle = set()
        for key in by_tag:
            (xcol, ycol, yminus, tag) = key
            figname = "host=%s_%s%s-vs-%s_%s_%s" % (
                host, PMO_COL_LABEL[ycol][1], _delta_suffix(yminus),
                PMO_COL_LABEL[xcol][1], tag, "code=%s-%s-%s-%s" % key)
            for wcycle in by_tag[key]:
                all_wcycle.add(wcycle)
                data = by_tag[key][wcycle]
                # sort by increasing WSS
                data.sort(key=lambda row: row[0])
                all_wss.update(row[0] for row in data)
                write_csv_file('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname),
                               data)

        mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]]
        for wcycle in all_wcycle:
            combined = self._combine_for_wcycle(by_tag, wcycle, all_wss, mems)
            if combined is None:
                self.err("Data missing for wcycle=%d!" % wcycle)
                continue
            (header, rows) = combined
            write_csv_file('pmo-all_wcycle=%d_host=%s.csv' % (wcycle, host),
                           rows, header,
                           width=max([len(h) for h in header]))

    def plot_preempt_migrate(self, datafile, name, conf):
        """Plot every PMO_SUBPLOTS series of one raw PMO file as points.

        Each figure also shows the two IQR outlier cut-offs (widened by the
        --extend factor) and the cut-off reported by stats.cutoff_max().
        """
        plots = self.setup_pmo_graphs(datafile, conf)
        if plots is None:
            print("Skipping %s..." % datafile)
            return
        print('Plotting %s...' % datafile)

        for (rows, xcol, ycol, yminus, tag) in plots:
            figname = "%s_%s%s-vs-%s_%s" % (
                name, PMO_COL_LABEL[ycol][1], _delta_suffix(yminus),
                PMO_COL_LABEL[xcol][1], tag)
            (xlabel, _, xunit) = PMO_COL_LABEL[xcol]
            (ylabel, _, yunit) = PMO_COL_LABEL[ycol]

            # plot cutoff
            (s, lo, hi) = stats.iqr(rows[:, 1])
            lo -= s * self.options.extend
            hi += s * self.options.extend
            m99 = stats.cutoff_max(rows[:, 1])

            # gnuplot reads the samples from a temporary file; the context
            # manager removes it even if plotting raises.
            with Tmp(mode='w') as tmp:
                tmp.writelines("%s, %s\n" % (row[0], row[1]) for row in rows)
                tmp.flush()
                graphs = [(tmp.name, 1, 2, ylabel),
                          (lo, 'IQR cutoff (%d)' % lo, 'line'),
                          (hi, 'IQR cutoff (%d)' % hi, 'line'),
                          (m99, '99%% cutoff (%d)' % m99, 'line lw 2')]
                gnuplot(graphs,
                        xlabel="%s (%s)" % (xlabel, xunit),
                        ylabel=_cost_label(yminus, yunit),
                        title=_pmo_title(ycol, tag, conf),
                        style='points',
                        format=self.options.format,
                        fname=figname)

    def plot_pmo_aggr(self, datafile, name, conf):
        """Plot one 'pmo-aggr' series: average with error bars, and maximum.

        conf['code'] is the 'x-y-yminus-tag' string that write_aggregate()
        embeds in the file name.
        """
        (xcol, ycol, yminus, tag) = conf['code'].split('-')
        xcol = int(xcol)
        ycol = int(ycol)
        # The third field is the subtracted column (or the literal "None");
        # it used to be parsed from `ycol` by mistake.
        yminus = int(yminus) if yminus != "None" else None

        graphs = [
            "'%s' using 1:2:3 title 'average' with errorbars" % (datafile),
            (datafile, 1, 4, "maximum"),
        ]
        gnuplot(graphs,
                xlabel="working set size (kilobytes)",
                ylabel=_cost_label(yminus, PMO_COL_LABEL[ycol][2]),
                title=_pmo_title(ycol, tag, conf),
                fname=name,
                yrange=(4096, 2**26) if yminus is None else None,
                logscale="xy 2" if yminus is None else "x 2",
                format=self.options.format)

    def plot_pmo_all(self, datafile, name, conf):
        """Plot the combined 'pmo-all' table of one write-cycle setting.

        Produces, each for the average ('avg') and the maximum ('wc'):
          <name>_full_<kind>    -- all raw access costs,
          <name>_delta_<kind>   -- per-sample deltas, by memory hierarchy,
          <name>_delta-h_<kind> -- deltas between successive hot accesses,
          <name>_diff_<kind>    -- access costs minus the third hot access.
        With --cycles-per-usec the values are converted to microseconds.
        A copy of the (possibly converted) table with a header line is
        written next to `datafile` with an 'xxx-' prefix.
        """
        host = conf['host']
        mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]]

        # Reconstruct the column layout; idx is the 1-based (gnuplot) index
        # of a series' average, followed by its std and its maximum.
        header = ["wss"]
        columns = []
        idx = 2
        for (x, y, yminus, tag, col_name) in _aggr_columns(mems):
            header += [col_name + " avg",
                       col_name + " std",
                       col_name + " wc"]
            columns.append((x, y, yminus, tag, idx))
            idx += 3

        data = load_csv_file(datafile)
        if self.options.cycles_per_usec:
            yunit = "(us)"
            data[:, 1:] /= self.options.cycles_per_usec
        else:
            yunit = "(cycles)"

        # Prefix the file name, not the whole path, so that inputs given with
        # a directory component still yield a valid path.
        csvfile = join(dirname(datafile), "xxx-%s" % basename(datafile))
        write_csv_file(csvfile, data, header,
                       width=max([len(h) for h in header]))

        # share of writes in percent; a write cycle of 0 means no writes
        rw = int(conf['wcycle'])
        rw = 1.0 / rw * 100 if rw != 0 else 0

        if self.options.logy:
            axis = ("x 2", "y 10")
        else:
            axis = "x 2"

        kinds = [(0, 'avg', 'average'), (2, 'wc', 'maximum')]

        def graph(idx, offset, kind, label):
            # error bars use the std column, which directly follows the avg
            error = None
            if kind == 'avg' and self.options.errbar:
                error = idx + offset + 1
            return FileGraph(csvfile, xcol=1, ycol=idx + offset,
                             title=label, error=error)

        def render(graphs, ylabel, title, fname):
            gnuplot(graphs, xlabel="working set size (kilobytes)",
                    ylabel=ylabel, title=title, fname=fname,
                    yrange=None, logscale=axis,
                    format=self.options.format)

        # raw measures
        for offset, kind, kind_name in kinds:
            graphs = []
            for (x, y, yminus, tag, idx) in columns:
                if yminus is None:
                    label = PMO_COL_LABEL[y][0]
                    if y == PMO_AFTER_COL:
                        label += " from %s" % PMO_MEM[tag]
                    graphs.append(graph(idx, offset, kind, label))
            render(graphs,
                   "time to complete access " + yunit,
                   "measured %s WSS access time (%.2f%% writes)"
                   % (kind_name, rw),
                   "%s_full_%s" % (name, kind))

        # per-sample delta measures
        for offset, kind, kind_name in kinds:
            graphs = []
            for (x, y, yminus, tag, idx) in columns:
                if yminus is not None and tag != 'all':
                    graphs.append(graph(idx, offset, kind,
                                        "%s" % PMO_MEM[tag]))
            render(graphs,
                   "per-sample delta to hot access " + yunit,
                   "measured %s overhead (%.2f%% writes)" % (kind_name, rw),
                   "%s_delta_%s" % (name, kind))

            graphs = []
            for (x, y, yminus, tag, idx) in columns:
                if y in [8, 9] and yminus in [7, 8] and tag == 'all':
                    label = "%s to %s" % (PMO_COL_LABEL[yminus][0],
                                          PMO_COL_LABEL[y][0])
                    graphs.append(graph(idx, offset, kind, label))
            render(graphs,
                   "per-sample delta to previous hot access " + yunit,
                   "measured %s differences (%.2f%% writes)"
                   % (kind_name, rw),
                   "%s_delta-h_%s" % (name, kind))

        # stats delta: find the (unsplit) third-hot-access series
        col = None
        for (x, y, yminus, tag, idx) in columns:
            if x == 0 and y == PMO_HOT3_COL and yminus is None \
               and tag == 'all':
                col = idx
                break
        if col is None:
            self.err("No third hot access column; skipping diff plots.")
            return

        # normalize based on third hot access
        # +1/-1 to get zero-based indices; Gnuplot wants 1-based indices
        hot_avg = data[:, col - 1].copy()
        hot_wc = data[:, col + 1].copy()
        for (x, y, yminus, tag, idx) in columns:
            data[:, idx - 1] -= hot_avg
            data[:, idx + 1] -= hot_wc

        # NOTE(review): with a file name of None, write_csv_file() presumably
        # writes to and returns a temporary file that lives as long as `tmp`
        # is referenced -- confirm against util.py.
        tmp = write_csv_file(None, data)
        for offset, kind, kind_name in kinds:
            graphs = []
            for (x, y, yminus, tag, idx) in columns:
                if yminus is None and tag != 'all':
                    graphs.append(FileGraph(tmp.name, xcol=1,
                                            ycol=idx + offset,
                                            title=PMO_MEM[tag]))
            render(graphs,
                   "delta to third hot access " + yunit,
                   "difference of %s access costs (%.2f%% writes)"
                   % (kind_name, rw),
                   "%s_diff_%s" % (name, kind))

    def plot_file(self, datafile):
        """Dispatch `datafile` to the plot routine matching its file name."""
        bname = basename(datafile)
        name, ext = splitext(bname)
        if ext != '.csv':
            self.err("Warning: '%s' doesn't look like a CSV file." % bname)
        conf = decode(name)
        if 'pmo' in conf:
            self.plot_preempt_migrate(datafile, name, conf)
        elif 'pmo-aggr' in conf:
            self.plot_pmo_aggr(datafile, name, conf)
        elif 'pmo-all' in conf:
            self.plot_pmo_all(datafile, name, conf)
        else:
            self.err("Skipped '%s'; unkown experiment type." % bname)

    def default(self, _):
        """Plot every file given on the command line.

        NOTE(review): presumably invoked by defapp.App.launch() when no
        command is given -- confirm against defapp.py.
        """
        for datafile in self.args:
            self.plot_file(datafile)

    def do_aggregate(self, _):
        """Aggregate all files named after the first positional argument.

        NOTE(review): the first argument is presumably the command word
        itself ('aggregate') -- confirm against defapp.py.
        """
        self.write_aggregate(self.args[1:])


if __name__ == "__main__":
    CyclePlotter().launch()