1 files changed, 383 insertions, 24 deletions
diff --git a/plot_pm2.py b/plot_pm2.py
index d53a6da..c2fcbf3 100755
--- a/plot_pm2.py
+++ b/plot_pm2.py
@@ -1,17 +1,50 @@
 #!/usr/bin/env python
-import defapp
 from os.path  import splitext, basename
 from optparse import make_option as o
 from tempfile import NamedTemporaryFile as Tmp
-import csv
+from collections import defaultdict
+from itertools import izip
+import numpy as np
+from util import *
+import stats
+import defapp
 from plot     import decode
-from gnuplot  import gnuplot, FORMATS
+from gnuplot  import gnuplot, FileGraph, FORMATS
+def ludwig_l2(x, y):
+    """Return True if CPUs x and y form an L2-sharing pair on ludwig.
+
+    In the socket diagram at MACHINE_TOPOLOGY the pairs are the rows of a
+    socket box: the lower-numbered CPU is in the left column (n % 8 < 4)
+    and its partner in the right column is exactly 4 higher.
+    """
+    lo, hi = min(x, y), max(x, y)
+    return lo % 8 < 4 and hi - lo == 4
+def ludwig_l3(x, y):
+    """Return True if CPUs x and y share only the L3 cache on ludwig.
+
+    Both CPUs must be on the same socket (equal modulo 4), must not be the
+    same CPU, and must not be an L2-sharing pair.
+    """
+    if x == y or x % 4 != y % 4:
+        return False
+    return not ludwig_l2(x, y)
 MACHINE_TOPOLOGY = {
-    'jupiter-cs' : (4, [('preempt', lambda x, y: x == y), ('mem', lambda x, y: x != y)])
+    # host name -> (number of CPUs, memory hierarchy).  The hierarchy is a
+    # list of (tag, predicate) pairs; a predicate is called with the
+    # (from CPU, to CPU) columns of a sample and selects the samples that
+    # belong to that tag.  Tags are also the keys of PMO_MEM.
+    'jupiter-cs' : (4, [('preempt', lambda x, y: x == y),
+                        ('mem', lambda x, y: x != y)]),
+    # Socket0  Socket1  Socket2  Socket3
+    # ------   -------  -------  -------
+    # | 0, 4|  | 1, 5|  | 2, 6|  | 3, 7|
+    # | 8,12|  | 9,13|  |10,14|  |11,15|
+    # |16,20|  |17,21|  |18,22|  |19,23|
+    # -------  -------  -------  -------
+    # Each row of a box is an L2-sharing pair (ludwig_l2); CPUs in the same
+    # box share the L3 (ludwig_l3); 'mem' matches CPUs on different sockets.
+    'ludwig.cs.unc.edu' : (24, [('preempt', lambda x, y: x == y),
+                                ('l2', ludwig_l2),
+                                ('l3', ludwig_l3),
+                                ('mem', lambda x, y: abs(y - x) % 4 != 0)])
 }
 PMO_PARAM = {
@@ -22,6 +55,8 @@ PMO_PARAM = {
 PMO_MEM = {
+    # Keyed by memory-hierarchy tag (the tags of MACHINE_TOPOLOGY plus
+    # 'all'); the text is appended to plot titles as "from <description>".
    'mem'     : 'a migration through main memory',
+    'l3'      : 'a migration through a shared L3 cache',
+    'l2'      : 'a migration through a shared L2 cache',
    'preempt' : 'a preemption',
    'all'     : 'either a migration or preemption',
 }
@@ -38,6 +73,25 @@ PMO_SUBPLOTS = [
    (3, 10,    9,  True),
 ]
+# Subplots used for aggregation.  write_aggregate() summarizes every entry
+# and plot_pmo_all() walks the same list to recompute the column layout of
+# the combined CSV file, so the order of the entries matters.
+# Column numbers index PMO_COL_LABEL; a y-delta of None means "plot y as is",
+# otherwise column y-delta is subtracted from column y.
+PMO_AGGR_SUBPLOTS = [
+    # x, y, y-delta, split according to mem-hierarchy?
+    (0,  6, None, False),
+    (0,  7, None, False),
+    (0,  8, None, False),
+    (0,  9, None, False),
+    (0, 10, None,  True),
+#    (0, 10,    6,  True),
+#    (0, 10,    7,  True),
+#    (0, 10,    8,  True),
+    (0, 10,    9,  True),
+    (0,  8,    7,  False), # difference of second to first hot access
+    (0,  9,    8,  False), # difference of third to second hot access
+]
+# List of groups of (y column, tag) pairs.
+# NOTE(review): not referenced anywhere in the visible part of this patch;
+# presumably meant for combining several aggregates in one figure -- confirm.
+PMO_AGGR_COMBINE = [
+    [(6, 'all'), (7, 'all'), (8, 'all'), (9, 'all')]
+]
 PMO_COL_LABEL = [('measurement', 'sample', 'index'),
                 ('write cycles', 'wcycle', 'every nth access'),
                 ('WSS', 'wcc', 'kilobytes'),
@@ -60,6 +114,11 @@ options = [
    o(None, '--paper', action='store_true', dest='paper'),
    o(None, '--wide', action='store_true', dest='wide'),
    o(None, '--split', action='store_true', dest='split'),
+    o(None, '--log-y', action='store_true', dest='logy'),
+    o(None, '--errorbar', action='store_true', dest='errbar'),
+    o(None, '--extend', action='store', type='float', dest='extend'),
+    o(None, '--aggregate', action='store_true', dest='aggregate'),
+    o('-c', '--cycles-per-usec', action='store', type='float', dest='cycles_per_usec'),
    ]
 defaults = {
@@ -67,48 +126,155 @@ defaults = {
    'paper'  : False,
    'split'  : False,
    'wide'   : False,
+    'aggregate' : False,
+    'extend' : 1.5,
+    'cycles_per_usec' : None,
+    'logy'   : False,
+    'errbar' : False,
    }
 def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True):
-    for row in data:
-        fcpu = int(row[PMO_FROM_CPU])
-        tcpu = int(row[PMO_TO_CPU])
-        if cpu_filter(fcpu, tcpu):
-            if ycol2 is None:
-                yield (row[xcol], cast(row[ycol1]))
-            else:
-                yield (row[xcol], cast(row[ycol1]) - cast(row[ycol2]))
+    """Return a two-column (x, y) array of the samples accepted by cpu_filter.
+
+    data       -- 2-D sample array (assumes the array type returned by
+                  load_csv_file -- TODO confirm)
+    xcol       -- index of the column used as x
+    ycol1      -- index of the column used as y
+    ycol2      -- None, or index of a column that is subtracted from ycol1
+    cast       -- unused; kept so that existing callers keep working
+    cpu_filter -- predicate called with the (from CPU, to CPU) columns of
+                  each row; only rows for which it is true are kept
+    """
+    def matching_cpus(row):
+        return cpu_filter(row[PMO_FROM_CPU], row[PMO_TO_CPU])
+    rows = select(matching_cpus, data)
+    yvals = rows[:, ycol1]
+    if ycol2 is not None:
+        # Subtract into a new array: 'data' is reused by the caller for
+        # every subplot, so it must never be modified here, not even when
+        # select() happens to hand back a view instead of a copy.
+        yvals = yvals - rows[:, ycol2]
+    return np.column_stack((rows[:, xcol], yvals))
 class CyclePlotter(defapp.App):
    def __init__(self):
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
+        # NOTE(review): not read anywhere in the visible part of this patch.
+        self.aggregate_data = []
-    def setup_pmo_graphs(self, datafile, conf):
+    def setup_pmo_graphs(self, datafile, conf, subplots=PMO_SUBPLOTS):
+        """Load datafile and extract one (x, y) table per subplot and tag.
+
+        subplots is a list of (xcol, ycol, yminus, by_mem_hierarchy)
+        tuples.  Every subplot yields an entry for the tag 'all'; subplots
+        flagged by_mem_hierarchy yield one more entry per hierarchy tag of
+        the host.
+
+        Returns a list of (rows, xcol, ycol, yminus, tag) tuples, or None
+        (after reporting an error) if conf['host'] is not a key of
+        MACHINE_TOPOLOGY.
+        """
        host = conf['host']
        if host in MACHINE_TOPOLOGY:
            (cpus, hier) = MACHINE_TOPOLOGY[host]
            plots = []
-            data = list(csv.reader(open(datafile)))
+            data = load_csv_file(datafile, dtype=int)
-            for (xcol, ycol, yminus, by_mem_hierarchy) in PMO_SUBPLOTS:
+            for (xcol, ycol, yminus, by_mem_hierarchy) in subplots:
                sub = [('all', lambda x, y: True)]
                if by_mem_hierarchy:
                    sub += hier
                for tag, test in sub:
-                    tmp    = Tmp()
+                    rows = extract_cols(data,
-                    for row in extract_cols(data,
+                                        xcol, ycol, yminus,
-                                            xcol, ycol, yminus,
+                                        cpu_filter=test)
-                                            cpu_filter=test):
+                    plots.append((rows, xcol, ycol, yminus, tag))
-                        tmp.write("%s, %s\n" % row)
-                    tmp.flush()
-                    plots.append((tmp, xcol, ycol, yminus, tag))
            return plots
        else:
            self.err('Unkown host: %s' % host)
            return None
+    def write_aggregate(self, datafiles):
+        """Aggregate raw PMO sample files into per-WSS summary CSV files.
+
+        Every subplot/tag combination of PMO_AGGR_SUBPLOTS is reduced, after
+        IQR-based outlier removal, to one summary tuple per data file.  Two
+        kinds of files are written to the current directory:
+
+          pmo-aggr_wcycle=<n>_host=<h>_..._code=<x>-<y>-<yminus>-<tag>.csv
+              one file per combination and wcycle
+          pmo-all_wcycle=<n>_host=<h>.csv
+              all combinations of one wcycle side by side
+
+        Files that are not PMO experiments or that come from an unknown
+        host are skipped.  Mixing files from two hosts aborts the run.
+        """
+        # (xcol, ycol, yminus, tag) -> wcycle -> list of
+        # (wss, avg, std, wc, #samples kept, #outliers removed)
+        by_tag = defaultdict(lambda: defaultdict(list))
+        host = None
+        for i, datafile in enumerate(datafiles):
+            print '[%d/%d] Processing %s...' % (i + 1, len(datafiles), datafile)
+            bname = basename(datafile)
+            name, ext = splitext(bname)
+            if ext != '.csv':
+                self.err("Warning: '%s' doesn't look like a CSV file."
+                         % bname)
+            conf = decode(name)
+            if 'pmo' not in conf:
+                self.err("Warning: '%s' is not a PMO experiment; skipping." % bname)
+                continue
+            plots = self.setup_pmo_graphs(datafile, conf, PMO_AGGR_SUBPLOTS)
+            if plots is None:
+                # Skip only this file, as the message says; the remaining
+                # files are still aggregated.
+                print "Skipping %s..." % datafile
+                continue
+            if host is None:
+                host = conf['host']
+            if host != conf['host']:
+                self.err('Mixing data from two hosts! (%s, %s)' % (host, conf['host']))
+                self.err('Aborting.')
+                return
+            wss = int(conf['wss'])
+            wcycle = int(conf['wcycle'])
+            for (rows, xcol, ycol, yminus, tag) in plots:
+                clean = stats.iqr_remove_outliers(rows, extend=self.options.extend)
+                vals = clean[:,1]
+                n = len(vals)
+                key = (xcol, ycol, yminus, tag)
+                by_tag[key][wcycle].append(
+                    (wss, np.mean(vals), np.std(vals, ddof=1), np.max(vals),
+                     n, len(rows) - n))
+            del plots
+        if host is None:
+            # Without a host there is no topology to look up below.
+            self.err('No usable PMO data files; nothing to aggregate.')
+            return
+        # One CSV file per combination and wcycle.
+        all_wss = set()
+        all_wcycle = set()
+        for key in by_tag:
+            (xcol, ycol, yminus, tag) = key
+            xtag = PMO_COL_LABEL[xcol][1]
+            ytag = PMO_COL_LABEL[ycol][1]
+            dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if yminus is not None else ""
+            code = "code=%s-%s-%s-%s" % key
+            figname = "host=%s_%s%s-vs-%s_%s_%s" % \
+                (host, ytag, dtag, xtag, tag, code)
+            for wcycle in by_tag[key]:
+                all_wcycle.add(wcycle)
+                data = by_tag[key][wcycle]
+                # sort by increasing WSS
+                data.sort(key=lambda row: row[0])
+                all_wss.update(row[0] for row in data)
+                write_csv_file('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), data)
+        # One combined CSV file per wcycle: a 'wss' column followed by
+        # (avg, std, wc) for every combination, in PMO_AGGR_SUBPLOTS order.
+        mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]]
+        wss_sorted = sorted(all_wss)
+        for wcycle in all_wcycle:
+            rows = [[wss] for wss in wss_sorted]
+            header = ['wss']
+            complete = True
+            for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS:
+                tags = ['all']
+                if split:
+                    tags += mems
+                for tag in tags:
+                    # Label the columns after this subplot's own y column.
+                    col_name = "%s %s" % (PMO_COL_LABEL[y][1], tag)
+                    if yminus is not None:
+                        col_name += ' - ' + PMO_COL_LABEL[yminus][1]
+                    header += [col_name + " avg", col_name + " std", col_name + " wc"]
+                    key = (x, y, yminus, tag)
+                    data = by_tag[key][wcycle]
+                    # Every combination must cover exactly the same working
+                    # set sizes; comparing the whole list also catches
+                    # entries that are missing at the end.
+                    if [d[0] for d in data] != wss_sorted:
+                        print "mismatch", key, wcycle
+                        complete = False
+                        continue
+                    for r, d in izip(rows, data):
+                        r += d[1:4]  # (average, std, wc)
+            if complete:
+                write_csv_file('pmo-all_wcycle=%d_host=%s.csv' % (wcycle, host),
+                               rows, header, width=max([len(h) for h in header]))
+            else:
+                self.err("Data missing for wcycle=%d!" % wcycle)
    def plot_preempt_migrate(self, datafile, name, conf):
        plots = self.setup_pmo_graphs(datafile, conf)
-        for (tmp, xcol, ycol, yminus, tag) in plots:
+        if plots is None:
+            print "Skipping %s..." % datafile
+            return
+        else:
+            print 'Plotting %s...' % datafile
+        for (rows, xcol, ycol, yminus, tag) in plots:
+            # Write it to a temp file.
+            tmp    = Tmp()
+            for row in rows:
+                tmp.write("%s, %s\n" % (row[0], row[1]))
+            tmp.flush()
            xtag = PMO_COL_LABEL[xcol][1]
            ytag = PMO_COL_LABEL[ycol][1]
            dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
@@ -123,7 +289,16 @@ class CyclePlotter(defapp.App):
            for key in conf:
                if key in PMO_PARAM:
                    title += " %s=%s" % (PMO_PARAM[key], conf[key])
-            gnuplot([(tmp.name, 1, 2, ylabel)],
+            graphs = [(tmp.name, 1, 2, ylabel)]
+            # plot cutoff
+            (s, lo, hi) = stats.iqr(rows[:,1])
+            lo -= s * self.options.extend
+            hi += s * self.options.extend
+            m99 = stats.cutoff_max(rows[:, 1])
+            graphs += [(lo, 'IQR cutoff (%d)' % lo, 'line'),
+                       (hi, 'IQR cutoff (%d)' % hi, 'line'),
+                       (m99,'99%% cutoff (%d)' % m99, 'line lw 2')]
+            gnuplot(graphs,
                    xlabel="%s (%s)" % (xlabel, xunit),
                    ylabel="%s (%s)" % ("access cost" if yminus is None
                                        else "delta to %s" % PMO_COL_LABEL[yminus][0],
@@ -132,6 +307,183 @@ class CyclePlotter(defapp.App):
                    style='points',
                    format=self.options.format,
                    fname=figname)
+            del tmp # delete temporary file
+    def plot_pmo_aggr(self, datafile, name, conf):
+        """Plot one per-combination aggregate file (pmo-aggr_...).
+
+        conf['code'] has the form "<xcol>-<ycol>-<yminus>-<tag>", where
+        <yminus> may be the string "None"; it selects title and axis
+        labels.  The figure shows column 2 with column 3 as error bars
+        ('average') and column 4 ('maximum') over column 1, and is written
+        to a file named after 'name'.
+        """
+        (_xcol, ycol, yminus, tag) = conf['code'].split('-')
+        ycol = int(ycol)
+        yminus = None if yminus == "None" else int(yminus)
+        yunit = PMO_COL_LABEL[ycol][2]
+        title = "%s" % PMO_COL_LABEL[ycol][0]
+        if ycol == 10:
+            title += " from %s" % PMO_MEM[tag]
+        for key in conf:
+            if key in PMO_PARAM:
+                title += " %s=%s" % (PMO_PARAM[key], conf[key])
+        # ylabel must be a plain string (no trailing comma after the
+        # expression, which would turn it into a 1-tuple).
+        if yminus is None:
+            ylabel = "%s (%s)" % ("access cost", yunit)
+        else:
+            ylabel = "%s (%s)" % ("delta to %s" % PMO_COL_LABEL[yminus][0], yunit)
+        graphs = [
+            # raw gnuplot plot clause: average with std as error bars
+            "'%s' using 1:2:3 title 'average' with errorbars" % (datafile),
+            (datafile, 1, 4, "maximum"),
+            ]
+        # Absolute costs get a fixed, log-scaled y axis; deltas can be
+        # negative, so only x is log-scaled for them.
+        gnuplot(graphs,
+                xlabel="working set size (kilobytes)",
+                ylabel=ylabel, title=title, fname=name,
+                yrange=(4096, 2**26) if yminus is None else None,
+                logscale="xy 2" if yminus is None else "x 2",
+                format=self.options.format)
+    def plot_pmo_all(self, datafile, name, conf):
+        """Plot the combined aggregate file (pmo-all_...) of one wcycle.
+
+        Writes a copy of the data with a header row to "xxx-<datafile>"
+        (scaled by --cycles-per-usec if given) and produces, for both the
+        average ('avg') and the maximum ('wc') columns:
+
+          <name>_full_<kind>     raw access costs
+          <name>_delta_<kind>    per-sample deltas, split by hierarchy tag
+          <name>_delta-h_<kind>  deltas between consecutive hot accesses
+          <name>_diff_<kind>     costs minus the third hot access
+        """
+        host = conf['host']
+        mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]]
+        # Recompute the column layout from PMO_AGGR_SUBPLOTS.  Each
+        # combination occupies three columns (avg, std, wc); idx is the
+        # 1-based (gnuplot) index of its avg column, column 1 being wss.
+        columns = []
+        header = ["wss"]
+        idx = 2
+        for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS:
+            tags = ['all']
+            if split:
+                tags += mems
+            for tag in tags:
+                col_name = "%s %s" % (PMO_COL_LABEL[y][1], tag)
+                if yminus is not None:
+                    col_name += ' - ' + PMO_COL_LABEL[yminus][1]
+                header += [col_name + " avg", col_name + " std", col_name + " wc"]
+                columns.append((x, y, yminus, tag, idx))
+                idx += 3
+        data = load_csv_file(datafile)
+        if self.options.cycles_per_usec:
+            yunit = "(us)"
+            data[:, 1:] /= self.options.cycles_per_usec
+        else:
+            yunit = "(cycles)"
+        csvfile = "xxx-%s" % datafile
+        write_csv_file(csvfile, data, header, width=max([len(h) for h in header]))
+        rw = int(conf['wcycle'])
+        rw = 1.0 / rw * 100 if rw != 0 else 0
+        if self.options.logy:
+            axis = ("x 2", "y 10")
+        else:
+            axis = "x 2"
+        xlabel = "working set size (kilobytes)"
+        # (column offset relative to idx, file name tag, wording in titles)
+        kinds = [(0, 'avg', 'average'), (2, 'wc', 'maximum')]
+        def summary_graph(label, idx, offset, kind):
+            # Error bars use the std column, which directly follows avg;
+            # they make sense for averages only and must be requested.
+            with_err = kind == 'avg' and self.options.errbar
+            return FileGraph(csvfile, xcol=1, ycol=idx + offset, title=label,
+                             error=idx + offset + 1 if with_err else None)
+        def emit(graphs, ylabel, title, fname):
+            gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname,
+                    yrange=None, logscale=axis, format=self.options.format)
+        # raw measures
+        for offset, kind, desc in kinds:
+            graphs = []
+            for (x, y, yminus, tag, idx) in columns:
+                if yminus is None:
+                    label = PMO_COL_LABEL[y][0]
+                    if y == 10:
+                        label += " from %s" % PMO_MEM[tag]
+                    graphs.append(summary_graph(label, idx, offset, kind))
+            # Run gnuplot once per figure, after all curves are collected.
+            emit(graphs,
+                 "time to complete access " + yunit,
+                 "measured %s WSS access time (%.2f%% writes)" % (desc, rw),
+                 "%s_full_%s" % (name, kind))
+        # per-sample delta measures
+        for offset, kind, desc in kinds:
+            graphs = []
+            for (x, y, yminus, tag, idx) in columns:
+                if yminus is not None and tag != 'all':
+                    graphs.append(summary_graph("%s" % PMO_MEM[tag], idx, offset, kind))
+            emit(graphs,
+                 "per-sample delta to hot access " + yunit,
+                 "measured %s overhead (%.2f%% writes)" % (desc, rw),
+                 "%s_delta_%s" % (name, kind))
+            graphs = []
+            for (x, y, yminus, tag, idx) in columns:
+                if y in [8, 9] and yminus in [7, 8] and tag == 'all':
+                    label = "%s to %s" % (PMO_COL_LABEL[yminus][0], PMO_COL_LABEL[y][0])
+                    graphs.append(summary_graph(label, idx, offset, kind))
+            emit(graphs,
+                 "per-sample delta to previous hot access " + yunit,
+                 "measured %s differences (%.2f%% writes)" % (desc, rw),
+                 "%s_delta-h_%s" % (name, kind))
+        # stats delta
+        # find hot column
+        col = None
+        for (x, y, yminus, tag, idx) in columns:
+            if x == 0 and y == 9 and yminus is None and tag == 'all':
+                col = idx
+                break
+        # normalize based on third hot access
+        # idx is 1-based (gnuplot): zero-based, avg is at idx - 1 and wc
+        # (two columns further) at idx + 1.
+        hot_avg = data[:,col - 1].copy()
+        hot_wc = data[:,col + 1].copy()
+        for (x, y, yminus, tag, idx) in columns:
+            data[:,idx - 1] -= hot_avg
+            data[:,idx + 1] -= hot_wc
+        # tmp stays referenced until the end of the method, i.e. until all
+        # gnuplot runs that read it are done.
+        tmp = write_csv_file(None, data)
+        for offset, kind, desc in kinds:
+            graphs = []
+            for (x, y, yminus, tag, idx) in columns:
+                if yminus is None and tag != 'all':
+                    graphs.append(FileGraph(tmp.name, xcol=1, ycol=idx+offset,
+                                            title=PMO_MEM[tag]))
+            emit(graphs,
+                 "delta to third hot access " + yunit,
+                 "difference of %s access costs (%.2f%% writes)" % (desc, rw),
+                 "%s_diff_%s" % (name, kind))
+#        del tmp
    def plot_file(self, datafile):
        bname = basename(datafile)
@@ -142,6 +494,10 @@ class CyclePlotter(defapp.App):
        conf    = decode(name)
        if 'pmo' in conf:
            self.plot_preempt_migrate(datafile, name, conf)
+        elif 'pmo-aggr' in conf:
+            self.plot_pmo_aggr(datafile, name, conf)
+        elif 'pmo-all' in conf:
+            self.plot_pmo_all(datafile, name, conf)
        else:
            self.err("Skipped '%s'; unkown experiment type."
                     % bname)
@@ -150,5 +506,8 @@ class CyclePlotter(defapp.App):
        for datafile in self.args:
            self.plot_file(datafile)
+    def do_aggregate(self, _):
+        """Aggregate the data files named after the first positional argument."""
+        # The first positional argument is skipped (presumably the command
+        # word itself -- confirm against defapp.App).
+        datafiles = self.args[1:]
+        self.write_aggregate(datafiles)
 if __name__ == "__main__":
    CyclePlotter().launch()

diff --git a/plot_pm2.py b/plot_pm2.py index d53a6da..c2fcbf3 100755 --- a/plot_pm2.py +++ b/plot_pm2.py
@@ -1,17 +1,50 @@
1	#!/usr/bin/env python	1	#!/usr/bin/env python
2	import defapp
3	from os.path import splitext, basename	2	from os.path import splitext, basename
4	from optparse import make_option as o	3	from optparse import make_option as o
5	from tempfile import NamedTemporaryFile as Tmp	4	from tempfile import NamedTemporaryFile as Tmp
6		5
7	import csv	6	from collections import defaultdict
		7	from itertools import izip
		8
		9	import numpy as np
		10	from util import *
		11
		12	import stats
		13	import defapp
8		14
9	from plot import decode	15	from plot import decode
10	from gnuplot import gnuplot, FORMATS	16	from gnuplot import gnuplot, FileGraph, FORMATS
		17
		18
		19
		20	def ludwig_l2(x, y):
		21	# x in the left column and y in the right column, or y in the left column and x in the right column
		22	return (x % 8 < 4 and x + 4 == y) or \
		23	(y % 8 < 4 and x - 4 == y)
		24
		25	def ludwig_l3(x, y):
		26	# same socket
		27	# not a a shared L2
		28	# not identical
		29	return (y % 4) == (x % 4) and \
		30	not ludwig_l2(x, y) and \
		31	x != y
11		32
12		33
13	MACHINE_TOPOLOGY = {	34	MACHINE_TOPOLOGY = {
14	'jupiter-cs' : (4, [('preempt', lambda x, y: x == y), ('mem', lambda x, y: x != y)])	35	'jupiter-cs' : (4, [('preempt', lambda x, y: x == y),
		36	('mem', lambda x, y: x != y)]),
		37
		38	# Socket0 Socket1 Socket2 Socket3
		39	# ------ ------- ------- -------
		40	# \| 0, 4\| \| 1, 5\| \| 2, 6\| \| 3, 7\|
		41	# \| 8,12\| \| 9,13\| \|10,14\| \|11,15\|
		42	# \|16,20\| \|17,21\| \|18,22\| \|19,23\|
		43	# ------- ------- ------- -------
		44	'ludwig.cs.unc.edu' : (24, [('preempt', lambda x, y: x == y),
		45	('l2', ludwig_l2),
		46	('l3', ludwig_l3),
		47	('mem', lambda x, y: abs(y - x) % 4 != 0)])
15	}	48	}
16		49
17	PMO_PARAM = {	50	PMO_PARAM = {
@@ -22,6 +55,8 @@ PMO_PARAM = {
22		55
23	PMO_MEM = {	56	PMO_MEM = {
24	'mem' : 'a migration through main memory',	57	'mem' : 'a migration through main memory',
		58	'l3' : 'a migration through a shared L3 cache',
		59	'l2' : 'a migration through a shared L2 cache',
25	'preempt' : 'a preemption',	60	'preempt' : 'a preemption',
26	'all' : 'either a migration or preemption',	61	'all' : 'either a migration or preemption',
27	}	62	}
@@ -38,6 +73,25 @@ PMO_SUBPLOTS = [
38	(3, 10, 9, True),	73	(3, 10, 9, True),
39	]	74	]
40		75
		76	PMO_AGGR_SUBPLOTS = [
		77	# x, y, y-delta, split according to mem-hierarchy?
		78	(0, 6, None, False),
		79	(0, 7, None, False),
		80	(0, 8, None, False),
		81	(0, 9, None, False),
		82	(0, 10, None, True),
		83	# (0, 10, 6, True),
		84	# (0, 10, 7, True),
		85	# (0, 10, 8, True),
		86	(0, 10, 9, True),
		87	(0, 8, 7, False), # difference of second to first hot access
		88	(0, 9, 8, False), # difference of third to second hot access
		89	]
		90
		91	PMO_AGGR_COMBINE = [
		92	[(6, 'all'), (7, 'all'), (8, 'all'), (9, 'all')]
		93	]
		94
41	PMO_COL_LABEL = [('measurement', 'sample', 'index'),	95	PMO_COL_LABEL = [('measurement', 'sample', 'index'),
42	('write cycles', 'wcycle', 'every nth access'),	96	('write cycles', 'wcycle', 'every nth access'),
43	('WSS', 'wcc', 'kilobytes'),	97	('WSS', 'wcc', 'kilobytes'),
@@ -60,6 +114,11 @@ options = [
60	o(None, '--paper', action='store_true', dest='paper'),	114	o(None, '--paper', action='store_true', dest='paper'),
61	o(None, '--wide', action='store_true', dest='wide'),	115	o(None, '--wide', action='store_true', dest='wide'),
62	o(None, '--split', action='store_true', dest='split'),	116	o(None, '--split', action='store_true', dest='split'),
		117	o(None, '--log-y', action='store_true', dest='logy'),
		118	o(None, '--errorbar', action='store_true', dest='errbar'),
		119	o(None, '--extend', action='store', type='float', dest='extend'),
		120	o(None, '--aggregate', action='store_true', dest='aggregate'),
		121	o('-c', '--cycles-per-usec', action='store', type='float', dest='cycles_per_usec'),
63	]	122	]
64		123
65	defaults = {	124	defaults = {
@@ -67,48 +126,155 @@ defaults = {
67	'paper' : False,	126	'paper' : False,
68	'split' : False,	127	'split' : False,
69	'wide' : False,	128	'wide' : False,
		129	'aggregate' : False,
		130	'extend' : 1.5,
		131	'cycles_per_usec' : None,
		132	'logy' : False,
		133	'errbar' : False,
70	}	134	}
71		135
72	def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True):	136	def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True):
73	for row in data:	137	def matching_cpus(row):
74	fcpu = int(row[PMO_FROM_CPU])	138	return cpu_filter(row[PMO_FROM_CPU], row[PMO_TO_CPU])
75	tcpu = int(row[PMO_TO_CPU])	139	rows = select(matching_cpus, data)
76	if cpu_filter(fcpu, tcpu):	140	if not (ycol2 is None):
77	if ycol2 is None:	141	rows[:,ycol1] -= rows[:,ycol2]
78	yield (row[xcol], cast(row[ycol1]))	142	return rows[:,(xcol, ycol1)]
79	else:
80	yield (row[xcol], cast(row[ycol1]) - cast(row[ycol2]))
81		143
82	class CyclePlotter(defapp.App):	144	class CyclePlotter(defapp.App):
83	def __init__(self):	145	def __init__(self):
84	defapp.App.__init__(self, options, defaults, no_std_opts=True)	146	defapp.App.__init__(self, options, defaults, no_std_opts=True)
		147	self.aggregate_data = []
85		148
86	def setup_pmo_graphs(self, datafile, conf):	149	def setup_pmo_graphs(self, datafile, conf, subplots=PMO_SUBPLOTS):
87	host = conf['host']	150	host = conf['host']
88	if host in MACHINE_TOPOLOGY:	151	if host in MACHINE_TOPOLOGY:
89	(cpus, hier) = MACHINE_TOPOLOGY[host]	152	(cpus, hier) = MACHINE_TOPOLOGY[host]
90	plots = []	153	plots = []
91	data = list(csv.reader(open(datafile)))	154	data = load_csv_file(datafile, dtype=int)
92	for (xcol, ycol, yminus, by_mem_hierarchy) in PMO_SUBPLOTS:	155	for (xcol, ycol, yminus, by_mem_hierarchy) in subplots:
93	sub = [('all', lambda x, y: True)]	156	sub = [('all', lambda x, y: True)]
94	if by_mem_hierarchy:	157	if by_mem_hierarchy:
95	sub += hier	158	sub += hier
96	for tag, test in sub:	159	for tag, test in sub:
97	tmp = Tmp()	160	rows = extract_cols(data,
98	for row in extract_cols(data,	161	xcol, ycol, yminus,
99	xcol, ycol, yminus,	162	cpu_filter=test)
100	cpu_filter=test):	163	plots.append((rows, xcol, ycol, yminus, tag))
101	tmp.write("%s, %s\n" % row)
102	tmp.flush()
103	plots.append((tmp, xcol, ycol, yminus, tag))
104	return plots	164	return plots
105	else:	165	else:
106	self.err('Unkown host: %s' % host)	166	self.err('Unkown host: %s' % host)
107	return None	167	return None
108		168
		169	def write_aggregate(self, datafiles):
		170	# (wss, avg, wc, #avg, #wc)
		171	# by tag -> by wcycle -> list of data points)
		172	by_tag = defaultdict(lambda: defaultdict(list))
		173
		174	host = None
		175
		176	for i, datafile in enumerate(datafiles):
		177	print '[%d/%d] Processing %s...' % (i + 1, len(datafiles), datafile)
		178	bname = basename(datafile)
		179	name, ext = splitext(bname)
		180	if ext != '.csv':
		181	self.err("Warning: '%s' doesn't look like a CSV file."
		182	% bname)
		183	conf = decode(name)
		184	if 'pmo' in conf:
		185	plots = self.setup_pmo_graphs(datafile, conf, PMO_AGGR_SUBPLOTS)
		186	if plots is None:
		187	print "Skipping %s..." % datafile
		188	return
		189	if not host:
		190	host = conf['host']
		191	if host != conf['host']:
		192	self.err('Mixing data from two hosts! (%s, %s)' % (host, conf['host']))
		193	self.err('Aborting.')
		194	return
		195	wss = int(conf['wss'])
		196	wcycle = int(conf['wcycle'])
		197	for (rows, xcol, ycol, yminus, tag) in plots:
		198	clean = stats.iqr_remove_outliers(rows, extend=self.options.extend)
		199	vals = clean[:,1]
		200	avg = np.mean(vals)
		201	std = np.std(vals, ddof=1)
		202	wc = np.max(vals)
		203	n = len(vals)
		204
		205	key = (xcol, ycol, yminus, tag)
		206	by_tag[key][wcycle].append((wss, avg, std, wc, n, len(rows) - n))
		207	del plots
		208	else:
		209	self.err("Warning: '%s' is not a PMO experiment; skipping." % bname)
		210
		211	all_wss = set()
		212	all_wcycle = set()
		213
		214	for key in by_tag:
		215	for wcycle in by_tag[key]:
		216	all_wcycle.add(wcycle)
		217
		218	data = by_tag[key][wcycle]
		219	# sort by increasing WSS
		220	data.sort(key=lambda row: row[0])
		221	for row in data:
		222	all_wss.add(row[0])
		223
		224	(xcol, ycol, yminus, tag) = key
		225
		226	xtag = PMO_COL_LABEL[xcol][1]
		227	ytag = PMO_COL_LABEL[ycol][1]
		228	dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
		229	code = "code=%s-%s-%s-%s" % key
		230	figname = "host=%s_%s%s-vs-%s_%s_%s" % \
		231	(host, ytag, dtag, xtag, tag, code)
		232
		233	write_csv_file('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), data)
		234
		235
		236	mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]]
		237
		238	for wcycle in all_wcycle:
		239	try:
		240	rows = [[wss] for wss in sorted(all_wss)]
		241	header = ['wss']
		242	for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS:
		243	tags = ['all']
		244	if split:
		245	tags += mems
		246	for tag in tags:
		247	col_name = "%s %s" % (PMO_COL_LABEL[ycol][1], tag)
		248	if not yminus is None:
		249	col_name += ' - ' + PMO_COL_LABEL[yminus][1]
		250	header += [col_name + " avg", col_name + " std", col_name + " wc"]
		251	key = (x, y, yminus, tag)
		252	data = by_tag[key][wcycle]
		253	for r, d in izip(rows, data):
		254	if r[0] != d[0]:
		255	print "mismatch", r[0], d[0], key, wcycle
		256	assert r[0] == d[0] # working set size must match
		257	r += d[1:4] # (average, std, wc)
		258	write_csv_file('pmo-all_wcycle=%d_host=%s.csv' % (wcycle, host),
		259	rows, header, width=max([len(h) for h in header]))
		260	except AssertionError:
		261	self.err("Data missing for wcycle=%d!" % wcycle)
		262
		263
109	def plot_preempt_migrate(self, datafile, name, conf):	264	def plot_preempt_migrate(self, datafile, name, conf):
110	plots = self.setup_pmo_graphs(datafile, conf)	265	plots = self.setup_pmo_graphs(datafile, conf)
111	for (tmp, xcol, ycol, yminus, tag) in plots:	266	if plots is None:
		267	print "Skipping %s..." % datafile
		268	return
		269	else:
		270	print 'Plotting %s...' % datafile
		271	for (rows, xcol, ycol, yminus, tag) in plots:
		272	# Write it to a temp file.
		273	tmp = Tmp()
		274	for row in rows:
		275	tmp.write("%s, %s\n" % (row[0], row[1]))
		276	tmp.flush()
		277
112	xtag = PMO_COL_LABEL[xcol][1]	278	xtag = PMO_COL_LABEL[xcol][1]
113	ytag = PMO_COL_LABEL[ycol][1]	279	ytag = PMO_COL_LABEL[ycol][1]
114	dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""	280	dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
@@ -123,7 +289,16 @@ class CyclePlotter(defapp.App):
123	for key in conf:	289	for key in conf:
124	if key in PMO_PARAM:	290	if key in PMO_PARAM:
125	title += " %s=%s" % (PMO_PARAM[key], conf[key])	291	title += " %s=%s" % (PMO_PARAM[key], conf[key])
126	gnuplot([(tmp.name, 1, 2, ylabel)],	292	graphs = [(tmp.name, 1, 2, ylabel)]
		293	# plot cutoff
		294	(s, lo, hi) = stats.iqr(rows[:,1])
		295	lo -= s * self.options.extend
		296	hi += s * self.options.extend
		297	m99 = stats.cutoff_max(rows[:, 1])
		298	graphs += [(lo, 'IQR cutoff (%d)' % lo, 'line'),
		299	(hi, 'IQR cutoff (%d)' % hi, 'line'),
		300	(m99,'99%% cutoff (%d)' % m99, 'line lw 2')]
		301	gnuplot(graphs,
127	xlabel="%s (%s)" % (xlabel, xunit),	302	xlabel="%s (%s)" % (xlabel, xunit),
128	ylabel="%s (%s)" % ("access cost" if yminus is None	303	ylabel="%s (%s)" % ("access cost" if yminus is None
129	else "delta to %s" % PMO_COL_LABEL[yminus][0],	304	else "delta to %s" % PMO_COL_LABEL[yminus][0],
@@ -132,6 +307,183 @@ class CyclePlotter(defapp.App):
132	style='points',	307	style='points',
133	format=self.options.format,	308	format=self.options.format,
134	fname=figname)	309	fname=figname)
		310	del tmp # delete temporary file
		311
		312	def plot_pmo_aggr(self, datafile, name, conf):
		313	fname = datafile
		314	code = conf['code']
		315	(xcol, ycol, yminus, tag) = code.split('-')
		316
		317	xcol = int(xcol)
		318	ycol = int(ycol)
		319	if yminus != "None":
		320	yminus = int(ycol)
		321	else:
		322	yminus = None
		323
		324	xtag = PMO_COL_LABEL[xcol][1]
		325	ytag = PMO_COL_LABEL[ycol][1]
		326	dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
		327	figname = name #"%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag)
		328	xunit = PMO_COL_LABEL[xcol][2]
		329	yunit = PMO_COL_LABEL[ycol][2]
		330	ylabel = PMO_COL_LABEL[ycol][0]
		331	xlabel = PMO_COL_LABEL[xcol][0]
		332	title = "%s" % ylabel
		333
		334	ylabel="%s (%s)" % ("access cost" if yminus is None
		335	else "delta to %s" % PMO_COL_LABEL[yminus][0],
		336	yunit),
		337	if ycol == 10:
		338	title += " from %s" % PMO_MEM[tag]
		339	for key in conf:
		340	if key in PMO_PARAM:
		341	title += " %s=%s" % (PMO_PARAM[key], conf[key])
		342
		343	graphs = [
		344	#(fname, 1, 2, "average"),
		345	"'%s' using 1:2:3 title 'average' with errorbars" % (fname),
		346	(fname, 1, 4, "maximum"),
		347	]
		348	xlabel = "working set size (kilobytes)"
		349
		350	yrange = (4096, 2**26) if yminus is None else None
		351
		352	gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=figname,
		353	yrange=yrange,
		354	logscale="xy 2" if yminus is None else "x 2",
		355	format=self.options.format)
		356
		357	def plot_pmo_all(self, datafile, name, conf):
		358	host = conf['host']
		359	mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]]
		360	columns = []
		361	idx = 2
		362	header = ["wss"]
		363	for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS:
		364	tags = ['all']
		365	if split:
		366	tags += mems
		367	for tag in tags:
		368	col_name = "%s %s" % (PMO_COL_LABEL[y][1], tag)
		369	if not yminus is None:
		370	col_name += ' - ' + PMO_COL_LABEL[yminus][1]
		371	header += [col_name + " avg", col_name + " std", col_name + " wc"]
		372	columns.append((x, y, yminus, tag, idx))
		373	idx += 3
		374
		375	data = load_csv_file(datafile)
		376	if self.options.cycles_per_usec:
		377	yunit = "(us)"
		378	data[:, 1:] /= self.options.cycles_per_usec
		379	else:
		380	yunit = "(cycles)"
		381
		382	csvfile = "xxx-%s" % datafile
		383
		384	write_csv_file(csvfile, data, header, width=max([len(h) for h in header]))
		385
		386	rw = int(conf['wcycle'])
		387	rw = 1.0 / rw * 100 if rw != 0 else 0
		388
		389	if self.options.logy:
		390	axis = ("x 2", "y 10")
		391	else:
		392	axis = "x 2"
		393
		394	# raw measures
		395	for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]:
		396	graphs = []
		397	for (x, y, yminus, tag, idx) in columns:
		398	if yminus is None:
		399	label = PMO_COL_LABEL[y][0]
		400	if y == 10:
		401	label += " from %s" % PMO_MEM[tag]
		402	graphs.append(
		403	FileGraph(
		404	csvfile, xcol=1, ycol=idx + offset, title=label,
		405	error=idx + offset + 1 if kind == 'avg' and self.options.errbar else None))
		406	xlabel = "working set size (kilobytes)"
		407	ylabel = "time to complete access " + yunit
		408	title = "measured %s WSS access time (%.2f%% writes)" % (long, rw)
		409	yrange = None #(4096, 2**26)
		410
		411	fname = "%s_full_%s" % (name, kind)
		412
		413	gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname,
		414	yrange=yrange, logscale=axis, format=self.options.format)
		415
		416	# per-sample delta measures
		417	for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]:
		418	graphs = []
		419	for (x, y, yminus, tag, idx) in columns:
		420	if not (yminus is None) and tag != 'all':
		421	label = "%s" % PMO_MEM[tag]
		422	graphs.append(
		423	FileGraph(
		424	csvfile, xcol=1, ycol=idx + offset, title=label,
		425	error=idx + offset + 1 if kind == 'avg' and self.options.errbar else None))
		426	xlabel = "working set size (kilobytes)"
		427	ylabel = "per-sample delta to hot access " + yunit
		428	title = "measured %s overhead (%.2f%% writes)" % (long, rw)
		429	yrange = None
		430
		431	fname = "%s_delta_%s" % (name, kind)
		432	gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname,
		433	yrange=yrange, logscale=axis, format=self.options.format)
		434
		435	graphs = []
		436	for (x, y, yminus, tag, idx) in columns:
		437	if y in [8, 9] and yminus in [7, 8] and tag == 'all':
		438	label = "%s to %s" % (PMO_COL_LABEL[yminus][0], PMO_COL_LABEL[y][0])
		439	graphs.append(
		440	FileGraph(
		441	csvfile, xcol=1, ycol=idx + offset, title=label,
		442	error=idx + offset + 1 if kind == 'avg' and self.options.errbar else None))
		443	xlabel = "working set size (kilobytes)"
		444	ylabel = "per-sample delta to previous hot access " + yunit
		445	title = "measured %s differences (%.2f%% writes)" % (long, rw)
		446	yrange = None
		447
		448	fname = "%s_delta-h_%s" % (name, kind)
		449	gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname,
		450	yrange=yrange, logscale=axis, format=self.options.format)
		451
		452	# del tmp
		453
		454	# stats delta
		455	# find hot column
		456	col = None
		457	for (x, y, yminus, tag, idx) in columns:
		458	if x == 0 and y == 9 and yminus is None and tag == 'all':
		459	col = idx
		460	break
		461	# normalize based on third hot access
		462	# +1/-1 to get zero-based indices; Gnuplot wants 1-based indices
		463	hot_avg = data[:,col - 1].copy()
		464	hot_wc = data[:,col + 1].copy()
		465	for (x, y, yminus, tag, idx) in columns:
		466	data[:,idx - 1] -= hot_avg
		467	data[:,idx + 1] -= hot_wc
		468
		469	tmp = write_csv_file(None, data)
		470
		471	for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]:
		472	graphs = []
		473	for (x, y, yminus, tag, idx) in columns:
		474	if yminus is None and tag != 'all':
		475	label = PMO_COL_LABEL[y][0]
		476	label = PMO_MEM[tag]
		477	graphs.append(FileGraph(tmp.name, xcol=1, ycol=idx+offset, title=label))
		478	xlabel = "working set size (kilobytes)"
		479	ylabel = "delta to third hot access " + yunit
		480	title = "difference of %s access costs (%.2f%% writes)" % (long, rw)
		481	yrange = None
		482
		483	fname = "%s_diff_%s" % (name, kind)
		484	gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname,
		485	yrange=yrange, logscale=axis, format=self.options.format)
		486	# del tmp
135		487
136	def plot_file(self, datafile):	488	def plot_file(self, datafile):
137	bname = basename(datafile)	489	bname = basename(datafile)
@@ -142,6 +494,10 @@ class CyclePlotter(defapp.App):
142	conf = decode(name)	494	conf = decode(name)
143	if 'pmo' in conf:	495	if 'pmo' in conf:
144	self.plot_preempt_migrate(datafile, name, conf)	496	self.plot_preempt_migrate(datafile, name, conf)
		497	elif 'pmo-aggr' in conf:
		498	self.plot_pmo_aggr(datafile, name, conf)
		499	elif 'pmo-all' in conf:
		500	self.plot_pmo_all(datafile, name, conf)
145	else:	501	else:
146	self.err("Skipped '%s'; unkown experiment type."	502	self.err("Skipped '%s'; unkown experiment type."
147	% bname)	503	% bname)
@@ -150,5 +506,8 @@ class CyclePlotter(defapp.App):
150	for datafile in self.args:	506	for datafile in self.args:
151	self.plot_file(datafile)	507	self.plot_file(datafile)
152		508
		509	def do_aggregate(self, _):
		510	self.write_aggregate(self.args[1:])
		511
153	if __name__ == "__main__":	512	if __name__ == "__main__":
154	CyclePlotter().launch()	513	CyclePlotter().launch()