From 031d41687127b7eb074229dbc114eb52340472c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20B=2E=20Brandenburg?= <bbb@cs.unc.edu>
Date: Sat, 27 Mar 2010 20:10:00 -0400
Subject: More work on aggregate plotting.

---
 plot_pm2.py | 196 +++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 175 insertions(+), 21 deletions(-)

(limited to 'plot_pm2.py')

diff --git a/plot_pm2.py b/plot_pm2.py
index 3c0174d..1332825 100755
--- a/plot_pm2.py
+++ b/plot_pm2.py
@@ -4,9 +4,10 @@ from optparse import make_option as o
 from tempfile import NamedTemporaryFile as Tmp
 
 from collections import defaultdict
+from itertools import izip
 
 import numpy as np
-from util import load_csv_file, select
+from util import *
 
 import stats
 import defapp
@@ -70,10 +71,14 @@ PMO_AGGR_SUBPLOTS = [
     (0,  8, None, False),
     (0,  9, None, False),
     (0, 10, None,  True),
-    (0, 10,    6,  True),
-    (0, 10,    7,  True),
+#    (0, 10,    6,  True),
+#    (0, 10,    7,  True),
+#    (0, 10,    8,  True),
     (0, 10,    9,  True),
-    (0, 10,    8,  True),
+]
+
+PMO_AGGR_COMBINE = [
+    [(6, 'all'), (7, 'all'), (8, 'all'), (9, 'all')]
 ]
 
 PMO_COL_LABEL = [('measurement', 'sample', 'index'),
@@ -100,6 +105,7 @@ options = [
     o(None, '--split', action='store_true', dest='split'),
     o(None, '--extend', action='store', type='float', dest='extend'),
     o(None, '--aggregate', action='store_true', dest='aggregate'),
+    o('-c', '--cycles-per-usec', action='store', type='float', dest='cycles_per_usec'),
     ]
 
 defaults = {
@@ -109,6 +115,7 @@ defaults = {
     'wide'   : False,
     'aggregate' : False,
     'extend' : 1.5,
+    'cycles_per_usec' : None,
     }
 
 def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True):
@@ -149,6 +156,8 @@ class CyclePlotter(defapp.App):
         # by tag -> by wcycle -> list of data points)
         by_tag    = defaultdict(lambda: defaultdict(list))
 
+        host = None
+
         for i, datafile in enumerate(datafiles):
             print '[%d/%d] Processing %s...' % (i + 1, len(datafiles), datafile)
             bname = basename(datafile)
@@ -162,9 +171,14 @@ class CyclePlotter(defapp.App):
                 if plots is None:
                     print "Skipping %s..." % datafile
                     return
+                if not host:
+                    host = conf['host']
+                if host != conf['host']:
+                    self.err('Mixing data from two hosts! (%s, %s)' % (host, conf['host']))
+                    self.err('Aborting.')
+                    return
                 wss = int(conf['wss'])
                 wcycle  = int(conf['wcycle'])
-                host = conf['host']
                 for (rows, xcol, ycol, yminus, tag) in plots:
                     clean = stats.iqr_remove_outliers(rows, extend=self.options.extend)
                     vals = clean[:,1]
@@ -173,28 +187,64 @@ class CyclePlotter(defapp.App):
                     wc   = np.max(vals)
                     n    = len(vals)
 
-                    xtag = PMO_COL_LABEL[xcol][1]
-                    ytag = PMO_COL_LABEL[ycol][1]
-                    dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
-                    code = "code=%s-%s-%s-%s" % \
-                        (xcol, ycol, yminus, tag)
-                    figname = "host=%s_%s%s-vs-%s_%s_%s" % \
-                        (host, ytag, dtag, xtag, tag, code)
-                    by_tag[figname][wcycle].append((wss, avg, std, wc, n, len(rows) - n))
+                    key = (xcol, ycol, yminus, tag)
+                    by_tag[key][wcycle].append((wss, avg, std, wc, n, len(rows) - n))
                 del plots
             else:
                 self.err("Warning: '%s' is not a PMO experiment; skipping." % bname)
 
-        for figname in by_tag:
-            for wcycle in by_tag[figname]:
-                data = by_tag[figname][wcycle]
+        all_wss    = set()
+        all_wcycle = set()
+
+        for key in by_tag:
+            for wcycle in by_tag[key]:
+                all_wcycle.add(wcycle)
+
+                data = by_tag[key][wcycle]
                 # sort by increasing WSS
                 data.sort(key=lambda row: row[0])
-                f = open('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), 'w')
                 for row in data:
-                    f.write(", ".join([str(x) for x in row]))
-                    f.write('\n')
-                f.close()
+                    all_wss.add(row[0])
+
+                (xcol, ycol, yminus, tag) = key
+
+                xtag = PMO_COL_LABEL[xcol][1]
+                ytag = PMO_COL_LABEL[ycol][1]
+                dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
+                code = "code=%s-%s-%s-%s" % key
+                figname = "host=%s_%s%s-vs-%s_%s_%s" % \
+                    (host, ytag, dtag, xtag, tag, code)
+
+                write_csv_file('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), data)
+
+
+        mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]]
+
+        for wcycle in all_wcycle:
+            try:
+                rows = [[wss] for wss in sorted(all_wss)]
+                header = ['wss']
+                for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS:
+                    tags = ['all'] + (mems if split else [])
+                    for tag in tags:
+                        # label the column after this subplot's own y column
+                        # (not the stale 'ycol' left over from the loops above)
+                        col_name = "%s %s" % (PMO_COL_LABEL[y][1], tag)
+                        if not yminus is None:
+                            col_name += ' - ' + PMO_COL_LABEL[yminus][1]
+                        header += [col_name + " avg", col_name + " std", col_name + " wc"]
+                        key = (x, y, yminus, tag)
+                        data  = by_tag[key][wcycle]
+                        for r, d in izip(rows, data):
+                            if r[0] != d[0]:
+                                print "mismatch", r[0], d[0], key, wcycle
+                            assert r[0] == d[0] # working set size must match
+                            r += d[1:4]  # (average, std, wc)
+                write_csv_file('pmo-all_wcycle=%d_host=%s.csv' % (wcycle, host),
+                               rows, header, width=max([len(h) for h in header]))
+            except AssertionError:
+                self.err("Data missing for wcycle=%d!" % wcycle)
+
 
     def plot_preempt_migrate(self, datafile, name, conf):
         plots = self.setup_pmo_graphs(datafile, conf)
@@ -259,7 +309,7 @@ class CyclePlotter(defapp.App):
         xtag = PMO_COL_LABEL[xcol][1]
         ytag = PMO_COL_LABEL[ycol][1]
         dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
-        figname = "%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag)
+        figname = name #"%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag)
         xunit = PMO_COL_LABEL[xcol][2]
         yunit = PMO_COL_LABEL[ycol][2]
         ylabel = PMO_COL_LABEL[ycol][0]
@@ -282,10 +332,112 @@ class CyclePlotter(defapp.App):
             ]
         xlabel = "working set size (kilobytes)"
 
+        yrange = (4096, 2**26) if yminus is None else None
+
         gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=figname,
+                yrange=yrange,
                 logscale="xy 2" if yminus is None else "x 2",
                 format=self.options.format)
 
+    def plot_pmo_all(self, datafile, name, conf):
+        """Plot the combined ('pmo-all') CSV file of one host.
+
+        Generates three pairs of figures (one for the average, one for the
+        maximum): raw access times, per-sample deltas to the hot access,
+        and differences relative to the third hot access.
+
+        datafile -- path of the CSV file to load
+        name     -- prefix of the generated figure names
+        conf     -- experiment parameters; 'host' and 'wcycle' are read
+        """
+        host = conf['host']
+        mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]]
+        # Gnuplot column 1 is used as the x value (WSS); each (subplot, tag)
+        # pair is then expected to own three consecutive columns
+        # (avg, std, wc), with idx being the 1-based column of the avg.
+        columns = []
+        idx = 2
+        for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS:
+            tags = ['all']
+            if split:
+                tags += mems
+            for tag in tags:
+                columns.append((x, y, yminus, tag, idx))
+                idx += 3
+
+        data  = load_csv_file(datafile)
+        if self.options.cycles_per_usec:
+            yunit = "(us)"
+            # scale every column except the first (the WSS) to microseconds
+            data[:, 1:] /= self.options.cycles_per_usec
+        else:
+            yunit = "(cycles)"
+
+        # 1/wcycle as a percentage; shown as "% writes" in the titles
+        rw = int(conf['wcycle'])
+        rw = 1.0 / rw * 100 if rw != 0 else 0
+
+        xlabel = "working set size (kilobytes)"
+        stats_kinds = [(0, 'avg', 'average'), (2, 'wc', 'maximum')]
+
+        def plot(src, suffix, ylabel, title_fmt, selected):
+            # One figure per statistic; each figure is rendered exactly once,
+            # after all of its curves have been collected.
+            for offset, kind, desc in stats_kinds:
+                graphs = [(src.name, 1, c + offset, label)
+                          for (c, label) in selected]
+                gnuplot(graphs, xlabel=xlabel, ylabel=ylabel + yunit,
+                        title=title_fmt % (desc, rw),
+                        fname="%s_%s_%s" % (name, suffix, kind), yrange=None,
+                        logscale=("x 2", "y 10"), format=self.options.format)
+
+        # raw measures
+        raw = []
+        for (x, y, yminus, tag, idx) in columns:
+            if yminus is None:
+                label = PMO_COL_LABEL[y][0]
+                if y == 10:
+                    label += " from %s" % PMO_MEM[tag]
+                raw.append((idx, label))
+        # per-sample delta measures
+        delta = [(idx, "%s" % PMO_MEM[tag])
+                 for (x, y, yminus, tag, idx) in columns
+                 if not (yminus is None) and tag != 'all']
+
+        # write_csv_file(None, ...) presumably creates a temporary file;
+        # Gnuplot is pointed at it through its .name attribute
+        tmp = write_csv_file(None, data)
+        plot(tmp, 'full', "time to complete access ",
+             "measured %s WSS access time (%.2f%% writes)", raw)
+        plot(tmp, 'delta', "per-sample delta to hot access ",
+             "measured %s overhead (%.2f%% writes)", delta)
+        # drop the reference (presumably this discards the temporary file)
+        del tmp
+
+        # stats delta
+        # find hot column
+        hot = [idx for (x, y, yminus, tag, idx) in columns
+               if x == 0 and y == 9 and yminus is None and tag == 'all']
+        if not hot:
+            # otherwise the index arithmetic below would fail on None
+            self.err("No hot-access column (0, 9, None, 'all') found; skipping diff plots.")
+            return
+        col = hot[0]
+        # normalize based on third hot access
+        # +1/-1 to get zero-based indices; Gnuplot wants 1-based indices
+        hot_avg = data[:,col - 1].copy()
+        hot_wc  = data[:,col + 1].copy()
+        for (x, y, yminus, tag, idx) in columns:
+            data[:,idx - 1] -= hot_avg
+            data[:,idx + 1] -= hot_wc
+
+        diff = [(idx, PMO_MEM[tag]) for (x, y, yminus, tag, idx) in columns
+                if yminus is None and tag != 'all']
+        tmp = write_csv_file(None, data)
+        plot(tmp, 'diff', "delta to third hot access ",
+             "difference of %s access costs (%.2f%% writes)", diff)
+        del tmp
+
     def plot_file(self, datafile):
         bname = basename(datafile)
         name, ext = splitext(bname)
@@ -297,6 +449,8 @@ class CyclePlotter(defapp.App):
             self.plot_preempt_migrate(datafile, name, conf)
         elif 'pmo-aggr' in conf:
             self.plot_pmo_aggr(datafile, name, conf)
+        elif 'pmo-all' in conf:
+            self.plot_pmo_all(datafile, name, conf)
         else:
             self.err("Skipped '%s'; unkown experiment type."
                      % bname)
-- 
cgit v1.2.2