From 137365d1ea9c736f67184b6ed0bb683326a55243 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20B=2E=20Brandenburg?= <bbb@cs.unc.edu>
Date: Sat, 27 Mar 2010 11:46:37 -0400
Subject: First steps in aggregate plotting.

---
 plot_pm2.py | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 130 insertions(+), 9 deletions(-)

(limited to 'plot_pm2.py')

diff --git a/plot_pm2.py b/plot_pm2.py
index 866b4b2..3c0174d 100755
--- a/plot_pm2.py
+++ b/plot_pm2.py
@@ -3,6 +3,8 @@ from os.path  import splitext, basename
 from optparse import make_option as o
 from tempfile import NamedTemporaryFile as Tmp
 
+from collections import defaultdict
+
 import numpy as np
 from util import load_csv_file, select
 
@@ -61,6 +63,19 @@ PMO_SUBPLOTS = [
     (3, 10,    9,  True),
 ]
 
+PMO_AGGR_SUBPLOTS = [
+    # (x column, y column, y-delta column or None, also split by mem-hierarchy?); columns index PMO_COL_LABEL
+    (0,  6, None, False),
+    (0,  7, None, False),
+    (0,  8, None, False),
+    (0,  9, None, False),
+    (0, 10, None,  True),
+    (0, 10,    6,  True),
+    (0, 10,    7,  True),
+    (0, 10,    9,  True),
+    (0, 10,    8,  True),
+]
+
 PMO_COL_LABEL = [('measurement', 'sample', 'index'),
                  ('write cycles', 'wcycle', 'every nth access'),
                  ('WSS', 'wcc', 'kilobytes'),
@@ -84,6 +99,7 @@ options = [
     o(None, '--wide', action='store_true', dest='wide'),
     o(None, '--split', action='store_true', dest='split'),
     o(None, '--extend', action='store', type='float', dest='extend'),
+    o(None, '--aggregate', action='store_true', dest='aggregate'),
     ]
 
 defaults = {
@@ -91,6 +107,7 @@ defaults = {
     'paper'  : False,
     'split'  : False,
     'wide'   : False,
+    'aggregate' : False,
     'extend' : 1.5,
     }
 
@@ -105,37 +122,94 @@ def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: Tru
 class CyclePlotter(defapp.App):
     def __init__(self):
         defapp.App.__init__(self, options, defaults, no_std_opts=True)
+        self.aggregate_data = []
 
-    def setup_pmo_graphs(self, datafile, conf):
+    def setup_pmo_graphs(self, datafile, conf, subplots=PMO_SUBPLOTS):
         host = conf['host']
         if host in MACHINE_TOPOLOGY:
             (cpus, hier) = MACHINE_TOPOLOGY[host]
             plots = []
             data = load_csv_file(datafile, dtype=int)
-            for (xcol, ycol, yminus, by_mem_hierarchy) in PMO_SUBPLOTS:
+            for (xcol, ycol, yminus, by_mem_hierarchy) in subplots:
                 sub = [('all', lambda x, y: True)]
                 if by_mem_hierarchy:
                     sub += hier
                 for tag, test in sub:
-                    tmp    = Tmp()
                     rows = extract_cols(data,
                                         xcol, ycol, yminus,
                                         cpu_filter=test)
-                    for row in rows:
-                        tmp.write("%s, %s\n" % (row[0], row[1]))
-                    tmp.flush()
-                    plots.append((tmp, xcol, ycol, yminus, tag, rows))
+                    plots.append((rows, xcol, ycol, yminus, tag))
             return plots
         else:
             self.err('Unkown host: %s' % host)
             return None
 
+    def write_aggregate(self, datafiles):
+        """Aggregate per-file PMO statistics into one CSV per (figure, wcycle), sorted by WSS."""
+        # figname -> wcycle -> [(wss, avg, std, max, #samples kept, #outliers removed)]
+        by_tag    = defaultdict(lambda: defaultdict(list))
+
+        for i, datafile in enumerate(datafiles):
+            print '[%d/%d] Processing %s...' % (i + 1, len(datafiles), datafile)
+            bname = basename(datafile)
+            name, ext = splitext(bname)
+            if ext != '.csv':
+                self.err("Warning: '%s' doesn't look like a CSV file."
+                         % bname)
+            conf    = decode(name)
+            if 'pmo' in conf:
+                plots = self.setup_pmo_graphs(datafile, conf, PMO_AGGR_SUBPLOTS)
+                if plots is None:
+                    print "Skipping %s..." % datafile
+                    continue  # skip only this file; the others must still be aggregated
+                wss = int(conf['wss'])
+                wcycle  = int(conf['wcycle'])
+                host = conf['host']
+                for (rows, xcol, ycol, yminus, tag) in plots:
+                    clean = stats.iqr_remove_outliers(rows, extend=self.options.extend)
+                    vals = clean[:,1]
+                    avg  = np.mean(vals)
+                    std  = np.std(vals, ddof=1)
+                    wc   = np.max(vals)
+                    n    = len(vals)
+
+                    xtag = PMO_COL_LABEL[xcol][1]
+                    ytag = PMO_COL_LABEL[ycol][1]
+                    dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
+                    code = "code=%s-%s-%s-%s" % \
+                        (xcol, ycol, yminus, tag)
+                    figname = "host=%s_%s%s-vs-%s_%s_%s" % \
+                        (host, ytag, dtag, xtag, tag, code)
+                    by_tag[figname][wcycle].append((wss, avg, std, wc, n, len(rows) - n))
+                del plots
+            else:
+                self.err("Warning: '%s' is not a PMO experiment; skipping." % bname)
+
+        for figname in by_tag:
+            for wcycle in by_tag[figname]:
+                data = by_tag[figname][wcycle]
+                # sort by increasing WSS
+                data.sort(key=lambda row: row[0])
+                f = open('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), 'w')
+                for row in data:
+                    f.write(", ".join([str(x) for x in row]))
+                    f.write('\n')
+                f.close()
+
     def plot_preempt_migrate(self, datafile, name, conf):
         plots = self.setup_pmo_graphs(datafile, conf)
         if plots is None:
             print "Skipping %s..." % datafile
             return
-        for (tmp, xcol, ycol, yminus, tag, rows) in plots:
+        else:
+            print 'Plotting %s...' % datafile
+        for (rows, xcol, ycol, yminus, tag) in plots:
+            # Write it to a temp file.
+            tmp    = Tmp()
+            for row in rows:
+                tmp.write("%s, %s\n" % (row[0], row[1]))
+            tmp.flush()
+
             xtag = PMO_COL_LABEL[xcol][1]
             ytag = PMO_COL_LABEL[ycol][1]
             dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
@@ -147,7 +221,6 @@ class CyclePlotter(defapp.App):
             title = "%s" % ylabel
             if ycol == 10:
                 title += " from %s" % PMO_MEM[tag]
-            title += "\\n"
             for key in conf:
                 if key in PMO_PARAM:
                     title += " %s=%s" % (PMO_PARAM[key], conf[key])
@@ -169,6 +242,49 @@ class CyclePlotter(defapp.App):
                     style='points',
                     format=self.options.format,
                     fname=figname)
+            del tmp # delete temporary file
+
+    def plot_pmo_aggr(self, datafile, name, conf):
+        """Plot the average (with error bars) and the maximum vs. WSS from one aggregate CSV."""
+        fname = datafile
+        code = conf['code']
+        # code is "xcol-ycol-yminus-tag"; cap the split so a '-' inside tag cannot break unpacking
+        (xcol, ycol, yminus, tag) = code.split('-', 3)
+
+        xcol = int(xcol)
+        ycol = int(ycol)
+        if yminus != "None":
+            yminus = int(yminus)
+        else:
+            yminus = None
+
+        xtag = PMO_COL_LABEL[xcol][1]
+        ytag = PMO_COL_LABEL[ycol][1]
+        dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
+        figname = "%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag)
+        yunit = PMO_COL_LABEL[ycol][2]
+        ylabel = PMO_COL_LABEL[ycol][0]
+        title = "%s" % ylabel
+
+        ylabel="%s (%s)" % ("access cost" if yminus is None
+                            else "delta to %s" % PMO_COL_LABEL[yminus][0],
+                            yunit)
+        if ycol == 10:
+            title += " from %s" % PMO_MEM[tag]
+        for key in conf:
+            if key in PMO_PARAM:
+                title += " %s=%s" % (PMO_PARAM[key], conf[key])
+
+        graphs = [
+            # file columns as written by write_aggregate: 1=WSS, 2=average, 3=std. dev., 4=maximum
+            "'%s' using 1:2:3 title 'average' with errorbars" % (fname),
+            (fname, 1, 4, "maximum"),
+            ]
+        xlabel = "working set size (kilobytes)"
+
+        gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=figname,
+                logscale="xy 2" if yminus is None else "x 2",
+                format=self.options.format)
 
     def plot_file(self, datafile):
         bname = basename(datafile)
@@ -179,6 +295,8 @@ class CyclePlotter(defapp.App):
         conf    = decode(name)
         if 'pmo' in conf:
             self.plot_preempt_migrate(datafile, name, conf)
+        elif 'pmo-aggr' in conf:
+            self.plot_pmo_aggr(datafile, name, conf)
         else:
             self.err("Skipped '%s'; unkown experiment type."
                      % bname)
@@ -187,5 +305,8 @@ class CyclePlotter(defapp.App):
         for datafile in self.args:
             self.plot_file(datafile)
 
+    def do_aggregate(self, _):
+        self.write_aggregate(self.args[1:])  # NOTE(review): args[0] is presumably the command word itself — confirm against defapp
+
 if __name__ == "__main__":
     CyclePlotter().launch()
-- 
cgit v1.2.2