about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Björn B. Brandenburg <bbb@cs.unc.edu> 2010-03-27 20:10:00 -0400
committer Björn B. Brandenburg <bbb@cs.unc.edu> 2010-03-27 20:10:00 -0400
commit 031d41687127b7eb074229dbc114eb52340472c5 (patch)
tree a0a0b4c1b7c91a1b49ed7432007c2fc9531c708d
parent 78d011ded95ac9dec18e68699a8ebf4fb3dc8797 (diff)
More work on aggregate plotting.
-rwxr-xr-x plot_pm2.py 196
1 files changed, 175 insertions, 21 deletions
diff --git a/plot_pm2.py b/plot_pm2.py
index 3c0174d..1332825 100755
--- a/plot_pm2.py
+++ b/plot_pm2.py
@@ -4,9 +4,10 @@ from optparse import make_option as o
4from tempfile import NamedTemporaryFile as Tmp 4from tempfile import NamedTemporaryFile as Tmp
5 5
6from collections import defaultdict 6from collections import defaultdict
7from itertools import izip
7 8
8import numpy as np 9import numpy as np
9from util import load_csv_file, select 10from util import *
10 11
11import stats 12import stats
12import defapp 13import defapp
@@ -70,10 +71,14 @@ PMO_AGGR_SUBPLOTS = [
70 (0, 8, None, False), 71 (0, 8, None, False),
71 (0, 9, None, False), 72 (0, 9, None, False),
72 (0, 10, None, True), 73 (0, 10, None, True),
73 (0, 10, 6, True), 74# (0, 10, 6, True),
74 (0, 10, 7, True), 75# (0, 10, 7, True),
76# (0, 10, 8, True),
75 (0, 10, 9, True), 77 (0, 10, 9, True),
76 (0, 10, 8, True), 78]
79
80PMO_AGGR_COMBINE = [
81 [(6, 'all'), (7, 'all'), (8, 'all'), (9, 'all')]
77] 82]
78 83
79PMO_COL_LABEL = [('measurement', 'sample', 'index'), 84PMO_COL_LABEL = [('measurement', 'sample', 'index'),
@@ -100,6 +105,7 @@ options = [
100 o(None, '--split', action='store_true', dest='split'), 105 o(None, '--split', action='store_true', dest='split'),
101 o(None, '--extend', action='store', type='float', dest='extend'), 106 o(None, '--extend', action='store', type='float', dest='extend'),
102 o(None, '--aggregate', action='store_true', dest='aggregate'), 107 o(None, '--aggregate', action='store_true', dest='aggregate'),
108 o('-c', '--cycles-per-usec', action='store', type='float', dest='cycles_per_usec'),
103 ] 109 ]
104 110
105defaults = { 111defaults = {
@@ -109,6 +115,7 @@ defaults = {
109 'wide' : False, 115 'wide' : False,
110 'aggregate' : False, 116 'aggregate' : False,
111 'extend' : 1.5, 117 'extend' : 1.5,
118 'cycles_per_usec' : None,
112 } 119 }
113 120
114def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True): 121def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True):
@@ -149,6 +156,8 @@ class CyclePlotter(defapp.App):
149 # by tag -> by wcycle -> list of data points) 156 # by tag -> by wcycle -> list of data points)
150 by_tag = defaultdict(lambda: defaultdict(list)) 157 by_tag = defaultdict(lambda: defaultdict(list))
151 158
159 host = None
160
152 for i, datafile in enumerate(datafiles): 161 for i, datafile in enumerate(datafiles):
153 print '[%d/%d] Processing %s...' % (i + 1, len(datafiles), datafile) 162 print '[%d/%d] Processing %s...' % (i + 1, len(datafiles), datafile)
154 bname = basename(datafile) 163 bname = basename(datafile)
@@ -162,9 +171,14 @@ class CyclePlotter(defapp.App):
162 if plots is None: 171 if plots is None:
163 print "Skipping %s..." % datafile 172 print "Skipping %s..." % datafile
164 return 173 return
174 if not host:
175 host = conf['host']
176 if host != conf['host']:
177 self.err('Mixing data from two hosts! (%s, %s)' % (host, conf['host']))
178 self.err('Aborting.')
179 return
165 wss = int(conf['wss']) 180 wss = int(conf['wss'])
166 wcycle = int(conf['wcycle']) 181 wcycle = int(conf['wcycle'])
167 host = conf['host']
168 for (rows, xcol, ycol, yminus, tag) in plots: 182 for (rows, xcol, ycol, yminus, tag) in plots:
169 clean = stats.iqr_remove_outliers(rows, extend=self.options.extend) 183 clean = stats.iqr_remove_outliers(rows, extend=self.options.extend)
170 vals = clean[:,1] 184 vals = clean[:,1]
@@ -173,28 +187,64 @@ class CyclePlotter(defapp.App):
173 wc = np.max(vals) 187 wc = np.max(vals)
174 n = len(vals) 188 n = len(vals)
175 189
176 xtag = PMO_COL_LABEL[xcol][1] 190 key = (xcol, ycol, yminus, tag)
177 ytag = PMO_COL_LABEL[ycol][1] 191 by_tag[key][wcycle].append((wss, avg, std, wc, n, len(rows) - n))
178 dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
179 code = "code=%s-%s-%s-%s" % \
180 (xcol, ycol, yminus, tag)
181 figname = "host=%s_%s%s-vs-%s_%s_%s" % \
182 (host, ytag, dtag, xtag, tag, code)
183 by_tag[figname][wcycle].append((wss, avg, std, wc, n, len(rows) - n))
184 del plots 192 del plots
185 else: 193 else:
186 self.err("Warning: '%s' is not a PMO experiment; skipping." % bname) 194 self.err("Warning: '%s' is not a PMO experiment; skipping." % bname)
187 195
188 for figname in by_tag: 196 all_wss = set()
189 for wcycle in by_tag[figname]: 197 all_wcycle = set()
190 data = by_tag[figname][wcycle] 198
199 for key in by_tag:
200 for wcycle in by_tag[key]:
201 all_wcycle.add(wcycle)
202
203 data = by_tag[key][wcycle]
191 # sort by increasing WSS 204 # sort by increasing WSS
192 data.sort(key=lambda row: row[0]) 205 data.sort(key=lambda row: row[0])
193 f = open('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), 'w')
194 for row in data: 206 for row in data:
195 f.write(", ".join([str(x) for x in row])) 207 all_wss.add(row[0])
196 f.write('\n') 208
197 f.close() 209 (xcol, ycol, yminus, tag) = key
210
211 xtag = PMO_COL_LABEL[xcol][1]
212 ytag = PMO_COL_LABEL[ycol][1]
213 dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
214 code = "code=%s-%s-%s-%s" % key
215 figname = "host=%s_%s%s-vs-%s_%s_%s" % \
216 (host, ytag, dtag, xtag, tag, code)
217
218 write_csv_file('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), data)
219
220
221 mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]]
222
223 for wcycle in all_wcycle:
224 try:
225 rows = [[wss] for wss in sorted(all_wss)]
226 header = ['wss']
227 for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS:
228 tags = ['all']
229 if split:
230 tags += mems
231 for tag in tags:
232 col_name = "%s %s" % (PMO_COL_LABEL[ycol][1], tag)
233 if not yminus is None:
234 col_name += ' - ' + PMO_COL_LABEL[yminus][1]
235 header += [col_name + " avg", col_name + " std", col_name + " wc"]
236 key = (x, y, yminus, tag)
237 data = by_tag[key][wcycle]
238 for r, d in izip(rows, data):
239 if r[0] != d[0]:
240 print "mismatch", r[0], d[0], key, wcycle
241 assert r[0] == d[0] # working set size must match
242 r += d[1:4] # (average, std, wc)
243 write_csv_file('pmo-all_wcycle=%d_host=%s.csv' % (wcycle, host),
244 rows, header, width=max([len(h) for h in header]))
245 except AssertionError:
246 self.err("Data missing for wcycle=%d!" % wcycle)
247
198 248
199 def plot_preempt_migrate(self, datafile, name, conf): 249 def plot_preempt_migrate(self, datafile, name, conf):
200 plots = self.setup_pmo_graphs(datafile, conf) 250 plots = self.setup_pmo_graphs(datafile, conf)
@@ -259,7 +309,7 @@ class CyclePlotter(defapp.App):
259 xtag = PMO_COL_LABEL[xcol][1] 309 xtag = PMO_COL_LABEL[xcol][1]
260 ytag = PMO_COL_LABEL[ycol][1] 310 ytag = PMO_COL_LABEL[ycol][1]
261 dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" 311 dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else ""
262 figname = "%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag) 312 figname = name #"%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag)
263 xunit = PMO_COL_LABEL[xcol][2] 313 xunit = PMO_COL_LABEL[xcol][2]
264 yunit = PMO_COL_LABEL[ycol][2] 314 yunit = PMO_COL_LABEL[ycol][2]
265 ylabel = PMO_COL_LABEL[ycol][0] 315 ylabel = PMO_COL_LABEL[ycol][0]
@@ -282,10 +332,112 @@ class CyclePlotter(defapp.App):
282 ] 332 ]
283 xlabel = "working set size (kilobytes)" 333 xlabel = "working set size (kilobytes)"
284 334
335 yrange = (4096, 2**26) if yminus is None else None
336
285 gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=figname, 337 gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=figname,
338 yrange=yrange,
286 logscale="xy 2" if yminus is None else "x 2", 339 logscale="xy 2" if yminus is None else "x 2",
287 format=self.options.format) 340 format=self.options.format)
288 341
342 def plot_pmo_all(self, datafile, name, conf):
343 host = conf['host']
344 mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]]
345 columns = []
346 idx = 2
347 for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS:
348 tags = ['all']
349 if split:
350 tags += mems
351 for tag in tags:
352 columns.append((x, y, yminus, tag, idx))
353 idx += 3
354
355 data = load_csv_file(datafile)
356 if self.options.cycles_per_usec:
357 yunit = "(us)"
358 data[:, 1:] /= self.options.cycles_per_usec
359 else:
360 yunit = "(cycles)"
361 tmp = write_csv_file(None, data)
362
363 rw = int(conf['wcycle'])
364 rw = 1.0 / rw * 100 if rw != 0 else 0
365
366 # raw measures
367 for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]:
368 graphs = []
369 for (x, y, yminus, tag, idx) in columns:
370 if yminus is None:
371 label = PMO_COL_LABEL[y][0]
372 if y == 10:
373 label += " from %s" % PMO_MEM[tag]
374 graphs += [
375 (tmp.name, 1, idx + offset, label),
376 ]
377 xlabel = "working set size (kilobytes)"
378 ylabel = "time to complete access " + yunit
379 title = "measured %s WSS access time (%.2f%% writes)" % (long, rw)
380 yrange = None #(4096, 2**26)
381
382 fname = "%s_full_%s" % (name, kind)
383 gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname,
384 yrange=yrange, logscale=("x 2", "y 10"), format=self.options.format)
385
386 # per-sample delta measures
387 for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]:
388 graphs = []
389 for (x, y, yminus, tag, idx) in columns:
390 if not (yminus is None) and tag != 'all':
391 label = "%s" % PMO_MEM[tag]
392 graphs += [
393 (tmp.name, 1, idx + offset, label),
394 ]
395 xlabel = "working set size (kilobytes)"
396 ylabel = "per-sample delta to hot access " + yunit
397 title = "measured %s overhead (%.2f%% writes)" % (long, rw)
398 yrange = None
399
400 fname = "%s_delta_%s" % (name, kind)
401 gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname,
402 yrange=yrange, logscale=("x 2", "y 10"), format=self.options.format)
403 del tmp
404
405 # stats delta
406 # find hot column
407 col = None
408 for (x, y, yminus, tag, idx) in columns:
409 if x == 0 and y == 9 and yminus is None and tag == 'all':
410 col = idx
411 break
412 # normalize based on third hot access
413 # +1/-1 to get zero-based indices; Gnuplot wants 1-based indices
414 hot_avg = data[:,col - 1].copy()
415 hot_wc = data[:,col + 1].copy()
416 for (x, y, yminus, tag, idx) in columns:
417 data[:,idx - 1] -= hot_avg
418 data[:,idx + 1] -= hot_wc
419
420 tmp = write_csv_file(None, data)
421
422 for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]:
423 graphs = []
424 for (x, y, yminus, tag, idx) in columns:
425 if yminus is None and tag != 'all':
426 label = PMO_COL_LABEL[y][0]
427 label = PMO_MEM[tag]
428 graphs += [
429 (tmp.name, 1, idx + offset, label),
430 ]
431 xlabel = "working set size (kilobytes)"
432 ylabel = "delta to third hot access " + yunit
433 title = "difference of %s access costs (%.2f%% writes)" % (long, rw)
434 yrange = None
435
436 fname = "%s_diff_%s" % (name, kind)
437 gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname,
438 yrange=yrange, logscale=("x 2", "y 10"), format=self.options.format)
439 del tmp
440
289 def plot_file(self, datafile): 441 def plot_file(self, datafile):
290 bname = basename(datafile) 442 bname = basename(datafile)
291 name, ext = splitext(bname) 443 name, ext = splitext(bname)
@@ -297,6 +449,8 @@ class CyclePlotter(defapp.App):
297 self.plot_preempt_migrate(datafile, name, conf) 449 self.plot_preempt_migrate(datafile, name, conf)
298 elif 'pmo-aggr' in conf: 450 elif 'pmo-aggr' in conf:
299 self.plot_pmo_aggr(datafile, name, conf) 451 self.plot_pmo_aggr(datafile, name, conf)
452 elif 'pmo-all' in conf:
453 self.plot_pmo_all(datafile, name, conf)
300 else: 454 else:
301 self.err("Skipped '%s'; unkown experiment type." 455 self.err("Skipped '%s'; unkown experiment type."
302 % bname) 456 % bname)