diff options
Diffstat (limited to 'plot_pm2.py')
-rwxr-xr-x | plot_pm2.py | 196 |
1 files changed, 175 insertions, 21 deletions
diff --git a/plot_pm2.py b/plot_pm2.py index 3c0174d..1332825 100755 --- a/plot_pm2.py +++ b/plot_pm2.py | |||
@@ -4,9 +4,10 @@ from optparse import make_option as o | |||
4 | from tempfile import NamedTemporaryFile as Tmp | 4 | from tempfile import NamedTemporaryFile as Tmp |
5 | 5 | ||
6 | from collections import defaultdict | 6 | from collections import defaultdict |
7 | from itertools import izip | ||
7 | 8 | ||
8 | import numpy as np | 9 | import numpy as np |
9 | from util import load_csv_file, select | 10 | from util import * |
10 | 11 | ||
11 | import stats | 12 | import stats |
12 | import defapp | 13 | import defapp |
@@ -70,10 +71,14 @@ PMO_AGGR_SUBPLOTS = [ | |||
70 | (0, 8, None, False), | 71 | (0, 8, None, False), |
71 | (0, 9, None, False), | 72 | (0, 9, None, False), |
72 | (0, 10, None, True), | 73 | (0, 10, None, True), |
73 | (0, 10, 6, True), | 74 | # (0, 10, 6, True), |
74 | (0, 10, 7, True), | 75 | # (0, 10, 7, True), |
76 | # (0, 10, 8, True), | ||
75 | (0, 10, 9, True), | 77 | (0, 10, 9, True), |
76 | (0, 10, 8, True), | 78 | ] |
79 | |||
80 | PMO_AGGR_COMBINE = [ | ||
81 | [(6, 'all'), (7, 'all'), (8, 'all'), (9, 'all')] | ||
77 | ] | 82 | ] |
78 | 83 | ||
79 | PMO_COL_LABEL = [('measurement', 'sample', 'index'), | 84 | PMO_COL_LABEL = [('measurement', 'sample', 'index'), |
@@ -100,6 +105,7 @@ options = [ | |||
100 | o(None, '--split', action='store_true', dest='split'), | 105 | o(None, '--split', action='store_true', dest='split'), |
101 | o(None, '--extend', action='store', type='float', dest='extend'), | 106 | o(None, '--extend', action='store', type='float', dest='extend'), |
102 | o(None, '--aggregate', action='store_true', dest='aggregate'), | 107 | o(None, '--aggregate', action='store_true', dest='aggregate'), |
108 | o('-c', '--cycles-per-usec', action='store', type='float', dest='cycles_per_usec'), | ||
103 | ] | 109 | ] |
104 | 110 | ||
105 | defaults = { | 111 | defaults = { |
@@ -109,6 +115,7 @@ defaults = { | |||
109 | 'wide' : False, | 115 | 'wide' : False, |
110 | 'aggregate' : False, | 116 | 'aggregate' : False, |
111 | 'extend' : 1.5, | 117 | 'extend' : 1.5, |
118 | 'cycles_per_usec' : None, | ||
112 | } | 119 | } |
113 | 120 | ||
114 | def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True): | 121 | def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True): |
@@ -149,6 +156,8 @@ class CyclePlotter(defapp.App): | |||
149 | # by tag -> by wcycle -> list of data points) | 156 | # by tag -> by wcycle -> list of data points) |
150 | by_tag = defaultdict(lambda: defaultdict(list)) | 157 | by_tag = defaultdict(lambda: defaultdict(list)) |
151 | 158 | ||
159 | host = None | ||
160 | |||
152 | for i, datafile in enumerate(datafiles): | 161 | for i, datafile in enumerate(datafiles): |
153 | print '[%d/%d] Processing %s...' % (i + 1, len(datafiles), datafile) | 162 | print '[%d/%d] Processing %s...' % (i + 1, len(datafiles), datafile) |
154 | bname = basename(datafile) | 163 | bname = basename(datafile) |
@@ -162,9 +171,14 @@ class CyclePlotter(defapp.App): | |||
162 | if plots is None: | 171 | if plots is None: |
163 | print "Skipping %s..." % datafile | 172 | print "Skipping %s..." % datafile |
164 | return | 173 | return |
174 | if not host: | ||
175 | host = conf['host'] | ||
176 | if host != conf['host']: | ||
177 | self.err('Mixing data from two hosts! (%s, %s)' % (host, conf['host'])) | ||
178 | self.err('Aborting.') | ||
179 | return | ||
165 | wss = int(conf['wss']) | 180 | wss = int(conf['wss']) |
166 | wcycle = int(conf['wcycle']) | 181 | wcycle = int(conf['wcycle']) |
167 | host = conf['host'] | ||
168 | for (rows, xcol, ycol, yminus, tag) in plots: | 182 | for (rows, xcol, ycol, yminus, tag) in plots: |
169 | clean = stats.iqr_remove_outliers(rows, extend=self.options.extend) | 183 | clean = stats.iqr_remove_outliers(rows, extend=self.options.extend) |
170 | vals = clean[:,1] | 184 | vals = clean[:,1] |
@@ -173,28 +187,64 @@ class CyclePlotter(defapp.App): | |||
173 | wc = np.max(vals) | 187 | wc = np.max(vals) |
174 | n = len(vals) | 188 | n = len(vals) |
175 | 189 | ||
176 | xtag = PMO_COL_LABEL[xcol][1] | 190 | key = (xcol, ycol, yminus, tag) |
177 | ytag = PMO_COL_LABEL[ycol][1] | 191 | by_tag[key][wcycle].append((wss, avg, std, wc, n, len(rows) - n)) |
178 | dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" | ||
179 | code = "code=%s-%s-%s-%s" % \ | ||
180 | (xcol, ycol, yminus, tag) | ||
181 | figname = "host=%s_%s%s-vs-%s_%s_%s" % \ | ||
182 | (host, ytag, dtag, xtag, tag, code) | ||
183 | by_tag[figname][wcycle].append((wss, avg, std, wc, n, len(rows) - n)) | ||
184 | del plots | 192 | del plots |
185 | else: | 193 | else: |
186 | self.err("Warning: '%s' is not a PMO experiment; skipping." % bname) | 194 | self.err("Warning: '%s' is not a PMO experiment; skipping." % bname) |
187 | 195 | ||
188 | for figname in by_tag: | 196 | all_wss = set() |
189 | for wcycle in by_tag[figname]: | 197 | all_wcycle = set() |
190 | data = by_tag[figname][wcycle] | 198 | |
199 | for key in by_tag: | ||
200 | for wcycle in by_tag[key]: | ||
201 | all_wcycle.add(wcycle) | ||
202 | |||
203 | data = by_tag[key][wcycle] | ||
191 | # sort by increasing WSS | 204 | # sort by increasing WSS |
192 | data.sort(key=lambda row: row[0]) | 205 | data.sort(key=lambda row: row[0]) |
193 | f = open('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), 'w') | ||
194 | for row in data: | 206 | for row in data: |
195 | f.write(", ".join([str(x) for x in row])) | 207 | all_wss.add(row[0]) |
196 | f.write('\n') | 208 | |
197 | f.close() | 209 | (xcol, ycol, yminus, tag) = key |
210 | |||
211 | xtag = PMO_COL_LABEL[xcol][1] | ||
212 | ytag = PMO_COL_LABEL[ycol][1] | ||
213 | dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" | ||
214 | code = "code=%s-%s-%s-%s" % key | ||
215 | figname = "host=%s_%s%s-vs-%s_%s_%s" % \ | ||
216 | (host, ytag, dtag, xtag, tag, code) | ||
217 | |||
218 | write_csv_file('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), data) | ||
219 | |||
220 | |||
221 | mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]] | ||
222 | |||
223 | for wcycle in all_wcycle: | ||
224 | try: | ||
225 | rows = [[wss] for wss in sorted(all_wss)] | ||
226 | header = ['wss'] | ||
227 | for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS: | ||
228 | tags = ['all'] | ||
229 | if split: | ||
230 | tags += mems | ||
231 | for tag in tags: | ||
232 | col_name = "%s %s" % (PMO_COL_LABEL[ycol][1], tag) | ||
233 | if not yminus is None: | ||
234 | col_name += ' - ' + PMO_COL_LABEL[yminus][1] | ||
235 | header += [col_name + " avg", col_name + " std", col_name + " wc"] | ||
236 | key = (x, y, yminus, tag) | ||
237 | data = by_tag[key][wcycle] | ||
238 | for r, d in izip(rows, data): | ||
239 | if r[0] != d[0]: | ||
240 | print "mismatch", r[0], d[0], key, wcycle | ||
241 | assert r[0] == d[0] # working set size must match | ||
242 | r += d[1:4] # (average, std, wc) | ||
243 | write_csv_file('pmo-all_wcycle=%d_host=%s.csv' % (wcycle, host), | ||
244 | rows, header, width=max([len(h) for h in header])) | ||
245 | except AssertionError: | ||
246 | self.err("Data missing for wcycle=%d!" % wcycle) | ||
247 | |||
198 | 248 | ||
199 | def plot_preempt_migrate(self, datafile, name, conf): | 249 | def plot_preempt_migrate(self, datafile, name, conf): |
200 | plots = self.setup_pmo_graphs(datafile, conf) | 250 | plots = self.setup_pmo_graphs(datafile, conf) |
@@ -259,7 +309,7 @@ class CyclePlotter(defapp.App): | |||
259 | xtag = PMO_COL_LABEL[xcol][1] | 309 | xtag = PMO_COL_LABEL[xcol][1] |
260 | ytag = PMO_COL_LABEL[ycol][1] | 310 | ytag = PMO_COL_LABEL[ycol][1] |
261 | dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" | 311 | dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" |
262 | figname = "%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag) | 312 | figname = name #"%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag) |
263 | xunit = PMO_COL_LABEL[xcol][2] | 313 | xunit = PMO_COL_LABEL[xcol][2] |
264 | yunit = PMO_COL_LABEL[ycol][2] | 314 | yunit = PMO_COL_LABEL[ycol][2] |
265 | ylabel = PMO_COL_LABEL[ycol][0] | 315 | ylabel = PMO_COL_LABEL[ycol][0] |
@@ -282,10 +332,112 @@ class CyclePlotter(defapp.App): | |||
282 | ] | 332 | ] |
283 | xlabel = "working set size (kilobytes)" | 333 | xlabel = "working set size (kilobytes)" |
284 | 334 | ||
335 | yrange = (4096, 2**26) if yminus is None else None | ||
336 | |||
285 | gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=figname, | 337 | gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=figname, |
338 | yrange=yrange, | ||
286 | logscale="xy 2" if yminus is None else "x 2", | 339 | logscale="xy 2" if yminus is None else "x 2", |
287 | format=self.options.format) | 340 | format=self.options.format) |
288 | 341 | ||
342 | def plot_pmo_all(self, datafile, name, conf): | ||
343 | host = conf['host'] | ||
344 | mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]] | ||
345 | columns = [] | ||
346 | idx = 2 | ||
347 | for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS: | ||
348 | tags = ['all'] | ||
349 | if split: | ||
350 | tags += mems | ||
351 | for tag in tags: | ||
352 | columns.append((x, y, yminus, tag, idx)) | ||
353 | idx += 3 | ||
354 | |||
355 | data = load_csv_file(datafile) | ||
356 | if self.options.cycles_per_usec: | ||
357 | yunit = "(us)" | ||
358 | data[:, 1:] /= self.options.cycles_per_usec | ||
359 | else: | ||
360 | yunit = "(cycles)" | ||
361 | tmp = write_csv_file(None, data) | ||
362 | |||
363 | rw = int(conf['wcycle']) | ||
364 | rw = 1.0 / rw * 100 if rw != 0 else 0 | ||
365 | |||
366 | # raw measures | ||
367 | for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]: | ||
368 | graphs = [] | ||
369 | for (x, y, yminus, tag, idx) in columns: | ||
370 | if yminus is None: | ||
371 | label = PMO_COL_LABEL[y][0] | ||
372 | if y == 10: | ||
373 | label += " from %s" % PMO_MEM[tag] | ||
374 | graphs += [ | ||
375 | (tmp.name, 1, idx + offset, label), | ||
376 | ] | ||
377 | xlabel = "working set size (kilobytes)" | ||
378 | ylabel = "time to complete access " + yunit | ||
379 | title = "measured %s WSS access time (%.2f%% writes)" % (long, rw) | ||
380 | yrange = None #(4096, 2**26) | ||
381 | |||
382 | fname = "%s_full_%s" % (name, kind) | ||
383 | gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname, | ||
384 | yrange=yrange, logscale=("x 2", "y 10"), format=self.options.format) | ||
385 | |||
386 | # per-sample delta measures | ||
387 | for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]: | ||
388 | graphs = [] | ||
389 | for (x, y, yminus, tag, idx) in columns: | ||
390 | if not (yminus is None) and tag != 'all': | ||
391 | label = "%s" % PMO_MEM[tag] | ||
392 | graphs += [ | ||
393 | (tmp.name, 1, idx + offset, label), | ||
394 | ] | ||
395 | xlabel = "working set size (kilobytes)" | ||
396 | ylabel = "per-sample delta to hot access " + yunit | ||
397 | title = "measured %s overhead (%.2f%% writes)" % (long, rw) | ||
398 | yrange = None | ||
399 | |||
400 | fname = "%s_delta_%s" % (name, kind) | ||
401 | gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname, | ||
402 | yrange=yrange, logscale=("x 2", "y 10"), format=self.options.format) | ||
403 | del tmp | ||
404 | |||
405 | # stats delta | ||
406 | # find hot column | ||
407 | col = None | ||
408 | for (x, y, yminus, tag, idx) in columns: | ||
409 | if x == 0 and y == 9 and yminus is None and tag == 'all': | ||
410 | col = idx | ||
411 | break | ||
412 | # normalize based on third hot access | ||
413 | # +1/-1 to get zero-based indices; Gnuplot wants 1-based indices | ||
414 | hot_avg = data[:,col - 1].copy() | ||
415 | hot_wc = data[:,col + 1].copy() | ||
416 | for (x, y, yminus, tag, idx) in columns: | ||
417 | data[:,idx - 1] -= hot_avg | ||
418 | data[:,idx + 1] -= hot_wc | ||
419 | |||
420 | tmp = write_csv_file(None, data) | ||
421 | |||
422 | for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]: | ||
423 | graphs = [] | ||
424 | for (x, y, yminus, tag, idx) in columns: | ||
425 | if yminus is None and tag != 'all': | ||
426 | label = PMO_COL_LABEL[y][0] | ||
427 | label = PMO_MEM[tag] | ||
428 | graphs += [ | ||
429 | (tmp.name, 1, idx + offset, label), | ||
430 | ] | ||
431 | xlabel = "working set size (kilobytes)" | ||
432 | ylabel = "delta to third hot access " + yunit | ||
433 | title = "difference of %s access costs (%.2f%% writes)" % (long, rw) | ||
434 | yrange = None | ||
435 | |||
436 | fname = "%s_diff_%s" % (name, kind) | ||
437 | gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname, | ||
438 | yrange=yrange, logscale=("x 2", "y 10"), format=self.options.format) | ||
439 | del tmp | ||
440 | |||
289 | def plot_file(self, datafile): | 441 | def plot_file(self, datafile): |
290 | bname = basename(datafile) | 442 | bname = basename(datafile) |
291 | name, ext = splitext(bname) | 443 | name, ext = splitext(bname) |
@@ -297,6 +449,8 @@ class CyclePlotter(defapp.App): | |||
297 | self.plot_preempt_migrate(datafile, name, conf) | 449 | self.plot_preempt_migrate(datafile, name, conf) |
298 | elif 'pmo-aggr' in conf: | 450 | elif 'pmo-aggr' in conf: |
299 | self.plot_pmo_aggr(datafile, name, conf) | 451 | self.plot_pmo_aggr(datafile, name, conf) |
452 | elif 'pmo-all' in conf: | ||
453 | self.plot_pmo_all(datafile, name, conf) | ||
300 | else: | 454 | else: |
301 | self.err("Skipped '%s'; unkown experiment type." | 455 | self.err("Skipped '%s'; unkown experiment type." |
302 | % bname) | 456 | % bname) |