diff options
Diffstat (limited to 'plot_pm2.py')
| -rwxr-xr-x | plot_pm2.py | 196 |
1 files changed, 175 insertions, 21 deletions
diff --git a/plot_pm2.py b/plot_pm2.py index 3c0174d..1332825 100755 --- a/plot_pm2.py +++ b/plot_pm2.py | |||
| @@ -4,9 +4,10 @@ from optparse import make_option as o | |||
| 4 | from tempfile import NamedTemporaryFile as Tmp | 4 | from tempfile import NamedTemporaryFile as Tmp |
| 5 | 5 | ||
| 6 | from collections import defaultdict | 6 | from collections import defaultdict |
| 7 | from itertools import izip | ||
| 7 | 8 | ||
| 8 | import numpy as np | 9 | import numpy as np |
| 9 | from util import load_csv_file, select | 10 | from util import * |
| 10 | 11 | ||
| 11 | import stats | 12 | import stats |
| 12 | import defapp | 13 | import defapp |
| @@ -70,10 +71,14 @@ PMO_AGGR_SUBPLOTS = [ | |||
| 70 | (0, 8, None, False), | 71 | (0, 8, None, False), |
| 71 | (0, 9, None, False), | 72 | (0, 9, None, False), |
| 72 | (0, 10, None, True), | 73 | (0, 10, None, True), |
| 73 | (0, 10, 6, True), | 74 | # (0, 10, 6, True), |
| 74 | (0, 10, 7, True), | 75 | # (0, 10, 7, True), |
| 76 | # (0, 10, 8, True), | ||
| 75 | (0, 10, 9, True), | 77 | (0, 10, 9, True), |
| 76 | (0, 10, 8, True), | 78 | ] |
| 79 | |||
| 80 | PMO_AGGR_COMBINE = [ | ||
| 81 | [(6, 'all'), (7, 'all'), (8, 'all'), (9, 'all')] | ||
| 77 | ] | 82 | ] |
| 78 | 83 | ||
| 79 | PMO_COL_LABEL = [('measurement', 'sample', 'index'), | 84 | PMO_COL_LABEL = [('measurement', 'sample', 'index'), |
| @@ -100,6 +105,7 @@ options = [ | |||
| 100 | o(None, '--split', action='store_true', dest='split'), | 105 | o(None, '--split', action='store_true', dest='split'), |
| 101 | o(None, '--extend', action='store', type='float', dest='extend'), | 106 | o(None, '--extend', action='store', type='float', dest='extend'), |
| 102 | o(None, '--aggregate', action='store_true', dest='aggregate'), | 107 | o(None, '--aggregate', action='store_true', dest='aggregate'), |
| 108 | o('-c', '--cycles-per-usec', action='store', type='float', dest='cycles_per_usec'), | ||
| 103 | ] | 109 | ] |
| 104 | 110 | ||
| 105 | defaults = { | 111 | defaults = { |
| @@ -109,6 +115,7 @@ defaults = { | |||
| 109 | 'wide' : False, | 115 | 'wide' : False, |
| 110 | 'aggregate' : False, | 116 | 'aggregate' : False, |
| 111 | 'extend' : 1.5, | 117 | 'extend' : 1.5, |
| 118 | 'cycles_per_usec' : None, | ||
| 112 | } | 119 | } |
| 113 | 120 | ||
| 114 | def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True): | 121 | def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True): |
| @@ -149,6 +156,8 @@ class CyclePlotter(defapp.App): | |||
| 149 | # by tag -> by wcycle -> list of data points) | 156 | # by tag -> by wcycle -> list of data points) |
| 150 | by_tag = defaultdict(lambda: defaultdict(list)) | 157 | by_tag = defaultdict(lambda: defaultdict(list)) |
| 151 | 158 | ||
| 159 | host = None | ||
| 160 | |||
| 152 | for i, datafile in enumerate(datafiles): | 161 | for i, datafile in enumerate(datafiles): |
| 153 | print '[%d/%d] Processing %s...' % (i + 1, len(datafiles), datafile) | 162 | print '[%d/%d] Processing %s...' % (i + 1, len(datafiles), datafile) |
| 154 | bname = basename(datafile) | 163 | bname = basename(datafile) |
| @@ -162,9 +171,14 @@ class CyclePlotter(defapp.App): | |||
| 162 | if plots is None: | 171 | if plots is None: |
| 163 | print "Skipping %s..." % datafile | 172 | print "Skipping %s..." % datafile |
| 164 | return | 173 | return |
| 174 | if not host: | ||
| 175 | host = conf['host'] | ||
| 176 | if host != conf['host']: | ||
| 177 | self.err('Mixing data from two hosts! (%s, %s)' % (host, conf['host'])) | ||
| 178 | self.err('Aborting.') | ||
| 179 | return | ||
| 165 | wss = int(conf['wss']) | 180 | wss = int(conf['wss']) |
| 166 | wcycle = int(conf['wcycle']) | 181 | wcycle = int(conf['wcycle']) |
| 167 | host = conf['host'] | ||
| 168 | for (rows, xcol, ycol, yminus, tag) in plots: | 182 | for (rows, xcol, ycol, yminus, tag) in plots: |
| 169 | clean = stats.iqr_remove_outliers(rows, extend=self.options.extend) | 183 | clean = stats.iqr_remove_outliers(rows, extend=self.options.extend) |
| 170 | vals = clean[:,1] | 184 | vals = clean[:,1] |
| @@ -173,28 +187,64 @@ class CyclePlotter(defapp.App): | |||
| 173 | wc = np.max(vals) | 187 | wc = np.max(vals) |
| 174 | n = len(vals) | 188 | n = len(vals) |
| 175 | 189 | ||
| 176 | xtag = PMO_COL_LABEL[xcol][1] | 190 | key = (xcol, ycol, yminus, tag) |
| 177 | ytag = PMO_COL_LABEL[ycol][1] | 191 | by_tag[key][wcycle].append((wss, avg, std, wc, n, len(rows) - n)) |
| 178 | dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" | ||
| 179 | code = "code=%s-%s-%s-%s" % \ | ||
| 180 | (xcol, ycol, yminus, tag) | ||
| 181 | figname = "host=%s_%s%s-vs-%s_%s_%s" % \ | ||
| 182 | (host, ytag, dtag, xtag, tag, code) | ||
| 183 | by_tag[figname][wcycle].append((wss, avg, std, wc, n, len(rows) - n)) | ||
| 184 | del plots | 192 | del plots |
| 185 | else: | 193 | else: |
| 186 | self.err("Warning: '%s' is not a PMO experiment; skipping." % bname) | 194 | self.err("Warning: '%s' is not a PMO experiment; skipping." % bname) |
| 187 | 195 | ||
| 188 | for figname in by_tag: | 196 | all_wss = set() |
| 189 | for wcycle in by_tag[figname]: | 197 | all_wcycle = set() |
| 190 | data = by_tag[figname][wcycle] | 198 | |
| 199 | for key in by_tag: | ||
| 200 | for wcycle in by_tag[key]: | ||
| 201 | all_wcycle.add(wcycle) | ||
| 202 | |||
| 203 | data = by_tag[key][wcycle] | ||
| 191 | # sort by increasing WSS | 204 | # sort by increasing WSS |
| 192 | data.sort(key=lambda row: row[0]) | 205 | data.sort(key=lambda row: row[0]) |
| 193 | f = open('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), 'w') | ||
| 194 | for row in data: | 206 | for row in data: |
| 195 | f.write(", ".join([str(x) for x in row])) | 207 | all_wss.add(row[0]) |
| 196 | f.write('\n') | 208 | |
| 197 | f.close() | 209 | (xcol, ycol, yminus, tag) = key |
| 210 | |||
| 211 | xtag = PMO_COL_LABEL[xcol][1] | ||
| 212 | ytag = PMO_COL_LABEL[ycol][1] | ||
| 213 | dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" | ||
| 214 | code = "code=%s-%s-%s-%s" % key | ||
| 215 | figname = "host=%s_%s%s-vs-%s_%s_%s" % \ | ||
| 216 | (host, ytag, dtag, xtag, tag, code) | ||
| 217 | |||
| 218 | write_csv_file('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), data) | ||
| 219 | |||
| 220 | |||
| 221 | mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]] | ||
| 222 | |||
| 223 | for wcycle in all_wcycle: | ||
| 224 | try: | ||
| 225 | rows = [[wss] for wss in sorted(all_wss)] | ||
| 226 | header = ['wss'] | ||
| 227 | for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS: | ||
| 228 | tags = ['all'] | ||
| 229 | if split: | ||
| 230 | tags += mems | ||
| 231 | for tag in tags: | ||
| 232 | col_name = "%s %s" % (PMO_COL_LABEL[ycol][1], tag) | ||
| 233 | if not yminus is None: | ||
| 234 | col_name += ' - ' + PMO_COL_LABEL[yminus][1] | ||
| 235 | header += [col_name + " avg", col_name + " std", col_name + " wc"] | ||
| 236 | key = (x, y, yminus, tag) | ||
| 237 | data = by_tag[key][wcycle] | ||
| 238 | for r, d in izip(rows, data): | ||
| 239 | if r[0] != d[0]: | ||
| 240 | print "mismatch", r[0], d[0], key, wcycle | ||
| 241 | assert r[0] == d[0] # working set size must match | ||
| 242 | r += d[1:4] # (average, std, wc) | ||
| 243 | write_csv_file('pmo-all_wcycle=%d_host=%s.csv' % (wcycle, host), | ||
| 244 | rows, header, width=max([len(h) for h in header])) | ||
| 245 | except AssertionError: | ||
| 246 | self.err("Data missing for wcycle=%d!" % wcycle) | ||
| 247 | |||
| 198 | 248 | ||
| 199 | def plot_preempt_migrate(self, datafile, name, conf): | 249 | def plot_preempt_migrate(self, datafile, name, conf): |
| 200 | plots = self.setup_pmo_graphs(datafile, conf) | 250 | plots = self.setup_pmo_graphs(datafile, conf) |
| @@ -259,7 +309,7 @@ class CyclePlotter(defapp.App): | |||
| 259 | xtag = PMO_COL_LABEL[xcol][1] | 309 | xtag = PMO_COL_LABEL[xcol][1] |
| 260 | ytag = PMO_COL_LABEL[ycol][1] | 310 | ytag = PMO_COL_LABEL[ycol][1] |
| 261 | dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" | 311 | dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if not yminus is None else "" |
| 262 | figname = "%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag) | 312 | figname = name #"%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag) |
| 263 | xunit = PMO_COL_LABEL[xcol][2] | 313 | xunit = PMO_COL_LABEL[xcol][2] |
| 264 | yunit = PMO_COL_LABEL[ycol][2] | 314 | yunit = PMO_COL_LABEL[ycol][2] |
| 265 | ylabel = PMO_COL_LABEL[ycol][0] | 315 | ylabel = PMO_COL_LABEL[ycol][0] |
| @@ -282,10 +332,112 @@ class CyclePlotter(defapp.App): | |||
| 282 | ] | 332 | ] |
| 283 | xlabel = "working set size (kilobytes)" | 333 | xlabel = "working set size (kilobytes)" |
| 284 | 334 | ||
| 335 | yrange = (4096, 2**26) if yminus is None else None | ||
| 336 | |||
| 285 | gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=figname, | 337 | gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=figname, |
| 338 | yrange=yrange, | ||
| 286 | logscale="xy 2" if yminus is None else "x 2", | 339 | logscale="xy 2" if yminus is None else "x 2", |
| 287 | format=self.options.format) | 340 | format=self.options.format) |
| 288 | 341 | ||
| 342 | def plot_pmo_all(self, datafile, name, conf): | ||
| 343 | host = conf['host'] | ||
| 344 | mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]] | ||
| 345 | columns = [] | ||
| 346 | idx = 2 | ||
| 347 | for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS: | ||
| 348 | tags = ['all'] | ||
| 349 | if split: | ||
| 350 | tags += mems | ||
| 351 | for tag in tags: | ||
| 352 | columns.append((x, y, yminus, tag, idx)) | ||
| 353 | idx += 3 | ||
| 354 | |||
| 355 | data = load_csv_file(datafile) | ||
| 356 | if self.options.cycles_per_usec: | ||
| 357 | yunit = "(us)" | ||
| 358 | data[:, 1:] /= self.options.cycles_per_usec | ||
| 359 | else: | ||
| 360 | yunit = "(cycles)" | ||
| 361 | tmp = write_csv_file(None, data) | ||
| 362 | |||
| 363 | rw = int(conf['wcycle']) | ||
| 364 | rw = 1.0 / rw * 100 if rw != 0 else 0 | ||
| 365 | |||
| 366 | # raw measures | ||
| 367 | for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]: | ||
| 368 | graphs = [] | ||
| 369 | for (x, y, yminus, tag, idx) in columns: | ||
| 370 | if yminus is None: | ||
| 371 | label = PMO_COL_LABEL[y][0] | ||
| 372 | if y == 10: | ||
| 373 | label += " from %s" % PMO_MEM[tag] | ||
| 374 | graphs += [ | ||
| 375 | (tmp.name, 1, idx + offset, label), | ||
| 376 | ] | ||
| 377 | xlabel = "working set size (kilobytes)" | ||
| 378 | ylabel = "time to complete access " + yunit | ||
| 379 | title = "measured %s WSS access time (%.2f%% writes)" % (long, rw) | ||
| 380 | yrange = None #(4096, 2**26) | ||
| 381 | |||
| 382 | fname = "%s_full_%s" % (name, kind) | ||
| 383 | gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname, | ||
| 384 | yrange=yrange, logscale=("x 2", "y 10"), format=self.options.format) | ||
| 385 | |||
| 386 | # per-sample delta measures | ||
| 387 | for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]: | ||
| 388 | graphs = [] | ||
| 389 | for (x, y, yminus, tag, idx) in columns: | ||
| 390 | if not (yminus is None) and tag != 'all': | ||
| 391 | label = "%s" % PMO_MEM[tag] | ||
| 392 | graphs += [ | ||
| 393 | (tmp.name, 1, idx + offset, label), | ||
| 394 | ] | ||
| 395 | xlabel = "working set size (kilobytes)" | ||
| 396 | ylabel = "per-sample delta to hot access " + yunit | ||
| 397 | title = "measured %s overhead (%.2f%% writes)" % (long, rw) | ||
| 398 | yrange = None | ||
| 399 | |||
| 400 | fname = "%s_delta_%s" % (name, kind) | ||
| 401 | gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname, | ||
| 402 | yrange=yrange, logscale=("x 2", "y 10"), format=self.options.format) | ||
| 403 | del tmp | ||
| 404 | |||
| 405 | # stats delta | ||
| 406 | # find hot column | ||
| 407 | col = None | ||
| 408 | for (x, y, yminus, tag, idx) in columns: | ||
| 409 | if x == 0 and y == 9 and yminus is None and tag == 'all': | ||
| 410 | col = idx | ||
| 411 | break | ||
| 412 | # normalize based on third hot access | ||
| 413 | # +1/-1 to get zero-based indices; Gnuplot wants 1-based indices | ||
| 414 | hot_avg = data[:,col - 1].copy() | ||
| 415 | hot_wc = data[:,col + 1].copy() | ||
| 416 | for (x, y, yminus, tag, idx) in columns: | ||
| 417 | data[:,idx - 1] -= hot_avg | ||
| 418 | data[:,idx + 1] -= hot_wc | ||
| 419 | |||
| 420 | tmp = write_csv_file(None, data) | ||
| 421 | |||
| 422 | for offset, kind, long in [(0, 'avg', 'average'), (2, 'wc', 'maximum')]: | ||
| 423 | graphs = [] | ||
| 424 | for (x, y, yminus, tag, idx) in columns: | ||
| 425 | if yminus is None and tag != 'all': | ||
| 426 | label = PMO_COL_LABEL[y][0] | ||
| 427 | label = PMO_MEM[tag] | ||
| 428 | graphs += [ | ||
| 429 | (tmp.name, 1, idx + offset, label), | ||
| 430 | ] | ||
| 431 | xlabel = "working set size (kilobytes)" | ||
| 432 | ylabel = "delta to third hot access " + yunit | ||
| 433 | title = "difference of %s access costs (%.2f%% writes)" % (long, rw) | ||
| 434 | yrange = None | ||
| 435 | |||
| 436 | fname = "%s_diff_%s" % (name, kind) | ||
| 437 | gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=fname, | ||
| 438 | yrange=yrange, logscale=("x 2", "y 10"), format=self.options.format) | ||
| 439 | del tmp | ||
| 440 | |||
| 289 | def plot_file(self, datafile): | 441 | def plot_file(self, datafile): |
| 290 | bname = basename(datafile) | 442 | bname = basename(datafile) |
| 291 | name, ext = splitext(bname) | 443 | name, ext = splitext(bname) |
| @@ -297,6 +449,8 @@ class CyclePlotter(defapp.App): | |||
| 297 | self.plot_preempt_migrate(datafile, name, conf) | 449 | self.plot_preempt_migrate(datafile, name, conf) |
| 298 | elif 'pmo-aggr' in conf: | 450 | elif 'pmo-aggr' in conf: |
| 299 | self.plot_pmo_aggr(datafile, name, conf) | 451 | self.plot_pmo_aggr(datafile, name, conf) |
| 452 | elif 'pmo-all' in conf: | ||
| 453 | self.plot_pmo_all(datafile, name, conf) | ||
| 300 | else: | 454 | else: |
| 301 | self.err("Skipped '%s'; unkown experiment type." | 455 | self.err("Skipped '%s'; unkown experiment type." |
| 302 | % bname) | 456 | % bname) |
