#!/usr/bin/env python
from os.path  import splitext, basename
from optparse import make_option as o
from tempfile import NamedTemporaryFile as Tmp

from collections import defaultdict
from itertools import izip

import numpy as np
from util import *

import stats
import defapp

from plot     import decode
from gnuplot  import gnuplot, FileGraph, FORMATS


def ludwig_l2(x, y):
    """Return True if CPUs x and y form an L2-sharing pair on ludwig.

    A pair consists of a "left column" CPU (id % 8 < 4) and the CPU whose
    id is exactly four higher; the order of the two arguments is irrelevant.
    """
    # Order the ids so that only one orientation has to be tested.
    lower, upper = (x, y) if x < y else (y, x)
    return lower % 8 < 4 and upper - lower == 4

def ludwig_l3(x, y):
    """Return True if CPUs x and y share only the L3 cache on ludwig.

    That is the case when both CPUs sit on the same socket (equal id % 4),
    are two distinct CPUs, and do not already share an L2 cache.
    """
    same_socket = (x % 4) == (y % 4)
    distinct = x != y
    return distinct and same_socket and not ludwig_l2(x, y)


# Memory-hierarchy levels per host, as (tag, predicate) pairs.  Each predicate
# receives the CPU a task was preempted on and the CPU it resumed on and
# reports whether that transition belongs to the level named by the tag.
_JUPITER_LEVELS = [
    ('preempt', lambda x, y: x == y),
    ('mem',     lambda x, y: x != y),
]

# Socket0  Socket1  Socket2  Socket3
# ------   -------  -------  -------
# | 0, 4|  | 1, 5|  | 2, 6|  | 3, 7|
# | 8,12|  | 9,13|  |10,14|  |11,15|
# |16,20|  |17,21|  |18,22|  |19,23|
# -------  -------  -------  -------
_LUDWIG_LEVELS = [
    ('preempt', lambda x, y: x == y),
    ('l2',      ludwig_l2),
    ('l3',      ludwig_l3),
    # CPUs on different sockets differ in their id modulo 4.
    ('mem',     lambda x, y: abs(y - x) % 4 != 0),
]

# host name -> (number of CPUs, memory-hierarchy levels)
MACHINE_TOPOLOGY = {
    'jupiter-cs':        (4,  _JUPITER_LEVELS),
    'ludwig.cs.unc.edu': (24, _LUDWIG_LEVELS),
}

# File-name parameters that are echoed in plot titles (key -> display name).
PMO_PARAM = dict(wss='WSS', host='host', wcycle='write-cycle')

# Human-readable description of each memory-hierarchy tag.
PMO_MEM = dict(
    mem='a migration through main memory',
    l3='a migration through a shared L3 cache',
    l2='a migration through a shared L2 cache',
    preempt='a preemption',
    all='either a migration or preemption',
)

# Subplot descriptors are tuples of
#   (x column, y column, y-delta column or None, split by mem-hierarchy?)
# Cold access and the three hot accesses plotted against the sample index;
# both subplot tables start with these four entries.
_PMO_BASELINE_SUBPLOTS = list(
    (0, column, None, False) for column in (6, 7, 8, 9))

PMO_SUBPLOTS = _PMO_BASELINE_SUBPLOTS + [
    (0, 10, None, True),
    (3, 10, None, True),
    (0, 10, 9, True),
    (3, 10, 9, True),
]

# Deltas of column 10 to columns 6, 7 and 8 are deliberately not aggregated.
PMO_AGGR_SUBPLOTS = _PMO_BASELINE_SUBPLOTS + [
    (0, 10, None, True),
    (0, 10, 9, True),
    (0, 8, 7, False),   # difference of second to first hot access
    (0, 9, 8, False),   # difference of third to second hot access
]

PMO_AGGR_COMBINE = [
    list((column, 'all') for column in (6, 7, 8, 9)),
]

# One (long label, short tag, unit) entry per CSV column, in column order.
PMO_COL_LABEL = [
    ('measurement', 'sample', 'index'),                # 0
    ('write cycles', 'wcycle', 'every nth access'),    # 1
    ('WSS', 'wcc', 'kilobytes'),                       # 2
    ('suspension length', 'delay', 'microseconds'),    # 3
    ('CPU (preempted on)', 'from', 'processor'),       # 4
    ('CPU (resumed on)', 'to', 'processor'),           # 5
    ('cold access', 'cold', 'cycles'),                 # 6
    ('first hot access', 'hot1', 'cycles'),            # 7
    ('second hot access', 'hot2', 'cycles'),           # 8
    ('third hot access', 'hot3', 'cycles'),            # 9
    ('access after resuming', 'after', 'cycles'),      # 10
]

# Columns holding the CPU a task was preempted on / resumed on.
PMO_FROM_CPU, PMO_TO_CPU = 4, 5

def _flag(long_opt, dest):
    # Boolean switch without a short form; stores True in `dest` when given.
    return o(None, long_opt, action='store_true', dest=dest)


# Command-line options handed to defapp.App by CyclePlotter.
options = [
    o('-f', '--format', action='store', dest='format', type='choice',
      choices=FORMATS, help='output format'),
    _flag('--paper', 'paper'),
    _flag('--wide', 'wide'),
    _flag('--split', 'split'),
    _flag('--log-y', 'logy'),
    _flag('--errorbar', 'errbar'),
    o(None, '--extend', action='store', type='float', dest='extend'),
    _flag('--aggregate', 'aggregate'),
    o('-c', '--cycles-per-usec', action='store', type='float',
      dest='cycles_per_usec'),
]

# Default value for every option destination declared above.
defaults = dict(
    format='show',
    paper=False,
    split=False,
    wide=False,
    aggregate=False,
    extend=1.5,
    cycles_per_usec=None,
    logy=False,
    errbar=False,
)

def extract_cols(data, xcol, ycol1, ycol2, cast=int, cpu_filter=lambda x, y: True):
    """Return the (x, y) series of all samples accepted by cpu_filter.

    data       -- two-dimensional sample table, one row per measurement
    xcol       -- index of the column providing the x values
    ycol1      -- index of the column providing the y values
    ycol2      -- index of a column to subtract from ycol1, or None
    cast       -- unused; retained so existing callers keep working
    cpu_filter -- predicate called with a row's PMO_FROM_CPU and PMO_TO_CPU
                  entries; only rows for which it is true are kept

    The result is a new two-column array (x, y) with one row per kept sample.
    """
    def matching_cpus(row):
        return cpu_filter(row[PMO_FROM_CPU], row[PMO_TO_CPU])

    rows = select(matching_cpus, data)
    xs = rows[:, xcol]
    ys = rows[:, ycol1]
    if ycol2 is not None:
        # Compute the delta into a fresh array instead of subtracting in
        # place: whether select() hands back a copy or a view of `data` is not
        # visible here, and an in-place update of a view would corrupt the
        # samples for every subplot extracted afterwards.
        ys = ys - rows[:, ycol2]
    return np.column_stack((xs, ys))

class CyclePlotter(defapp.App):
    """Plot and aggregate PMO experiment data stored in CSV files.

    The experiment type and its parameters are decoded from the name of each
    data file (see plot_file()).  Raw 'pmo' traces are drawn as scatter
    plots; write_aggregate() condenses many raw traces into 'pmo-aggr' and
    'pmo-all' tables, each of which has its own plotting method.
    """

    def __init__(self):
        """Set up the application with this module's options and defaults."""
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
        self.aggregate_data = []

    def setup_pmo_graphs(self, datafile, conf, subplots=PMO_SUBPLOTS):
        """Load a raw PMO trace and cut it into the series to be plotted.

        datafile -- path of the CSV file to load
        conf     -- parameters decoded from the file name; 'host' selects the
                    entry of MACHINE_TOPOLOGY to use
        subplots -- sequence of (xcol, ycol, yminus, by_mem_hierarchy) tuples

        Returns a list of (rows, xcol, ycol, yminus, tag) tuples, where rows
        is the result of extract_cols() for that series.  Subplots that are
        split by memory hierarchy yield one series per level of the host in
        addition to the unfiltered 'all' series.  Returns None, after
        reporting an error, if the host is not listed in MACHINE_TOPOLOGY.
        """
        host = conf['host'] if 'host' in conf else None
        if host not in MACHINE_TOPOLOGY:
            self.err('Unknown host: %s' % host)
            return None

        (cpus, hier) = MACHINE_TOPOLOGY[host]
        plots = []
        data = load_csv_file(datafile, dtype=int)
        for (xcol, ycol, yminus, by_mem_hierarchy) in subplots:
            sub = [('all', lambda x, y: True)]
            if by_mem_hierarchy:
                sub += hier
            for tag, test in sub:
                rows = extract_cols(data,
                                    xcol, ycol, yminus,
                                    cpu_filter=test)
                plots.append((rows, xcol, ycol, yminus, tag))
        return plots

    def _combined_table(self, by_tag, wcycle, wss_values, mems):
        """Build the header and rows of the combined table for one write cycle.

        by_tag     -- series key -> wcycle -> list of summary tuples, each
                      list sorted by WSS (see write_aggregate())
        wcycle     -- write cycle to build the table for
        wss_values -- sorted list of all working set sizes
        mems       -- memory-hierarchy tags of the host

        Returns (header, rows), or None if some series does not provide
        exactly one data point per working set size.
        """
        rows = [[wss] for wss in wss_values]
        header = ['wss']
        for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS:
            tags = ['all']
            if split:
                tags += mems
            for tag in tags:
                col_name = "%s %s" % (PMO_COL_LABEL[y][1], tag)
                if yminus is not None:
                    col_name += ' - ' + PMO_COL_LABEL[yminus][1]
                header += [col_name + " avg", col_name + " std", col_name + " wc"]
                key = (x, y, yminus, tag)
                data = by_tag[key][wcycle]
                # Pairing rows and data positionally is only sound if both
                # have the same length; otherwise some rows would silently
                # end up with fewer columns than the header.
                if len(data) != len(rows):
                    print("mismatch: expected %d working set sizes, got %d %s %s"
                          % (len(rows), len(data), key, wcycle))
                    return None
                for r, d in zip(rows, data):
                    if r[0] != d[0]:
                        # working set size must match
                        print("mismatch %s %s %s %s" % (r[0], d[0], key, wcycle))
                        return None
                    r += d[1:4]  # (average, std, wc)
        return (header, rows)

    def write_aggregate(self, datafiles):
        """Condense raw PMO traces into per-series and combined CSV tables.

        For every series of every data file (see setup_pmo_graphs()),
        outliers are removed with stats.iqr_remove_outliers() and the mean,
        sample standard deviation and maximum of the remaining y values are
        recorded together with the WSS encoded in the file name.  Afterwards
        two kinds of files are written:

          pmo-aggr_wcycle=<w>_<figname>.csv -- one per series and write
              cycle; rows sorted by WSS
          pmo-all_wcycle=<w>_host=<h>.csv   -- one per write cycle; all
              series side by side (skipped with an error if data is missing)

        Files that are not PMO experiments or stem from an unknown host are
        skipped.  Mixing data from two known hosts aborts the aggregation.
        """
        # series key (xcol, ycol, yminus, tag) -> wcycle -> list of
        # (wss, avg, std, wc, #samples kept, #outliers removed)
        by_tag = defaultdict(lambda: defaultdict(list))

        host = None

        for i, datafile in enumerate(datafiles):
            print('[%d/%d] Processing %s...' % (i + 1, len(datafiles), datafile))
            bname = basename(datafile)
            name, ext = splitext(bname)
            if ext != '.csv':
                self.err("Warning: '%s' doesn't look like a CSV file."
                         % bname)
            conf = decode(name)
            if 'pmo' not in conf:
                self.err("Warning: '%s' is not a PMO experiment; skipping." % bname)
                continue

            plots = self.setup_pmo_graphs(datafile, conf, PMO_AGGR_SUBPLOTS)
            if plots is None:
                # Only this file is unusable; keep going with the others.
                print("Skipping %s..." % datafile)
                continue
            if not host:
                host = conf['host']
            if host != conf['host']:
                self.err('Mixing data from two hosts! (%s, %s)' % (host, conf['host']))
                self.err('Aborting.')
                return
            wss = int(conf['wss'])
            wcycle = int(conf['wcycle'])
            for (rows, xcol, ycol, yminus, tag) in plots:
                if len(rows) == 0:
                    # np.max() raises on empty input; the gap is reported
                    # later as missing data for this write cycle.
                    self.err("Warning: no '%s' samples in '%s'; skipping series."
                             % (tag, bname))
                    continue
                clean = stats.iqr_remove_outliers(rows, extend=self.options.extend)
                vals = clean[:,1]
                avg = np.mean(vals)
                std = np.std(vals, ddof=1)
                wc = np.max(vals)
                n = len(vals)

                key = (xcol, ycol, yminus, tag)
                by_tag[key][wcycle].append((wss, avg, std, wc, n, len(rows) - n))
            del plots

        if host is None:
            # Nothing was aggregated, so there is no topology to look up.
            self.err('No usable PMO data files; nothing to aggregate.')
            return

        all_wss = set()
        all_wcycle = set()

        for key in by_tag:
            (xcol, ycol, yminus, tag) = key
            xtag = PMO_COL_LABEL[xcol][1]
            ytag = PMO_COL_LABEL[ycol][1]
            dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if yminus is not None else ""
            code = "code=%s-%s-%s-%s" % key
            figname = "host=%s_%s%s-vs-%s_%s_%s" % \
                (host, ytag, dtag, xtag, tag, code)

            for wcycle in by_tag[key]:
                all_wcycle.add(wcycle)

                data = by_tag[key][wcycle]
                # sort by increasing WSS
                data.sort(key=lambda row: row[0])
                for row in data:
                    all_wss.add(row[0])

                write_csv_file('pmo-aggr_wcycle=%d_%s.csv' % (wcycle, figname), data)

        mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]]
        wss_values = sorted(all_wss)

        for wcycle in all_wcycle:
            table = self._combined_table(by_tag, wcycle, wss_values, mems)
            if table is None:
                self.err("Data missing for wcycle=%d!" % wcycle)
                continue
            (header, rows) = table
            write_csv_file('pmo-all_wcycle=%d_host=%s.csv' % (wcycle, host),
                           rows, header, width=max([len(h) for h in header]))

    def plot_preempt_migrate(self, datafile, name, conf):
        """Draw one scatter plot per series of a raw PMO trace.

        datafile -- path of the raw trace
        name     -- base name of the data file without extension; used as
                    prefix of the figure names
        conf     -- parameters decoded from the file name

        Each plot also shows the IQR-based outlier cutoffs (widened by the
        --extend factor) and the value returned by stats.cutoff_max().
        """
        plots = self.setup_pmo_graphs(datafile, conf)
        if plots is None:
            print("Skipping %s..." % datafile)
            return
        print('Plotting %s...' % datafile)

        for (rows, xcol, ycol, yminus, tag) in plots:
            # Gnuplot reads the series from a temporary file.
            tmp = Tmp()
            try:
                for row in rows:
                    tmp.write("%s, %s\n" % (row[0], row[1]))
                tmp.flush()

                xtag = PMO_COL_LABEL[xcol][1]
                ytag = PMO_COL_LABEL[ycol][1]
                dtag = "-delta-%s" % PMO_COL_LABEL[yminus][1] if yminus is not None else ""
                figname = "%s_%s%s-vs-%s_%s" % (name, ytag, dtag, xtag, tag)
                xunit = PMO_COL_LABEL[xcol][2]
                yunit = PMO_COL_LABEL[ycol][2]
                ylabel = PMO_COL_LABEL[ycol][0]
                xlabel = PMO_COL_LABEL[xcol][0]
                title = "%s" % ylabel
                if ycol == 10:
                    title += " from %s" % PMO_MEM[tag]
                for key in conf:
                    if key in PMO_PARAM:
                        title += " %s=%s" % (PMO_PARAM[key], conf[key])
                graphs = [(tmp.name, 1, 2, ylabel)]
                # plot cutoff
                (s, lo, hi) = stats.iqr(rows[:,1])
                lo -= s * self.options.extend
                hi += s * self.options.extend
                m99 = stats.cutoff_max(rows[:, 1])
                graphs += [(lo, 'IQR cutoff (%d)' % lo, 'line'),
                           (hi, 'IQR cutoff (%d)' % hi, 'line'),
                           (m99,'99%% cutoff (%d)' % m99, 'line lw 2')]
                gnuplot(graphs,
                        xlabel="%s (%s)" % (xlabel, xunit),
                        ylabel="%s (%s)" % ("access cost" if yminus is None
                                            else "delta to %s" % PMO_COL_LABEL[yminus][0],
                                            yunit),
                        title=title,
                        style='points',
                        format=self.options.format,
                        fname=figname)
            finally:
                # Closing deletes the temporary file, also on errors.
                tmp.close()

    def plot_pmo_aggr(self, datafile, name, conf):
        """Plot one per-series aggregate file written by write_aggregate().

        datafile -- path of the 'pmo-aggr' CSV file
        name     -- base name of the data file; used as the figure name
        conf     -- parameters decoded from the file name; 'code' holds the
                    series key as '<xcol>-<ycol>-<yminus>-<tag>'

        The average is drawn with error bars (columns 1:2:3) and the maximum
        from column 4, both against the working set size.
        """
        fname = datafile
        # The x column of the key is not needed: aggregate files always have
        # the working set size in their first column.
        (_, ycol, yminus, tag) = conf['code'].split('-')

        ycol = int(ycol)
        if yminus != "None":
            yminus = int(yminus)
        else:
            yminus = None

        figname = name
        yunit = PMO_COL_LABEL[ycol][2]
        title = "%s" % PMO_COL_LABEL[ycol][0]

        ylabel = "%s (%s)" % ("access cost" if yminus is None
                              else "delta to %s" % PMO_COL_LABEL[yminus][0],
                              yunit)
        if ycol == 10:
            title += " from %s" % PMO_MEM[tag]
        for key in conf:
            if key in PMO_PARAM:
                title += " %s=%s" % (PMO_PARAM[key], conf[key])

        graphs = [
            "'%s' using 1:2:3 title 'average' with errorbars" % (fname),
            (fname, 1, 4, "maximum"),
            ]
        xlabel = "working set size (kilobytes)"

        yrange = (4096, 2**26) if yminus is None else None

        gnuplot(graphs, xlabel=xlabel, ylabel=ylabel, title=title, fname=figname,
                yrange=yrange,
                logscale="xy 2" if yminus is None else "x 2",
                format=self.options.format)

    def _error_col(self, idx, offset, kind):
        """Return the error-bar column for a graph, or None if not wanted.

        Error bars are only drawn for averages and only with --errorbar; the
        standard deviation is stored in the column right after the average.
        """
        if kind == 'avg' and self.options.errbar:
            return idx + offset + 1
        return None

    def plot_pmo_all(self, datafile, name, conf):
        """Plot a combined 'pmo-all' table written by write_aggregate().

        datafile -- path of the 'pmo-all' CSV file
        name     -- base name of the data file; prefix of all figure names
        conf     -- parameters decoded from the file name ('host', 'wcycle')

        For both the average and the maximum, four figures are produced:
        <name>_full_*    (measured access times), <name>_delta_* (per-sample
        deltas by memory level), <name>_delta-h_* (deltas between consecutive
        hot accesses) and <name>_diff_* (difference of the aggregate values
        to the third hot access).  With --cycles-per-usec all values are
        converted from cycles to microseconds first.  A copy of the (possibly
        converted) table with a header line is written to 'xxx-<file name>'.
        """
        host = conf['host']
        if host not in MACHINE_TOPOLOGY:
            self.err('Unknown host: %s' % host)
            return
        mems = [tag for (tag, _) in MACHINE_TOPOLOGY[host][1]]

        # Reconstruct the column layout used by write_aggregate(): column 1
        # is the WSS, followed by (avg, std, wc) triples; idx is the 1-based
        # (Gnuplot) index of a triple's avg column.
        columns = []
        idx = 2
        header = ["wss"]
        for (x, y, yminus, split) in PMO_AGGR_SUBPLOTS:
            tags = ['all']
            if split:
                tags += mems
            for tag in tags:
                col_name = "%s %s" % (PMO_COL_LABEL[y][1], tag)
                if yminus is not None:
                    col_name += ' - ' + PMO_COL_LABEL[yminus][1]
                header += [col_name + " avg", col_name + " std", col_name + " wc"]
                columns.append((x, y, yminus, tag, idx))
                idx += 3

        data = load_csv_file(datafile)
        if self.options.cycles_per_usec:
            yunit = "(us)"
            data[:, 1:] /= self.options.cycles_per_usec
        else:
            yunit = "(cycles)"

        # Prefix the file name, not the whole path: prefixing a path that has
        # a directory component would point into a non-existent directory.
        csvfile = "xxx-%s" % basename(datafile)

        write_csv_file(csvfile, data, header, width=max([len(h) for h in header]))

        # Percentage of accesses that are writes.
        rw = int(conf['wcycle'])
        rw = 1.0 / rw * 100 if rw != 0 else 0

        if self.options.logy:
            axis = ("x 2", "y 10")
        else:
            axis = "x 2"

        # (offset of the column within a triple, short tag, description)
        kinds = [(0, 'avg', 'average'), (2, 'wc', 'maximum')]
        xlabel = "working set size (kilobytes)"

        # raw measures
        for offset, kind, desc in kinds:
            graphs = []
            for (x, y, yminus, tag, idx) in columns:
                if yminus is None:
                    label = PMO_COL_LABEL[y][0]
                    if y == 10:
                        label += " from %s" % PMO_MEM[tag]
                    graphs.append(
                        FileGraph(
                            csvfile, xcol=1, ycol=idx + offset, title=label,
                            error=self._error_col(idx, offset, kind)))
            # Plot once per kind, after all graphs have been collected.
            gnuplot(graphs, xlabel=xlabel,
                    ylabel="time to complete access " + yunit,
                    title="measured %s WSS access time (%.2f%% writes)" % (desc, rw),
                    fname="%s_full_%s" % (name, kind),
                    yrange=None, logscale=axis, format=self.options.format)

        # per-sample delta measures
        for offset, kind, desc in kinds:
            graphs = []
            for (x, y, yminus, tag, idx) in columns:
                if yminus is not None and tag != 'all':
                    label = "%s" % PMO_MEM[tag]
                    graphs.append(
                        FileGraph(
                            csvfile, xcol=1, ycol=idx + offset, title=label,
                            error=self._error_col(idx, offset, kind)))
            gnuplot(graphs, xlabel=xlabel,
                    ylabel="per-sample delta to hot access " + yunit,
                    title="measured %s overhead (%.2f%% writes)" % (desc, rw),
                    fname="%s_delta_%s" % (name, kind),
                    yrange=None, logscale=axis, format=self.options.format)

            graphs = []
            for (x, y, yminus, tag, idx) in columns:
                if y in [8, 9] and yminus in [7, 8] and tag == 'all':
                    label = "%s to %s" % (PMO_COL_LABEL[yminus][0], PMO_COL_LABEL[y][0])
                    graphs.append(
                        FileGraph(
                            csvfile, xcol=1, ycol=idx + offset, title=label,
                            error=self._error_col(idx, offset, kind)))
            gnuplot(graphs, xlabel=xlabel,
                    ylabel="per-sample delta to previous hot access " + yunit,
                    title="measured %s differences (%.2f%% writes)" % (desc, rw),
                    fname="%s_delta-h_%s" % (name, kind),
                    yrange=None, logscale=axis, format=self.options.format)

        # stats delta
        # find hot column
        col = None
        for (x, y, yminus, tag, idx) in columns:
            if x == 0 and y == 9 and yminus is None and tag == 'all':
                col = idx
                break
        if col is None:
            self.err("No 'third hot access' column found; skipping difference plots.")
            return

        # normalize based on third hot access
        # +1/-1 to get zero-based indices; Gnuplot wants 1-based indices
        hot_avg = data[:,col - 1].copy()
        hot_wc = data[:,col + 1].copy()
        for (x, y, yminus, tag, idx) in columns:
            data[:,idx - 1] -= hot_avg
            data[:,idx + 1] -= hot_wc

        # With no file name given, write_csv_file() is expected to return a
        # temporary file object whose name can be handed to Gnuplot.
        tmp = write_csv_file(None, data)

        for offset, kind, desc in kinds:
            graphs = []
            for (x, y, yminus, tag, idx) in columns:
                if yminus is None and tag != 'all':
                    label = PMO_MEM[tag]
                    graphs.append(FileGraph(tmp.name, xcol=1, ycol=idx+offset, title=label))
            # Plot once per kind, after all graphs have been collected.
            gnuplot(graphs, xlabel=xlabel,
                    ylabel="delta to third hot access " + yunit,
                    title="difference of %s access costs (%.2f%% writes)" % (desc, rw),
                    fname="%s_diff_%s" % (name, kind),
                    yrange=None, logscale=axis, format=self.options.format)

    def plot_file(self, datafile):
        """Plot one data file according to the experiment type in its name.

        The base name (without extension) is passed to decode(); the
        presence of 'pmo', 'pmo-aggr' or 'pmo-all' in the result selects the
        plotting method.  Other files are skipped with an error message.
        """
        bname = basename(datafile)
        name, ext = splitext(bname)
        if ext != '.csv':
            self.err("Warning: '%s' doesn't look like a CSV file."
                     % bname)
        conf = decode(name)
        if 'pmo' in conf:
            self.plot_preempt_migrate(datafile, name, conf)
        elif 'pmo-aggr' in conf:
            self.plot_pmo_aggr(datafile, name, conf)
        elif 'pmo-all' in conf:
            self.plot_pmo_all(datafile, name, conf)
        else:
            self.err("Skipped '%s'; unknown experiment type."
                     % bname)

    def default(self, _):
        """Plot every file named on the command line."""
        for datafile in self.args:
            self.plot_file(datafile)

    def do_aggregate(self, _):
        """Aggregate all arguments after the first one.

        NOTE(review): the first argument is presumably the command name
        ('aggregate') consumed by defapp.App -- confirm against defapp.
        """
        self.write_aggregate(self.args[1:])

if __name__ == "__main__":
    CyclePlotter().launch()