#!/usr/bin/env python
import defapp

from plot import decode
from dplot import sched_name
from util import load_csv_file, load_binary_file, write_csv_file
from stats import iqr_cutoff

from binary_data import get_data

from math import ceil

import numpy
import csv
from os.path  import splitext, basename
from optparse import make_option as o

from gnuplot  import gnuplot, FORMATS, Plot, label, curve

# Command-line options understood by the plotter (optparse Option objects).
# The matching default values live in `defaults` below.
options = [
    # output options
    o('-f', '--format', action='store', dest='format', type='choice',
      choices=FORMATS, help='output format'),
    o(None, '--save-script', action='store_true', dest='save_script',
      help='save the gnuplot script and data files instead of running gnuplot'),
    o('-p', '--prefix', action='store', dest='prefix',
      help='prefix for the names of generated plot files'),

    # data processing / outlier removal
    o('-i', '--iqr-extent', action='store', dest='extent', type='float',
      help='what extent to use for outlier removal'),

    o('-n', '--normalize', action='store_true', dest='normalize',
      help='use normalized counts'),

    o('-d', '--stdev', action='store_true', dest='use_std',
      help='use standard deviation filter'),

    o(None, '--not-per-proc', action='store_false', dest='per_proc',
      help='do not show per-processor task counts'),

    o('-c', '--cut-off', action='store', dest='cutoff', type='int',
      help='max number of samples to use'),

    o('-t', '--take-off', action='store', dest='take_off', type='int',
      help='manual number of outlier samples to discard'),

    o('-o', '--outlier-list', action='store', dest='outlier_file',
      help='list of outliers to remove'),

    # axis / histogram formatting
    o('-x', '--xmax', action='store', dest='xmax', type='int',
      help='determines x-axis range'),

    o('-y', '--ymax', action='store', dest='ymax', type='float',
      help='determines y-axis range'),

    o('-b', '--binsize', action='store', dest='binsize', type='float',
      help='set binsize of histogram'),

    o(None, '--ylog', action='store_true', dest='ylog',
      help='use logarithmic y-axis'),

    o(None, '--cycles', action='store', dest='cycles', type='int',
      help='how many cycles per usec'),

    # optparse would infer type='choice' from `choices`; spelled out here to
    # match --format above.
    o(None, '--compare', action='store', dest='compare', type='choice',
      help='plot overhead comparison', choices=['max', 'avg']),
   ]

# Default value for every option destination declared in `options`, plus the
# derived 'outliers' table that default() fills in from --outlier-list.
defaults = dict(
    # output options
    format='pdf',
    save_script=False,
    prefix='',

    # data processing
    cycles=2128,  # per usec
    extent=0,
    cutoff=None,
    take_off=None,
    normalize=False,
    use_std=False,

    # manual outlier removal
    outlier_file=None,
    outliers={},

    # formatting options
    binsize=0.25,
    per_proc=True,

    xmax=None,
    ymax=None,
    ylog=False,

    compare=None,
)


# Human-readable description of each overhead type, keyed by the overhead
# identifier found in the decoded file name; used in plot titles.
TXT = dict([
    ('RELEASE-LATENCY', 'event latency'),
    ('RELEASE', 'release interrupt overhead'),
    ('SCHED', 'scheduling overhead'),
    ('SCHED2', 'post-scheduling overhead'),
    ('CXS', 'context-switch overhead'),
    ('SEND-RESCHED', 'IPI latency'),
    ('TICK', 'timer tick overhead'),
])


# Number of CPUs of each known measurement host; used to convert
# per-processor task counts into total task counts.
HOST_CPUS = dict([
    ('ludwig', 24),
])


def load_outliers(fname):
    """Read the manual outlier list from the CSV file *fname*.

    Every non-empty row must have at least three columns:
    ``scheduler, n, cut``, where ``n`` and ``cut`` are parsed as integers.
    Blank lines are skipped.

    Returns a dict mapping each scheduler name to a list of ``(n, cut)``
    tuples in file order.

    Raises IOError if the file cannot be opened, and ValueError/IndexError
    for malformed rows.
    """
    outliers = {}
    # Use a context manager so the file is closed even if a row is malformed.
    with open(fname, "r") as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields [] for blank lines; ignore them.
                continue
            sched = row[0]
            entry = (int(row[1]), int(row[2]))
            outliers.setdefault(sched, []).append(entry)
    return outliers


def get_stats_label(samples):
    """Return a one-line textual summary of *samples*.

    The label lists minimum, maximum, mean, median and (population) standard
    deviation, each formatted with two decimals and a ``us`` suffix.

    The extremes are computed with numpy rather than read from the first and
    last element, so the result is also correct for unsorted input.
    """
    lowest = numpy.amin(samples)
    highest = numpy.amax(samples)
    avg = numpy.mean(samples)
    med = numpy.median(samples)
    dev = numpy.std(samples)
    return "min=%.2fus  max=%.2fus  avg=%.2fus  median=%.2fus  stdev=%.2fus" \
        % (lowest, highest, avg, med, dev)

class OverheadPlotter(defapp.App):
    """Application that turns overhead measurement files into gnuplot plots.

    Each input file name is decoded with ``decode`` into a configuration
    mapping.  Files whose configuration contains ``'taskset'`` are plotted as
    a histogram of samples, files containing ``'otrend'`` as a trend plot.
    With ``--compare``, all given trend files are drawn into a single plot.
    """

    def __init__(self):
        """Register the module-level options/defaults with the base class."""
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
        # Temporary data files handed to gnuplot; kept referenced until the
        # plot has been rendered (see write()).
        self.tmpfiles = []

    def make_plot(self, fname=None):
        """Create a Plot writing to ``<prefix><fname>.<format>``."""
        p = Plot()
        p.output = "%s%s.%s" % (self.options.prefix, fname, self.options.format)
        p.format = self.options.format
        return p

    def setup_png(self, plot):
        """Apply the fixed PNG styling to *plot* if PNG output is selected.

        Returns True if the PNG settings were applied and False otherwise, in
        which case the caller styles the plot itself.
        """
        if self.options.format != 'png':
            return False
        # standard png options; usually correct; never tweaked for paper
        plot.font_size = 'large'
        plot.size   = (1024, 768)
        plot.xticks = (0, 1)
        plot.yticks = (0, 0.1)
        plot.default_style = "linespoints"
        return True

    def _apply_default_style(self, p, key, default_style):
        """Style *p* for non-PNG output; PNG output uses setup_png() instead.

        *key* and *default_style* are the only settings that differ between
        the histogram and the trend plots.
        """
        if self.setup_png(p):
            return
        p.rounded_caps = True
        p.font = 'Helvetica'

        p.font_size = '10'
        p.size = ('20cm', '10cm')
        p.monochrome   = False
        p.dashed_lines = False
        p.key = key
        p.default_style = default_style

    def write(self, data, name, ext='data'):
        """Write *data* as CSV and return the name of the file written.

        With ``--save-script`` the file is named ``<name>.<ext>`` so that it
        can accompany the saved gnuplot script.  Otherwise
        ``write_csv_file(None, data)`` is used; the object it returns is
        stored in ``self.tmpfiles`` (presumably a temporary file that is
        deleted once unreferenced -- confirm in util.write_csv_file).
        """
        if self.options.save_script:
            fname = "%s.%s" % (name, ext)
            write_csv_file(fname, data)
            return fname
        tmp = write_csv_file(None, data)
        # keep a reference so that it isn't deleted
        self.tmpfiles.append(tmp)
        return tmp.name

    def write_histogram(self, samples, name, labels=10):
        """Bin *samples*, write the histogram to a file and describe it.

        The upper end of the histogram is ``--xmax`` if given, otherwise the
        largest sample rounded up to an integer.  Roughly *labels* evenly
        spaced bins get a textual x-axis label.

        Returns ``(data, fname, edges)`` where *data* is an array with one
        row per bin (bin centre, count, cumulative count; the counts are
        fractions with ``--normalize``), *fname* is the file holding those
        rows plus a label column, and *edges* are the bin edges.
        """
        upper = ceil(numpy.amax(samples))
        if self.options.xmax:
            upper = self.options.xmax
        bin_size = self.options.binsize
        # At least one bin: numpy.histogram rejects bins=0, which would
        # otherwise happen when the range is smaller than one bin.
        num_bins = max(1, int(upper / bin_size))
        # The range is shifted by half a bin, so bin centres fall on
        # multiples of the bin size whenever `upper` is such a multiple.
        # Samples outside the range are not counted by numpy.histogram.
        (bins, edges) = numpy.histogram(samples, bins=num_bins,
                                        range=(bin_size / 2,
                                               upper + bin_size / 2))
        data = numpy.zeros((num_bins, 3))
        data[:, 0] = (edges[:-1] + edges[1:]) / 2.0
        data[:, 1] = bins
        data[:, 2] = numpy.cumsum(bins)

        if self.options.normalize:
            total = float(len(samples))
            data[:, 1] /= total
            data[:, 2] /= total

        # Floor division keeps the label stride an integer on Python 2 and 3.
        label_rate = (len(bins) // labels) or 1
        for_file = []
        for i, row in enumerate(data):
            tick = '%.2f' % row[0] if i % label_rate == 0 else ''
            for_file.append([row[0], row[1], row[2], tick])

        return (data, self.write(for_file, name, ext='hist'), edges)

    def render(self, p):
        """Save the gnuplot script (``--save-script``) or run gnuplot on *p*."""
        if self.options.save_script:
            p.gnuplot_save(p.output + '.plot')
        else:
            p.gnuplot_exec()

    def plot_samples(self, datafile, name, conf):
        """Plot a histogram of the samples stored in *datafile*.

        *name* is the base name used for output files and *conf* the decoded
        file-name configuration; the keys 'overhead', 'scheduler', 'n' and
        'host' are read.  A file without any remaining samples is skipped
        with an error message.
        """
        # RELEASE-LATENCY samples are divided by 1000 (presumably ns -> us;
        # confirm against the tracing format); everything else is converted
        # from cycles using the --cycles (per usec) setting.
        if conf['overhead'] == 'RELEASE-LATENCY':
            scale = 1.0 / 1000.0
        else:
            scale = 1.0 / self.options.cycles

        # A matching entry in the outlier list overrides --take-off.
        take_off = self.options.take_off
        if conf['scheduler'] in self.options.outliers:
            n = int(conf['n'])
            for (i, t) in self.options.outliers[conf['scheduler']]:
                if i == n:
                    take_off = t
                    break

        data, max_idx, min_idx, iqr_max, _iqr_min = get_data(
            datafile,
            scale,
            extent=self.options.extent,
            cutoff=self.options.cutoff,
            stdev=self.options.use_std,
            manual=take_off)

        samples = data[min_idx:max_idx]
        if len(samples) == 0:
            # Nothing to plot; the statistics and histogram code below
            # cannot handle an empty sample set.
            self.err("Skipped '%s' (no samples left to plot)." % datafile)
            return

        num_filtered = len(data) - len(samples)
        discarded = num_filtered / float(len(data)) * 100

        p = self.make_plot(name)

        samples_label = "samples: total=%d filtered=%d (%.2f%%)" % \
            (len(data), num_filtered, discarded)

        if self.options.extent:
            # The extent is a float option; %g shows e.g. 1.5 as "1.5"
            # instead of truncating it to "1" as %d would.
            iqr_label = "IQR: extent=%g threshold=%.2fus" % \
                (self.options.extent, iqr_max)
        elif take_off:
            iqr_label = "%s outlier%s manually removed" % \
                (take_off, '' if take_off == 1 else 's')
        elif discarded > 0:
            iqr_label = "manual threshold=1000us [IQR not applied]"
        else:
            iqr_label = "[IQR filter not applied]"
            samples_label = "samples: total=%d" % len(data)

        data_label = "%s\\n%s" % (samples_label, iqr_label)

        p.labels = [label(0.5, 0.9,
                          get_stats_label(samples),
                          coord=['graph', 'screen'], align='center'),
                    label(0.98, 0.95, data_label,
                          coord=['graph', 'graph'], align='right')]

        fname = self.write_histogram(samples, name)[1]

        p.setup_histogram(gap=1, boxwidth=1.0)

        p.title = "%s: measured %s for %s task%s per processor (host=%s)" \
            % (conf['scheduler'], TXT[conf['overhead']],
               conf['n'], 's' if conf['n'] != '1' else '',
               conf['host'])

        if self.options.normalize:
            p.ylabel = "fraction of samples"
        else:
            p.ylabel = "number of samples"
        p.xlabel = "overhead in microseconds (bin size = %.2fus)" \
            % self.options.binsize

        if self.options.ymax:
            # A logarithmic axis cannot start at zero.
            if self.options.ylog:
                ymin = 0.5 if not self.options.normalize else 0.0001
            else:
                ymin = 0
            p.yrange = (ymin, self.options.ymax)
        p.xticks = (0, 10)
        p.curves = [curve(histogram=fname, col=2, labels_col=4)]

        p.ylog = self.options.ylog

        self._apply_default_style(p, key='off', default_style='points lw 1')

        self.render(p)

    def prepare_trends(self, datafile, name, conf, want_avg_ymax=False):
        """Extract the trend columns of *datafile* into a plottable file.

        The written rows are (task count, column 5, column 6, column 9) of
        the input, which the callers plot as maximum, average and standard
        deviation.  With ``--not-per-proc`` and a host listed in HOST_CPUS
        the task count is multiplied by the number of CPUs.

        Returns ``(fname, cpus, max_y)``: the written file, the CPU factor
        that was applied (1 if none) and the largest value of the average
        column (*want_avg_ymax*) or of the maximum column.
        """
        data = load_csv_file(datafile)
        if not self.options.per_proc and \
                'host' in conf and conf['host'] in HOST_CPUS:
            cpus = HOST_CPUS[conf['host']]
            # NOTE(review): '-RM' schedulers get one CPU less; presumably one
            # CPU is reserved and runs no tasks -- confirm.
            if conf['scheduler'].endswith('-RM'):
                cpus -= 1
        else:
            cpus = 1
        # column layout of the input file
        n_idx   = 2
        wc_idx  = 5
        avg_idx = 6
        std_idx = 9

        rows = [[r[n_idx] * cpus, r[wc_idx], r[avg_idx], r[std_idx]]
                for r in data]

        max_y = numpy.amax(data[:, avg_idx if want_avg_ymax else wc_idx])

        return (self.write(rows, name), cpus, max_y)

    def _setup_trend_axes(self, p, cpus, max_y, y_step):
        """Set axis labels, ticks and ranges shared by all trend plots.

        Unless ``--ymax`` is given, the y-range ends at *max_y* rounded up to
        the next multiple of *y_step*.
        """
        p.ylabel = "overhead in microseconds"
        if self.options.per_proc:
            p.xlabel = "number of tasks per processor"
        else:
            p.xlabel = "number of tasks"

        p.xticks = (0, max(cpus, 10))
        if self.options.xmax:
            p.xrange = (0, self.options.xmax)
        elif self.options.per_proc:
            p.xrange = (0.5, 20.5)
            p.xticks = (0, 1)
        else:
            p.xrange = (0, ceil(cpus * 20 / 100.0) * 100)

        if self.options.ymax:
            p.yrange = (0, self.options.ymax)
        else:
            p.yrange = (0, ceil(max_y / float(y_step)) * y_step)

    def plot_trends(self, datafile, name, conf):
        """Plot maximum, average and standard deviation over the task count."""
        fname, cpus, max_y = self.prepare_trends(datafile, name, conf)

        p = self.make_plot(name)

        p.title = "measured %s under %s scheduling" \
            % (TXT[conf['overhead']], conf['scheduler'])

        self._setup_trend_axes(p, cpus, max_y, y_step=50)

        p.curves = [curve(fname, xcol=1, ycol=2, title="maximum"),
                    curve(fname, xcol=1, ycol=3, style="lines", title='average'),
                    curve(fname, xcol=1, ycol=3, error=4, title="std. deviation")]

        self._apply_default_style(p, key='left top',
                                  default_style='linespoints lw 1')

        self.render(p)

    def plot_comparison(self, datafiles):
        """Draw the maximum or average trend of all *datafiles* in one plot.

        Which statistic is compared is selected by ``--compare``.  All files
        must describe the same overhead type; otherwise an error is reported
        and nothing is plotted.
        """
        if self.options.compare == 'max':
            stat = 'maximum'
            want_avg_ymax = False
            ycol = 2
        else:
            stat = 'average'
            want_avg_ymax = True
            ycol = 3

        plots = []
        max_y = 0
        cpus  = 0
        overheads  = set()

        for i, datafile in enumerate(datafiles):
            self.out("[%d/%d] Processing %s ..." % (i + 1, len(datafiles), datafile))
            name = splitext(basename(datafile))[0]
            conf = decode(name)
            overheads.add(conf['overhead'])
            fname, _cpus, _max_y = self.prepare_trends(datafile, name, conf, want_avg_ymax)
            max_y = max(max_y, _max_y)
            cpus  = max(cpus, _cpus)
            plots.append((fname, conf))

        # Explicit check instead of an assert, which is stripped under -O and
        # would then let an arbitrary overhead type label the plot.
        if len(overheads) != 1:
            self.err("Cannot compare: expected exactly one overhead type, "
                     "found %d (%s)."
                     % (len(overheads), ', '.join(sorted(overheads))))
            return
        overhead = overheads.pop()

        schedulers = '_'.join([conf['scheduler'] for (_, conf) in plots])

        name = 'compare_%s-%s_%s' % (stat, overhead, schedulers)

        p = self.make_plot(name)

        p.title = "%s measured %s" \
            % (stat, TXT[overhead])

        self._setup_trend_axes(p, cpus, max_y, y_step=10)

        for (fname, conf) in plots:
            p.curves += [curve(fname, xcol=1, ycol=ycol,
                               title=sched_name(conf['scheduler']))]

        #### Styling.

        marker = 'lines'
        # The third and fourth curve get explicit line styles.
        if len(p.curves) > 2:
            p.curves[2].style = marker + " ls 4"

        if len(p.curves) > 3:
            p.curves[3].style =  marker + " ls 6"

        p.font_size = '7'
        p.size = ('6in', '2.50in')
        p.monochrome   = True
        p.dashed_lines = True
        p.key = 'top left'
        p.default_style = marker + ' lw 1'
        p.pointsize = 2

        self.render(p)

    def plot_file(self, datafile):
        """Plot one data file with the plotter matching its experiment type.

        The type is detected from the keys of the decoded file name.  Files
        of unknown type and files that raise IOError while being plotted are
        reported via err() and skipped.
        """
        bname = basename(datafile)
        name  = splitext(bname)[0]
        conf  = decode(name)
        # Checked in this order; the first key present in conf wins.
        plotters = (
            ('taskset', self.plot_samples),
            ('otrend',  self.plot_trends),
            )

        for plot_type, plotter in plotters:
            if plot_type in conf:
                try:
                    plotter(datafile, name, conf)
                except IOError as err:
                    self.err("Skipped '%s' (%s)." % (datafile, err))
                break
        else:
            self.err("Skipped '%s'; unknown experiment type."
                     % bname)
        # release all tmp files
        self.tmpfiles = []

    def default(self, _):
        """Default action: plot a comparison or each input file in turn."""
        if self.options.outlier_file:
            self.options.outliers = load_outliers(self.options.outlier_file)

        if self.options.compare is not None:
            self.plot_comparison(self.args)
        else:
            for i, datafile in enumerate(self.args):
                self.out("[%d/%d] Processing %s ..." % (i + 1, len(self.args), datafile))
                self.plot_file(datafile)

# Script entry point: only when executed directly (not when imported),
# create the application object and call its launch() method.
if __name__ == "__main__":
    OverheadPlotter().launch()