#!/usr/bin/env python

import sys
import time

from os.path import basename
from os import listdir

from tempfile import NamedTemporaryFile as Tmp

from plot import decode, Plot
from gnuplot import curve

# Scheduler plugin names; they select the 'scheduler=<name>_overhead=...csv'
# input files and fix the order in which curves are added to a plot.
SCHEDULERS = ['MC', 'MC-MERGE', 'MC-MERGE-REDIR']

# Zero-based position of every field within one row of an overhead CSV file.
# The position of a name in this list is its column index.
COLS = dict((name, index) for index, name in enumerate([
    'plugin',
    'overhead',
    'n_tasks',
    'samples',
    'filtered',
    'max',
    'avg',
    'min',
    'med',
    'std',
    'var',
    'iqr_max',
    'iqr_min',
]))


def usage(msg):
    """Write ``msg`` to standard error and exit with status 1.

    Args:
        msg: Text to show the user; it is rendered with ``%s`` formatting
            and followed by a newline.

    Raises:
        SystemExit: always, with exit code 1.
    """
    # Write to the stream directly rather than with the Python 2 only
    # ``print >>stream`` statement, which raises TypeError on Python 3.
    sys.stderr.write('%s\n' % (msg,))
    sys.exit(1)

def get_csv_files(directory):
    """Return the names of the scheduler CSV files inside ``directory``.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A list of entry names (not full paths) that start with
        ``'scheduler'`` and end with ``'.csv'``, in the order ``listdir``
        reports them.
    """
    # listdir already yields bare entry names, so no basename() is needed.
    return [name for name in listdir(directory)
            if name.startswith('scheduler') and name.endswith('.csv')]

def gnuplot_col(col_name):
    """Translate a ``COLS`` field name into the column number for gnuplot.

    Raises KeyError when ``col_name`` is not a key of ``COLS``.
    """
    zero_based = COLS[col_name]
    # COLS counts columns from 0, gnuplot counts them from 1
    return zero_based + 1

def get_sched_title(sched):
    """Return the label used in curve titles for scheduler plugin ``sched``.

    Raises KeyError when the plugin name has no label.
    """
    labels = {
        'MC-MERGE-REDIR': 'IM + TM + WR',
        'MC-MERGE': 'IM + TM',
        'MC': 'Basic',
    }
    return labels[sched]

def get_overhead_title(ov):
    """Return the parenthesised suffix added to curve titles for ``ov``.

    Raises KeyError when ``ov`` is not one of the known overhead types.
    """
    a_b_c = '(A + B + C)'
    only_a = '(A)'
    suffixes = {
        'SCHED': a_b_c,
        'RELEASE': a_b_c,
        'LVLA-SCHED': only_a,
        'LVLA-RELEASE': only_a,
    }
    return suffixes[ov]

def set_plot_opts(p):
    """Apply the shared look (font, size, ranges, line styles) to plot ``p``.

    Args:
        p: Plot-like object; its attributes are assigned in place and every
            element of ``p.curves`` gets a ``style`` attribute.

    Curve ``i`` (zero-based) is drawn with line style ``i + 1``, except the
    sixth curve, which is moved to line style 7. Plots with fewer than six
    curves are accepted; the remapping is then simply skipped.
    """
    p.rounded_caps = True
    p.font = 'Helvetica'
    p.font_size = '5pt'
    p.size = ('8.5cm', '5.25cm')
    p.default_style = 'linespoints lw 2.5 smooth bezier'
    p.key = 'off'
    p.monochrome = False
    p.dashed_lines = False
    p.xrange = (18, 122)
    # Empty upper bound: only the lower end of the y range is pinned here.
    p.yrange = (0, '')

    for i, c in enumerate(p.curves):
        c.style = "linespoints ls %d" % (i + 1)

    # don't use yellow: the sixth curve takes line style 7 (black, see the
    # table below) instead of line style 6.  Guarded so that a plot with
    # fewer than six curves does not fail with IndexError.
    if len(p.curves) > 5:
        p.curves[5].style = "linespoints ls 7"

    p.line_styles = [
        (1, "lw 1.5 ps 0.3"),
        (2, "lw 1.5 ps 0.3"),
        (3, "lw 1.5 ps 0.3"),
        (4, "lw 1.5 ps 0.3"),
        (5, 'pt 6 lw 1.5 ps 0.3 lc rgbcolor "#ff910d"'),
        (6, "pt 7 lw 1.5 ps 0.3"),
        (7, 'lw 1.5 ps 0.3 lc rgbcolor "#000000"'),
        (8, "lw 1.5 ps 0.3"),
        ]


def get_data_matrix(fname):
    """Load the comma-separated file ``fname`` as a list of rows.

    Every line becomes one list of fields. A field is stripped of
    surrounding whitespace and turned into a float when ``float`` accepts
    it; otherwise the stripped text is kept as a string.
    """
    def _coerce(raw):
        text = raw.strip()
        try:
            return float(text)
        except ValueError:
            # not numeric: keep the stripped text
            return text

    with open(fname, 'r') as handle:
        return [[_coerce(cell) for cell in line.split(',')]
                for line in handle]


def include_level_a_releases(data_dir, o_type, scheduler, ycol):
    """Fold the RELEASE overheads into the LVLA-RELEASE rows of a scheduler.

    Reads ``scheduler=<scheduler>_overhead=LVLA-RELEASE.csv`` and
    ``scheduler=<scheduler>_overhead=RELEASE.csv`` from ``data_dir``, pairs
    their rows by position and writes the merged rows to a temporary file.

    Args:
        data_dir: Directory that holds the two CSV files.
        o_type: Accepted for the caller's convenience; not used here.
        scheduler: Scheduler plugin name used in the file names.
        ycol: gnuplot (1-based) column to merge. ``max`` takes the larger of
            the two values; ``avg`` takes the mean weighted by the number
            of unfiltered samples (``samples - filtered``) of each row.

    Returns:
        ``(name, handle)`` of the temporary file. The file is removed when
        the handle is closed or garbage collected, so the caller has to
        keep the handle alive for as long as the file is needed.

    Raises:
        RuntimeError: if a row has to be merged for any other column.
        IndexError: if the RELEASE file has fewer rows than the
            LVLA-RELEASE file.
    """
    fmt_f = '{0}/scheduler={1}_overhead={2}.csv'
    a_fname = fmt_f.format(data_dir, scheduler, 'LVLA-RELEASE')
    b_c_fname = fmt_f.format(data_dir, scheduler, 'RELEASE')
    a_data = get_data_matrix(a_fname)
    b_c_data = get_data_matrix(b_c_fname)

    # These lookups do not depend on the row, so do them once.
    max_col = COLS['max']
    avg_col = COLS['avg']
    samples_col = COLS['samples']
    filtered_col = COLS['filtered']
    merge_max = gnuplot_col('max') == ycol
    merge_avg = gnuplot_col('avg') == ycol

    new_data = []
    for i, a_row in enumerate(a_data):
        b_c_row = b_c_data[i]
        # start from a copy of the level-A row and replace the merged field
        new_row = list(a_row)
        if merge_max:
            new_row[max_col] = max(a_row[max_col], b_c_row[max_col])
        elif merge_avg:
            n_a = a_row[samples_col] - a_row[filtered_col]
            n_b_c = b_c_row[samples_col] - b_c_row[filtered_col]
            total = n_a + n_b_c
            if total:
                new_row[avg_col] = (a_row[avg_col] * n_a +
                                    b_c_row[avg_col] * n_b_c) / total
            # else: neither row has unfiltered samples, so the weighted mean
            # is undefined; keep the copied level-A average instead of
            # failing with ZeroDivisionError.
            # NOTE(review): confirm this fallback is the wanted value.
        else:
            raise RuntimeError("Don't know how to merge this column.")
        new_data.append(new_row)

    # Text mode: the default ('w+b') rejects str writes on Python 3.
    f = Tmp(mode='w')
    f.writelines(', '.join(map(str, row)) + '\n' for row in new_data)
    # flush so gnuplot sees the rows when it opens the file by name
    f.flush()
    return (f.name, f)


def plot_release(data_dir, ycol, title, fname):
    """Plot the release overheads of every scheduler into one PDF.

    Args:
        data_dir: Directory with the input CSV files; the output is written
            there too.
        ycol: gnuplot (1-based) column to plot on the y axis.
        title: Accepted for symmetry with the callers; not used here.
        fname: File name of the generated plot inside ``data_dir``.

    For the 'RELEASE' curves the data comes from a temporary file produced
    by ``include_level_a_releases``; the 'LVLA-RELEASE' curves read their
    CSV file directly.
    """
    p = Plot()
    p.output = '{0}/{1}'.format(data_dir, fname)
    p.format = 'pdf'
    # Temporary files vanish once their handle is closed, so the handles are
    # kept until gnuplot has run and are then closed explicitly.
    refs = []

    try:
        for o_type in ['RELEASE', 'LVLA-RELEASE']:
            for sched in SCHEDULERS:
                if o_type == 'RELEASE':
                    # we have to make the regular release include the
                    # level-A releases
                    data_fname, ref = include_level_a_releases(
                        data_dir, o_type, sched, ycol)
                    refs.append(ref)
                else:
                    data_fname = '{0}/scheduler={1}_overhead={2}.csv'.format(data_dir, sched, o_type)
                ti = '{0}{1}'.format(get_sched_title(sched), get_overhead_title(o_type))
                p.curves += [curve(fname=data_fname, xcol=gnuplot_col('n_tasks'),
                    ycol=ycol, title=ti)]
        p.xlabel = 'number of tasks'
        p.ylabel = 'overhead (microseconds)'
        set_plot_opts(p)
        if ycol == gnuplot_col('max'):
            # worst-case release gets the key
            p.key = 'top left'
            # make the graph tall so the key fits
            p.yrange = (0, 80)
        p.gnuplot_exec()
    finally:
        # Remove the temporary files deterministically, also on errors,
        # instead of relying on garbage collection.
        for ref in refs:
            ref.close()

def plot_sched(data_dir, ycol, title, fname):
    """Plot the scheduling overheads of every scheduler into one PDF.

    One curve is added per (overhead type, scheduler) pair, reading
    'scheduler=<sched>_overhead=<type>.csv' from ``data_dir``. The plot is
    written to ``fname`` inside ``data_dir``. ``title`` is accepted but not
    used by this function.
    """
    plot = Plot()
    plot.output = '{0}/{1}'.format(data_dir, fname)
    plot.format = 'pdf'

    # the x axis is the same column for every curve
    x_column = gnuplot_col('n_tasks')
    for overhead in ('SCHED', 'LVLA-SCHED'):
        for sched in SCHEDULERS:
            csv_path = '{0}/scheduler={1}_overhead={2}.csv'.format(data_dir, sched, overhead)
            label = '{0}{1}'.format(get_sched_title(sched), get_overhead_title(overhead))
            new_curve = curve(fname=csv_path, xcol=x_column, ycol=ycol,
                              title=label)
            plot.curves += [new_curve]

    plot.xlabel = 'number of tasks'
    plot.ylabel = 'overhead (microseconds)'
    set_plot_opts(plot)
    plot.gnuplot_exec()


def main():
    """Generate the four overhead plots for the directory given in argv[1].

    Exits through ``usage`` when no directory argument was supplied.
    """
    args = sys.argv
    if not len(args) >= 2:
        usage('missing args')

    data_dir = args[1]
    # (plot function, COLS field, title, output file), in generation order
    jobs = (
        (plot_sched, 'max', 'worst-case scheduling overhead', 'overhead=SCHED_type=MAX.pdf'),
        (plot_release, 'max', 'worst-case release overhead', 'overhead=RELEASE_type=MAX.pdf'),
        (plot_sched, 'avg', 'average-case scheduling overhead', 'overhead=SCHED_type=AVG.pdf'),
        (plot_release, 'avg', 'average-case release overhead', 'overhead=RELEASE_type=AVG.pdf'),
    )
    for make_plot, field, caption, out_name in jobs:
        make_plot(data_dir, gnuplot_col(field), caption, out_name)
    
# Run the plots only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()