#!/usr/bin/env python
"""Plot scheduling and release overheads from per-scheduler CSV files.

Usage: pass the data directory as the only argument.  The directory is
expected to hold files named ``scheduler=<S>_overhead=<O>.csv`` for every
scheduler in ``SCHEDULERS`` and every overhead type plotted below.  Four
PDF plots (worst-case and average-case, for scheduling and for release
overheads) are written into the same directory.
"""
import sys
import time
from os.path import basename
from os import listdir
from tempfile import NamedTemporaryFile as Tmp

from plot import decode, Plot
from gnuplot import curve

# Scheduler names as they appear in the CSV file names.
SCHEDULERS = ['MC', 'MC-MERGE', 'MC-MERGE-REDIR']

# Zero-based index of each named column within a CSV row.
COLS = {
    'plugin': 0,
    'overhead': 1,
    'n_tasks': 2,
    'samples': 3,
    'filtered': 4,
    'max': 5,
    'avg': 6,
    'min': 7,
    'med': 8,
    'std': 9,
    'var': 10,
    'iqr_max': 11,
    'iqr_min': 12,
}


def usage(msg):
    """Write *msg* to stderr and exit with status 1."""
    # sys.stderr.write behaves the same on Python 2 and 3, unlike the
    # Python 2-only ``print >>sys.stderr`` statement.
    sys.stderr.write('{0}\n'.format(msg))
    sys.exit(1)


def get_csv_files(directory):
    """Return the entries of *directory* named ``scheduler*.csv``."""
    return [f for f in listdir(directory)
            if f.startswith('scheduler') and f.endswith('.csv')]


def gnuplot_col(col_name):
    """Return the gnuplot column number for the CSV column *col_name*."""
    # gnuplot is 1-indexed
    return 1 + COLS[col_name]


def get_sched_title(sched):
    """Return the plot-key label for scheduler *sched* (KeyError if unknown)."""
    titles = {'MC': 'Basic',
              'MC-MERGE': 'IM + TM',
              'MC-MERGE-REDIR': 'IM + TM + WR'}
    return titles[sched]


def get_overhead_title(ov):
    """Return the plot-key suffix for overhead type *ov* (KeyError if unknown)."""
    titles = {'SCHED': '(A + B + C)',
              'LVLA-SCHED': '(A)',
              'RELEASE': '(A + B + C)',
              'LVLA-RELEASE': '(A)'}
    return titles[ov]


def _overhead_csv(data_dir, scheduler, overhead):
    """Return the path of the CSV file for *scheduler* and *overhead*."""
    return '{0}/scheduler={1}_overhead={2}.csv'.format(data_dir, scheduler,
                                                       overhead)


def set_plot_opts(p):
    """Apply the shared fonts, sizes, ranges and line styles to plot *p*.

    Must be called after the curves have been added to ``p.curves``,
    because each curve's style is assigned here by position.
    """
    p.rounded_caps = True
    p.font = 'Helvetica'
    p.font_size = '5pt'
    p.size = ('8.5cm', '5.25cm')
    p.default_style = 'linespoints lw 2.5'
    p.default_style += ' smooth bezier'
    p.key = 'off'
    p.monochrome = False
    p.dashed_lines = False
    p.xrange = (18, 122)
    p.yrange = (0, '')

    for i, c in enumerate(p.curves):
        c.style = "linespoints ls %d" % (i + 1)
    # don't use yellow: the sixth curve takes line style 7 instead of 6.
    # Guarded so that plots with fewer than six curves do not crash.
    if len(p.curves) > 5:
        p.curves[5].style = "linespoints ls 7"

    p.line_styles = [
        (1, "lw 1.5 ps 0.3"),
        (2, "lw 1.5 ps 0.3"),
        (3, "lw 1.5 ps 0.3"),
        (4, "lw 1.5 ps 0.3"),
        (5, 'pt 6 lw 1.5 ps 0.3 lc rgbcolor "#ff910d"'),
        (6, "pt 7 lw 1.5 ps 0.3"),
        (7, 'lw 1.5 ps 0.3 lc rgbcolor "#000000"'),
        (8, "lw 1.5 ps 0.3"),
    ]


def get_data_matrix(fname):
    """Read the comma-separated file *fname* into a list of rows.

    Every field is stripped of surrounding whitespace and converted to
    ``float`` when possible; fields that are not numeric stay as strings.
    Blank lines are skipped so that e.g. a trailing empty line does not
    yield a one-element row that breaks column indexing later on.
    """
    ret = []
    with open(fname, 'r') as f:
        for row in f:
            if not row.strip():
                continue
            new_row = []
            for field in row.split(','):
                s_field = field.strip()
                try:
                    conv = float(s_field)
                except ValueError:
                    conv = s_field
                new_row.append(conv)
            ret.append(new_row)
    return ret


def include_level_a_releases(data_dir, o_type, scheduler, ycol):
    """Merge the LVLA-RELEASE data into the RELEASE data for *scheduler*.

    The result has one row per row of the LVLA-RELEASE file.  Each row is a
    copy of the LVLA-RELEASE row in which the column selected by *ycol* is
    replaced by the combination of both files:

    * ``max`` column: the larger of the two maxima;
    * ``avg`` column: the mean of the two averages, each weighted by its
      ``samples - filtered`` count (presumably the number of samples that
      actually contributed to the average -- verify against the producer
      of the CSV files).  If both weights are zero the level-A average is
      kept unchanged.

    *o_type* is accepted for interface compatibility and is not used.

    Returns ``(name, handle)`` for a temporary file holding the merged
    rows.  The file is deleted as soon as *handle* is closed or garbage
    collected, so the caller must keep *handle* alive while the file is
    needed.

    Raises RuntimeError if *ycol* is neither the ``max`` nor the ``avg``
    gnuplot column, or if the RELEASE file has fewer rows than the
    LVLA-RELEASE file.
    """
    merge_max = ycol == gnuplot_col('max')
    merge_avg = ycol == gnuplot_col('avg')
    if not (merge_max or merge_avg):
        raise RuntimeError("Don't know how to merge this column.")

    a_fname = _overhead_csv(data_dir, scheduler, 'LVLA-RELEASE')
    b_c_fname = _overhead_csv(data_dir, scheduler, 'RELEASE')
    a_data = get_data_matrix(a_fname)
    b_c_data = get_data_matrix(b_c_fname)

    if len(b_c_data) < len(a_data):
        raise RuntimeError(
            "{0} has fewer rows than {1}; cannot merge.".format(b_c_fname,
                                                                a_fname))

    new_data = []
    for i, a_row in enumerate(a_data):
        b_c_row = b_c_data[i]
        # make a copy of a_row
        new_row = list(a_row)
        if merge_max:
            idx = COLS['max']
            new_row[idx] = max(a_row[idx], b_c_row[idx])
        else:
            avg_col = COLS['avg']
            n_a = a_row[COLS['samples']] - a_row[COLS['filtered']]
            n_b_c = b_c_row[COLS['samples']] - b_c_row[COLS['filtered']]
            total = n_a + n_b_c
            # With no contributing samples the weighted mean is undefined;
            # leave the copied level-A value in place instead of dividing
            # by zero.
            if total != 0:
                new_row[avg_col] = (a_row[avg_col] * n_a +
                                    b_c_row[avg_col] * n_b_c) / total
        new_data.append(new_row)

    # Text mode so that writing str works on Python 3 as well as Python 2.
    f = Tmp(mode='w')
    try:
        for row in new_data:
            f.write(', '.join(map(str, row)))
            f.write('\n')
        f.flush()
    except Exception:
        # Do not leave the temporary file behind if writing failed.
        f.close()
        raise
    return (f.name, f)


def plot_release(data_dir, ycol, title, fname):
    """Plot release overheads for all schedulers into ``data_dir/fname``.

    *ycol* is the gnuplot column to plot.  The RELEASE curves are plotted
    from temporary files that also account for the level-A releases (see
    ``include_level_a_releases``); the LVLA-RELEASE curves are plotted
    straight from their CSV files.  *title* is accepted but not used.
    """
    p = Plot()
    p.output = '{0}/{1}'.format(data_dir, fname)
    p.format = 'pdf'

    # need to save reference to file handle so it is not deleted
    refs = []
    try:
        for o_type in ['RELEASE', 'LVLA-RELEASE']:
            for sched in SCHEDULERS:
                if o_type == 'RELEASE':
                    # we have to make the regular release include the
                    # level-A releases
                    csv_name, ref = include_level_a_releases(
                        data_dir, o_type, sched, ycol)
                    refs.append(ref)
                else:
                    csv_name = _overhead_csv(data_dir, sched, o_type)
                ti = '{0}{1}'.format(get_sched_title(sched),
                                     get_overhead_title(o_type))
                p.curves += [curve(fname=csv_name,
                                   xcol=gnuplot_col('n_tasks'),
                                   ycol=ycol, title=ti)]

        p.xlabel = 'number of tasks'
        p.ylabel = 'overhead (microseconds)'
        set_plot_opts(p)
        if ycol == gnuplot_col('max'):
            # worst-case release gets the key
            p.key = 'top left'
            # make the graph tall so the key fits
            p.yrange = (0, 80)
        p.gnuplot_exec()
    finally:
        # Closing the handles deletes the temporary files.
        for ref in refs:
            ref.close()


def plot_sched(data_dir, ycol, title, fname):
    """Plot scheduling overheads for all schedulers into ``data_dir/fname``.

    *ycol* is the gnuplot column to plot.  One curve is added per
    combination of overhead type (SCHED, LVLA-SCHED) and scheduler.
    *title* is accepted but not used.
    """
    p = Plot()
    p.output = '{0}/{1}'.format(data_dir, fname)
    p.format = 'pdf'

    for o_type in ['SCHED', 'LVLA-SCHED']:
        for sched in SCHEDULERS:
            csv_name = _overhead_csv(data_dir, sched, o_type)
            ti = '{0}{1}'.format(get_sched_title(sched),
                                 get_overhead_title(o_type))
            p.curves += [curve(fname=csv_name,
                               xcol=gnuplot_col('n_tasks'),
                               ycol=ycol, title=ti)]

    p.xlabel = 'number of tasks'
    p.ylabel = 'overhead (microseconds)'
    set_plot_opts(p)
    p.gnuplot_exec()


def main():
    """Generate the four overhead plots for the directory in ``sys.argv[1]``."""
    if len(sys.argv) < 2:
        usage('missing args')
    data_dir = sys.argv[1]

    plot_sched(data_dir, gnuplot_col('max'),
               'worst-case scheduling overhead',
               'overhead=SCHED_type=MAX.pdf')
    plot_release(data_dir, gnuplot_col('max'),
                 'worst-case release overhead',
                 'overhead=RELEASE_type=MAX.pdf')
    plot_sched(data_dir, gnuplot_col('avg'),
               'average-case scheduling overhead',
               'overhead=SCHED_type=AVG.pdf')
    plot_release(data_dir, gnuplot_col('avg'),
                 'average-case release overhead',
                 'overhead=RELEASE_type=AVG.pdf')


if __name__ == '__main__':
    main()