diff options
Diffstat (limited to 'oplot.py')
| -rwxr-xr-x | oplot.py | 215 |
1 files changed, 215 insertions, 0 deletions
diff --git a/oplot.py b/oplot.py new file mode 100755 index 0000000..2f7f0cb --- /dev/null +++ b/oplot.py | |||
| @@ -0,0 +1,215 @@ | |||
| 1 | #!/usr/bin/env python | ||
| 2 | import defapp | ||
| 3 | |||
| 4 | from plot import decode | ||
| 5 | from util import load_csv_file, load_binary_file, write_csv_file | ||
| 6 | from stats import iqr_cutoff | ||
| 7 | |||
| 8 | from math import ceil | ||
| 9 | |||
| 10 | import numpy | ||
| 11 | |||
| 12 | from os.path import splitext, basename | ||
| 13 | from optparse import make_option as o | ||
| 14 | |||
| 15 | from gnuplot import gnuplot, FORMATS, Plot, label, curve | ||
| 16 | |||
# Command-line options understood by the plotter.  Every `dest` has a
# matching fallback value in `defaults`.
options = [
    # output options
    o('-f', '--format', action='store', dest='format', type='choice',
      choices=FORMATS, help='output format'),
    o(None, '--save-script', action='store_true', dest='save_script',
      help='save the gnuplot script and data files instead of rendering'),
    o('-p', '--prefix', action='store', dest='prefix',
      help='prefix for the plot output file names'),

    # data processing
    o('-i', '--iqr-extent', action='store', dest='extent', type='float',
      help='what extent to use for outlier removal'),
    # The cycle rate is host-specific, so it must be overridable.
    o(None, '--cycles', action='store', dest='cycles', type='float',
      help='processor cycles per microsecond used to scale samples'),

    # formatting options
    o(None, '--binsize', action='store', dest='binsize', type='float',
      help='histogram bin size in microseconds'),
]
| 28 | |||
# Fallback option values; handed to defapp.App together with `options`.
defaults = dict(
    # output options
    format='pdf',
    save_script=False,
    prefix='',

    # data processing
    cycles=2128,  # per usec
    extent=3,

    # formatting options
    binsize=0.25,
)
| 42 | |||
| 43 | |||
def get_data(fname, scale, extend):
    """Load the samples in `fname`, scale and sort them, and find IQR cutoffs.

    fname  -- path handed to load_binary_file()
    scale  -- factor applied in place to every sample; None skips scaling
    extend -- extent argument forwarded to iqr_cutoff()

    Returns the list [data, max_idx, min_idx, iqr_max, iqr_min], where `data`
    is the sorted sample array, iqr_min/iqr_max are the cutoff values reported
    by iqr_cutoff(), and min_idx/max_idx are their insertion positions in
    `data` as computed by numpy.searchsorted().
    """
    samples = load_binary_file(fname)

    if scale is not None:
        samples *= scale

    # searchsorted() below requires ascending order.
    samples.sort()

    low_cut, high_cut = iqr_cutoff(samples, extend)
    low_idx, high_idx = numpy.searchsorted(samples, [low_cut, high_cut])

    return [samples, high_idx, low_idx, high_cut, low_cut]
| 56 | |||
| 57 | |||
def get_stats_label(samples):
    """Return a one-line summary of `samples` for use as a plot label.

    The label lists minimum, maximum, mean, median and standard deviation,
    each formatted with two decimals and a "us" suffix.

    The extremes are computed with numpy.amin()/numpy.amax() rather than read
    from the first and last element, so the label is correct whether or not
    `samples` is sorted.

    Raises ValueError (from numpy) if `samples` is empty.
    """
    return "min=%.2fus max=%.2fus avg=%.2fus median=%.2fus stdev=%.2fus" % (
        numpy.amin(samples),
        numpy.amax(samples),
        numpy.mean(samples),
        numpy.median(samples),
        numpy.std(samples),
    )
| 66 | |||
class OverheadPlotter(defapp.App):
    """Application that turns overhead sample files into gnuplot histograms.

    Every positional argument is treated as a sample file.  The experiment
    configuration is decoded from the file's base name, the samples are
    filtered with an IQR cutoff, and a histogram plot is rendered (or, with
    --save-script, its gnuplot script and data file are saved instead).
    """

    def __init__(self):
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
        # Temporary data files; referenced here so they outlive rendering.
        self.tmpfiles = []

    def make_plot(self, fname=None):
        """Return a new Plot writing to '<prefix><fname>.<format>'."""
        p = Plot()
        p.output = "%s%s.%s" % (self.options.prefix, fname, self.options.format)
        p.format = self.options.format
        return p

    def setup_png(self, plot):
        """Apply the PNG settings to `plot` if the output format is 'png'.

        Returns True if the settings were applied, False otherwise.
        """
        # standard png options; usually correct; never tweaked for paper
        if self.options.format != 'png':
            return False
        plot.font_size = 'large'
        plot.size = (1024, 768)
        plot.xticks = (0, 1)
        plot.yticks = (0, 0.1)
        plot.default_style = "linespoints"
        return True

    def write(self, data, name, ext='data'):
        """Write `data` as CSV and return the path of the written file.

        With --save-script the file is named '<name>.<ext>'; otherwise
        write_csv_file() supplies a temporary file, which is remembered in
        self.tmpfiles.
        """
        if self.options.save_script:
            fname = "%s.%s" % (name, ext)
            write_csv_file(fname, data)
            return fname
        tmp = write_csv_file(None, data)
        # keep a reference so that it isn't deleted
        self.tmpfiles.append(tmp)
        return tmp.name

    def write_histogram(self, samples, name, labels=10):
        """Bin `samples` and write the histogram to a '.hist' data file.

        samples -- the values to bin; must be non-empty
        name    -- base name for the data file (see write())
        labels  -- approximate number of bins that get a tick label

        Returns (data, fname).  `data` is a (num_bins, 3) array whose columns
        are bin centre, sample count and cumulative sample count.  `fname` is
        the written file; its rows carry a fourth column holding the tick
        label (the bin centre with two decimals, or '' for unlabelled bins).
        """
        upper = ceil(numpy.amax(samples))
        bin_size = self.options.binsize
        num_bins = int(upper / bin_size)
        # The range starts half a bin above zero, so bin centres land on
        # multiples of bin_size whenever `upper` is itself such a multiple.
        # Samples below bin_size / 2 lie outside the range and are not counted.
        counts, edges = numpy.histogram(
            samples, bins=num_bins,
            range=(bin_size / 2, upper + bin_size / 2))

        data = numpy.zeros((num_bins, 3))
        data[:, 0] = (edges[:-1] + edges[1:]) / 2.0
        data[:, 1] = counts
        data[:, 2] = numpy.cumsum(counts)

        # Label every label_rate-th bin; floor division keeps this an integer.
        label_rate = (len(counts) // labels) or 1
        for_file = [
            [centre, count, total,
             '%.2f' % centre if i % label_rate == 0 else '']
            for i, (centre, count, total) in enumerate(data)
        ]

        return (data, self.write(for_file, name, ext='hist'))

    def render(self, p):
        """Save the gnuplot script for `p` (--save-script) or execute it."""
        if self.options.save_script:
            p.gnuplot_save(p.output + '.plot')
        else:
            p.gnuplot_exec()

    def plot_samples(self, datafile, name, conf):
        """Plot a histogram of the IQR-filtered samples stored in `datafile`.

        datafile -- path of the sample file
        name     -- base name used for the plot and its data file
        conf     -- decoded experiment configuration; the keys 'overhead',
                    'scheduler' and 'host' are read
        """
        if conf['overhead'] == 'RELEASE-LATENCY':
            # NOTE(review): presumably recorded in nanoseconds -- confirm.
            scale = 1.0 / 1000.0
        else:
            # Other overheads are divided by the cycles-per-usec rate.
            scale = 1.0 / self.options.cycles

        data, max_idx, min_idx, iqr_max, iqr_min = get_data(
            datafile, scale, self.options.extent)
        samples = data[min_idx:max_idx]
        discarded = (len(data) - len(samples)) / float(len(data)) * 100

        p = self.make_plot(name)

        # %g keeps fractional extents (e.g. 1.5) intact; %d truncated them.
        iqr_label = "IQR: extent=%g threshold=%.2fus filtered=%.2f%%" % \
            (self.options.extent, iqr_max, discarded)

        p.labels = [label(0.5, 0.9,
                          get_stats_label(samples),
                          coord=['graph', 'screen'], align='center'),
                    label(0.98, 0.95, iqr_label,
                          coord=['graph', 'graph'], align='right')]

        (hist, fname) = self.write_histogram(samples, name)

        p.setup_histogram(gap=1, boxwidth=1.0)

        p.title = "measured overheads scheduler=%s; overhead=%s; host=%s" \
            % (conf['scheduler'], conf['overhead'], conf['host'])

        p.ylabel = "number of samples"
        p.xlabel = "overhead in microseconds (bin size = %.2fus)" \
            % self.options.binsize
        p.xticks = (0, 10)
        # Round the y range up to the next multiple of 100 samples.
        p.yrange = (0, (ceil(numpy.amax(hist[:, 1]) / 100.0) * 100))
        p.curves = [curve(histogram=fname, col=2, labels_col=4)]

        #### Styling.

        if not self.setup_png(p):
            p.rounded_caps = True
            p.font = 'Helvetica'

            p.font_size = '10'
            p.size = ('20cm', '10cm')
            p.monochrome = False
            p.dashed_lines = False
            p.key = 'off'
            p.default_style = 'points lw 1'

        self.render(p)

    def plot_file(self, datafile):
        """Decode the configuration from `datafile`'s name and plot it.

        An IOError raised while plotting is reported and the file is skipped;
        files whose configuration matches no known plot type are reported
        as well.
        """
        bname = basename(datafile)
        name = splitext(bname)[0]
        conf = decode(name)
        plotters = {
            'taskset': self.plot_samples,
        }

        for plot_type in plotters:
            if plot_type in conf:
                try:
                    plotters[plot_type](datafile, name, conf)
                except IOError as err:
                    # Both placeholders need a value; passing only `err`
                    # raised TypeError inside the error handler.
                    self.err("Skipped '%s' (%s)." % (datafile, err))
                break
        else:
            self.err("Skipped '%s'; unkown experiment type."
                     % bname)
        # release all tmp files
        self.tmpfiles = []

    def default(self, _):
        """Process every file in self.args, reporting progress per file."""
        total = len(self.args)
        for i, datafile in enumerate(self.args):
            self.out("[%d/%d] Processing %s ..." % (i + 1, total, datafile))
            self.plot_file(datafile)
| 213 | |||
if __name__ == "__main__":
    # Start the application only when run as a script, not when imported.
    app = OverheadPlotter()
    app.launch()
