diff options
-rwxr-xr-x | oplot.py | 215 |
1 files changed, 215 insertions, 0 deletions
diff --git a/oplot.py b/oplot.py new file mode 100755 index 0000000..2f7f0cb --- /dev/null +++ b/oplot.py | |||
@@ -0,0 +1,215 @@ | |||
1 | #!/usr/bin/env python | ||
2 | import defapp | ||
3 | |||
4 | from plot import decode | ||
5 | from util import load_csv_file, load_binary_file, write_csv_file | ||
6 | from stats import iqr_cutoff | ||
7 | |||
8 | from math import ceil | ||
9 | |||
10 | import numpy | ||
11 | |||
12 | from os.path import splitext, basename | ||
13 | from optparse import make_option as o | ||
14 | |||
15 | from gnuplot import gnuplot, FORMATS, Plot, label, curve | ||
16 | |||
# Command-line switches for the plotter, declared with optparse's
# make_option (imported as `o`) and handed to defapp.App by OverheadPlotter.
options = [
    # output options
    o('-f', '--format', dest='format', action='store',
      type='choice', choices=FORMATS,
      help='output format'),
    o(None, '--save-script', dest='save_script', action='store_true'),
    o('-p', '--prefix', dest='prefix', action='store'),

    # outlier filtering
    o('-i', '--iqr-extent', dest='extent', action='store', type='float',
      help='what extent to use for outlier removal'),
]
28 | |||
# Default settings handed to defapp.App together with `options`.
defaults = dict(
    # output options
    format='pdf',
    save_script=False,
    prefix='',

    # data processing
    cycles=2128,  # per usec
    extent=3,

    # formatting options
    binsize=0.25,
)
42 | |||
43 | |||
def get_data(fname, scale, extend):
    """Load, scale and sort the samples in *fname* and locate the IQR cutoffs.

    fname  -- path passed to load_binary_file()
    scale  -- factor every sample is multiplied by in place; None skips scaling
    extend -- extent argument forwarded to iqr_cutoff()

    Returns the list [data, max_idx, min_idx, iqr_max, iqr_min]: the sorted
    samples, the insertion indices of the upper and lower cutoff in the
    sorted samples, and the two cutoff values themselves (upper first).
    """
    samples = load_binary_file(fname)
    if scale is not None:
        samples *= scale

    # searchsorted() below requires ascending order.
    samples.sort()

    cutoff_lo, cutoff_hi = iqr_cutoff(samples, extend)
    idx_lo, idx_hi = numpy.searchsorted(samples, [cutoff_lo, cutoff_hi])

    return [samples, idx_hi, idx_lo, cutoff_hi, cutoff_lo]
56 | |||
57 | |||
def get_stats_label(samples):
    """Return a one-line summary of *samples* for use as a plot label.

    The first and last elements are reported as minimum and maximum, so
    *samples* is expected to be sorted in ascending order and non-empty.
    All values are formatted with two decimals and a "us" suffix.
    """
    mean = numpy.mean(samples)
    median = numpy.median(samples)
    stdev = numpy.std(samples)
    highest = samples[-1]
    lowest = samples[0]
    summary = (lowest, highest, mean, median, stdev)
    return ("min=%.2fus max=%.2fus avg=%.2fus median=%.2fus stdev=%.2fus"
            % summary)
66 | |||
class OverheadPlotter(defapp.App):
    """Command-line application that renders one histogram of overhead
    samples per data file given as an argument.

    Settings are read from self.options (see the module-level `options`
    and `defaults`); the files to process are read from self.args.
    """

    def __init__(self):
        defapp.App.__init__(self, options, defaults, no_std_opts=True)
        # Temporary data files referenced by the current plot; holding the
        # objects here keeps them from being deleted before rendering.
        self.tmpfiles = []

    def make_plot(self, fname=None):
        """Return a new Plot that writes to '<prefix><fname>.<format>'."""
        p = Plot()
        p.output = "%s%s.%s" % (self.options.prefix, fname,
                                self.options.format)
        p.format = self.options.format
        return p

    def setup_png(self, plot):
        """Apply the PNG styling to *plot* if the output format is 'png'.

        Returns True when the PNG settings were applied, False otherwise
        (in which case *plot* is left untouched).
        """
        # standard png options; usually correct; never tweaked for paper
        if self.options.format != 'png':
            return False
        plot.font_size = 'large'
        plot.size = (1024, 768)
        plot.xticks = (0, 1)
        plot.yticks = (0, 0.1)
        plot.default_style = "linespoints"
        return True

    def write(self, data, name, ext='data'):
        """Write *data* as CSV and return the name of the file written.

        With --save-script the data goes to the persistent file
        '<name>.<ext>'. Otherwise write_csv_file() is asked for a file of
        its own (filename None) and the returned object is remembered in
        self.tmpfiles until plot_file() has finished.
        """
        if self.options.save_script:
            fname = "%s.%s" % (name, ext)
            write_csv_file(fname, data)
            return fname
        tmp = write_csv_file(None, data)
        # keep a reference so that it isn't deleted
        self.tmpfiles.append(tmp)
        return tmp.name

    def write_histogram(self, samples, name, labels=10):
        """Bin *samples* and write the histogram to a data file.

        samples -- non-empty sequence of values to bin
        name    -- base name forwarded to write() (extension 'hist')
        labels  -- approximate number of bins that get a tick label

        Bins are self.options.binsize wide and cover the range from
        binsize/2 up to ceil(max(samples)) + binsize/2; samples below the
        first edge are not counted.

        Returns (data, fname): *data* is a (num_bins, 3) array whose
        columns are bin center, count and cumulative count; *fname* is the
        file the rows [center, count, cumulative, label] were written to.
        """
        upper = ceil(numpy.amax(samples))
        bin_size = self.options.binsize
        # Never ask numpy for zero bins (happens when upper < bin_size).
        num_bins = max(1, int(upper / bin_size))
        (bins, edges) = numpy.histogram(samples, bins=num_bins,
                                        range=(bin_size / 2,
                                               upper + bin_size / 2))

        data = numpy.zeros((num_bins, 3))
        data[:, 0] = (edges[:-1] + edges[1:]) / 2.0
        data[:, 1] = bins
        data[:, 2] = numpy.cumsum(bins)

        # Label only every label_rate-th bin so the axis stays readable.
        label_rate = (len(bins) // labels) or 1
        for_file = []
        for i, row in enumerate(data):
            tick = '%.2f' % row[0] if i % label_rate == 0 else ''
            for_file.append([row[0], row[1], row[2], tick])

        return (data, self.write(for_file, name, ext='hist'))

    def render(self, p):
        """Save the gnuplot script for *p* (--save-script) or execute it."""
        if self.options.save_script:
            p.gnuplot_save(p.output + '.plot')
        else:
            p.gnuplot_exec()

    def plot_samples(self, datafile, name, conf):
        """Plot a histogram of the samples stored in *datafile*.

        datafile -- path of the binary sample file
        name     -- base name for the plot output and its data file
        conf     -- mapping providing the 'overhead', 'scheduler' and
                    'host' entries used for scaling and the plot title

        Samples outside the IQR cutoffs computed by get_data() are
        discarded before plotting. A file that yields no samples is
        reported through self.err() and skipped.
        """
        if conf['overhead'] == 'RELEASE-LATENCY':
            # NOTE(review): presumably recorded in nanoseconds -- confirm.
            scale = 1.0 / 1000.0
        else:
            # options.cycles is the number of cycles per microsecond.
            scale = 1.0 / self.options.cycles

        data, max_idx, min_idx, iqr_max, iqr_min = get_data(
            datafile, scale, self.options.extent)
        samples = data[min_idx:max_idx]

        if len(samples) == 0:
            # Nothing to bin; the statistics and histogram below would
            # otherwise fail and abort the remaining files as well.
            self.err("Skipped '%s' (no samples to plot)." % datafile)
            return

        discarded = (len(data) - len(samples)) / float(len(data)) * 100

        p = self.make_plot(name)

        # %g rather than %d: the extent option is a float (e.g. 1.5).
        iqr_label = "IQR: extent=%g threshold=%.2fus filtered=%.2f%%" % \
            (self.options.extent, iqr_max, discarded)

        p.labels = [label(0.5, 0.9,
                          get_stats_label(samples),
                          coord=['graph', 'screen'], align='center'),
                    label(0.98, 0.95, iqr_label,
                          coord=['graph', 'graph'], align='right')]

        (hist, fname) = self.write_histogram(samples, name)

        p.setup_histogram(gap=1, boxwidth=1.0)

        p.title = "measured overheads scheduler=%s; overhead=%s; host=%s" \
            % (conf['scheduler'], conf['overhead'], conf['host'])

        p.ylabel = "number of samples"
        p.xlabel = "overhead in microseconds (bin size = %.2fus)" \
            % self.options.binsize
        p.xticks = (0, 10)
        # Round the y-axis limit up to the next multiple of 100.
        p.yrange = (0, (ceil(numpy.amax(hist[:, 1]) / 100.0) * 100))
        # The histogram file rows are [center, count, cumulative, label].
        p.curves = [curve(histogram=fname, col=2, labels_col=4)]

        #### Styling.

        # NOTE(review): the source's indentation was lost; all settings
        # below are kept in the non-PNG branch because they would otherwise
        # overwrite what setup_png() just configured -- confirm.
        if not self.setup_png(p):
            p.rounded_caps = True
            p.font = 'Helvetica'

            p.font_size = '10'
            p.size = ('20cm', '10cm')
            p.monochrome = False
            p.dashed_lines = False
            p.key = 'off'
            p.default_style = 'points lw 1'

        self.render(p)

    def plot_file(self, datafile):
        """Decode the experiment from *datafile*'s name and plot it.

        The base name without extension is passed to decode(); the first
        plotter whose key appears in the decoded configuration is run.
        IOErrors raised while plotting and unrecognised experiment types
        are reported through self.err() instead of being propagated.
        """
        bname = basename(datafile)
        name = splitext(bname)[0]
        conf = decode(name)
        plotters = {
            'taskset': self.plot_samples,
        }

        for plot_type in plotters:
            if plot_type in conf:
                try:
                    plotters[plot_type](datafile, name, conf)
                except IOError as err:
                    # Two placeholders need a two-element tuple.
                    self.err("Skipped '%s' (%s)." % (bname, err))
                break
        else:
            self.err("Skipped '%s'; unkown experiment type."
                     % bname)
        # release all tmp files
        self.tmpfiles = []

    def default(self, _):
        """Process every data file in self.args, announcing each one."""
        total = len(self.args)
        for i, datafile in enumerate(self.args):
            self.out("[%d/%d] Processing %s ..." % (i + 1, total, datafile))
            self.plot_file(datafile)
213 | |||
# Run the plotter only when executed as a script, not when imported.
if __name__ == "__main__":
    app = OverheadPlotter()
    app.launch()