improve data and visualization mangling in oplot.py

Also, avoid duplicating binary data parsing code.
author: Bjoern B. Brandenburg <bbb@cs.unc.edu> 2011-02-19 21:45:54 -0500
committer: Bjoern B. Brandenburg <bbb@cs.unc.edu> 2011-02-19 21:45:54 -0500
commit: 5e5fc221d60f46951a360f3e4f637e0edc084daf (patch)
tree: 1b622220d08c64a776ea4bd1d50d0dc39ece4c6b
parent: c69e0012f6845fcfe8d89bc980c14b7993d99cde (diff)
2 files changed, 53 insertions, 25 deletions
diff --git a/binary_data.py b/binary_data.py
index d4be159..6108816 100644
--- a/binary_data.py
+++ b/binary_data.py
@@ -3,18 +3,32 @@ import numpy
 from util import load_binary_file
 from stats import iqr_remove_outliers, iqr_cutoff
-def compact_file(fname, scale=None, extend=1.5):
+def get_data(fname, scale, extent, cutoff=None, maxval=1000.0):
    data = load_binary_file(fname)
+    if cutoff and len(data) > cutoff:
+        data = data[:cutoff]
    if not scale is None:
        data *= scale
    data.sort()
-    iqr_min, iqr_max = iqr_cutoff(data, extend)
+    if extent:
+        iqr_min, iqr_max = iqr_cutoff(data, extent)
+    else:
+        iqr_min = 0
+        iqr_max = maxval
    min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max])
+    return [data, max_idx, min_idx, iqr_max, iqr_min]
+def compact_file(*args, **kargs):
+    data, max_idx, min_idx, iqr_max, iqr_min = get_data(*args, **kargs)
    samples = data[min_idx:max_idx]
    filtered = len(data) - len(samples)
diff --git a/oplot.py b/oplot.py
index e0b4c69..89e0ef0 100755
--- a/oplot.py
+++ b/oplot.py
@@ -5,6 +5,8 @@ from plot import decode
 from util import load_csv_file, load_binary_file, write_csv_file
 from stats import iqr_cutoff
+from binary_data import get_data
 from math import ceil
 import numpy
@@ -24,9 +26,21 @@ options = [
    o('-i', '--iqr-extent', action='store', dest='extent', type='float',
      help='what extent to use for outlier removal'),
+    o('-n', '--normalize', action='store_true', dest='normalize',
+      help='use normalize counts'),
    o('-c', '--cut-off', action='store', dest='cutoff', type='int',
      help='max number of samples to use'),
+    o('-x', '--xmax', action='store', dest='xmax', type='int',
+      help='determines x-axis range'),
+    o('-y', '--ymax', action='store', dest='ymax', type='float',
+      help='determines y-axis range'),
+    o('-b', '--binsize', action='store', dest='binsize', type='float',
+      help='set binsize of histogram'),
    ]
 defaults = {
@@ -39,9 +53,14 @@ defaults = {
    'cycles' : 2128, # per usec
    'extent' : 3,
    'cutoff' : None,
+    'normalize' : False,
    # formatting options
    'binsize' : 0.25,
+    'xmax'    : None,
+    'ymax'    : None,
    }
@@ -55,19 +74,6 @@ TXT = {
    'TICK' : 'timer tick overhead',
 }
-def get_data(fname, scale, extend):
-    data = load_binary_file(fname)
-    if not scale is None:
-        data *= scale
-    data.sort()
-    iqr_min, iqr_max = iqr_cutoff(data, extend)
-    min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max])
-    return [data, max_idx, min_idx, iqr_max, iqr_min]
 def get_stats_label(samples):
    avg = numpy.mean(samples)
@@ -114,6 +120,8 @@ class OverheadPlotter(defapp.App):
    def write_histogram(self, samples, name, labels=10):
        max = ceil(numpy.amax(samples))
+        if self.options.xmax:
+            max = self.options.xmax
        bin_size = self.options.binsize
        num_bins  = int(max / bin_size)
        (bins, edges) = numpy.histogram(samples, bins=num_bins,
@@ -127,6 +135,10 @@ class OverheadPlotter(defapp.App):
            cumulative += bins[i]
            data[i, 2]  = cumulative
+        if self.options.normalize:
+            data[:, 1] /= len(samples)
+            data[:, 2] /= len(samples)
        label_rate = len(bins) / labels
        if not label_rate:
            label_rate = 1
@@ -135,7 +147,7 @@ class OverheadPlotter(defapp.App):
            label = '%.2f' % row[0] if i % label_rate == 0 else ''
            for_file.append([row[0], row[1], row[2], label])
-        return (data, self.write(for_file, name, ext='hist'))
+        return (data, self.write(for_file, name, ext='hist'), edges)
    def render(self, p):
        if self.options.save_script:
@@ -151,10 +163,8 @@ class OverheadPlotter(defapp.App):
        data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile,
                                                            scale,
-                                                            self.options.extent)
+                                                            self.options.extent,
+                                                            self.options.cutoff)
-        if self.options.cutoff and len(data) > self.options.cutoff:
-            data = data[:self.options.cutoff]
        samples = data[min_idx:max_idx]
        discarded = (len(data) - len(samples)) / float(len(data)) * 100
@@ -175,20 +185,24 @@ class OverheadPlotter(defapp.App):
                    label(0.98, 0.95, iqr_label,
                          coord=['graph', 'graph'], align='right')]
-        (hist, fname) = self.write_histogram(samples, name)
+        (hist, fname, edges) = self.write_histogram(samples, name)
        p.setup_histogram(gap=1, boxwidth=1.0)
        p.title = "%s: measured %s for %s tasks per processor (host=%s)" \
            % (conf['scheduler'], TXT[conf['overhead']], conf['n'], conf['host'])
-        p.ylabel = "number of samples"
+        if self.options.normalize:
+            p.ylabel = "fraction of samples"
+        else:
+            p.ylabel = "number of samples"
        p.xlabel = "overhead in microseconds (bin size = %.2fus)" \
            % self.options.binsize
-#            p.xrange = (0, ceil(max_cost))
+        if self.options.ymax:
+            p.yrange = (0, self.options.ymax)
+#        p.yrange = (0, (ceil(numpy.amax(hist[:,1]) / 100.0) * 100))
        p.xticks = (0, 10)
-#            p.yticks = (0, 1)
-        p.yrange = (0, (ceil(numpy.amax(hist[:,1]) / 100.0) * 100))
        p.curves = [curve(histogram=fname, col=2, labels_col=4)]
        #### Styling.
author	Bjoern B. Brandenburg <bbb@cs.unc.edu>	2011-02-19 21:45:54 -0500
committer	Bjoern B. Brandenburg <bbb@cs.unc.edu>	2011-02-19 21:45:54 -0500
commit	5e5fc221d60f46951a360f3e4f637e0edc084daf (patch)
tree	1b622220d08c64a776ea4bd1d50d0dc39ece4c6b
parent	c69e0012f6845fcfe8d89bc980c14b7993d99cde (diff)

diff --git a/binary_data.py b/binary_data.py
index d4be159..6108816 100644
--- a/binary_data.py
+++ b/binary_data.py
@@ -3,18 +3,32 @@ import numpy
3	from util import load_binary_file	3	from util import load_binary_file
4	from stats import iqr_remove_outliers, iqr_cutoff	4	from stats import iqr_remove_outliers, iqr_cutoff
5		5
6	def compact_file(fname, scale=None, extend=1.5):	6
		7	def get_data(fname, scale, extent, cutoff=None, maxval=1000.0):
7	data = load_binary_file(fname)	8	data = load_binary_file(fname)
8		9
		10	if cutoff and len(data) > cutoff:
		11	data = data[:cutoff]
		12
9	if not scale is None:	13	if not scale is None:
10	data *= scale	14	data *= scale
11		15
12	data.sort()	16	data.sort()
13		17
14	iqr_min, iqr_max = iqr_cutoff(data, extend)	18	if extent:
		19	iqr_min, iqr_max = iqr_cutoff(data, extent)
		20	else:
		21	iqr_min = 0
		22	iqr_max = maxval
15		23
16	min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max])	24	min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max])
17		25
		26	return [data, max_idx, min_idx, iqr_max, iqr_min]
		27
		28
		29	def compact_file(*args, **kargs):
		30	data, max_idx, min_idx, iqr_max, iqr_min = get_data(*args, **kargs)
		31
18	samples = data[min_idx:max_idx]	32	samples = data[min_idx:max_idx]
19		33
20	filtered = len(data) - len(samples)	34	filtered = len(data) - len(samples)


diff --git a/oplot.py b/oplot.py
index e0b4c69..89e0ef0 100755
--- a/oplot.py
+++ b/oplot.py
@@ -5,6 +5,8 @@ from plot import decode
5	from util import load_csv_file, load_binary_file, write_csv_file	5	from util import load_csv_file, load_binary_file, write_csv_file
6	from stats import iqr_cutoff	6	from stats import iqr_cutoff
7		7
		8	from binary_data import get_data
		9
8	from math import ceil	10	from math import ceil
9		11
10	import numpy	12	import numpy
@@ -24,9 +26,21 @@ options = [
24	o('-i', '--iqr-extent', action='store', dest='extent', type='float',	26	o('-i', '--iqr-extent', action='store', dest='extent', type='float',
25	help='what extent to use for outlier removal'),	27	help='what extent to use for outlier removal'),
26		28
		29	o('-n', '--normalize', action='store_true', dest='normalize',
		30	help='use normalize counts'),
		31
27	o('-c', '--cut-off', action='store', dest='cutoff', type='int',	32	o('-c', '--cut-off', action='store', dest='cutoff', type='int',
28	help='max number of samples to use'),	33	help='max number of samples to use'),
29		34
		35	o('-x', '--xmax', action='store', dest='xmax', type='int',
		36	help='determines x-axis range'),
		37
		38	o('-y', '--ymax', action='store', dest='ymax', type='float',
		39	help='determines y-axis range'),
		40
		41	o('-b', '--binsize', action='store', dest='binsize', type='float',
		42	help='set binsize of histogram'),
		43
30	]	44	]
31		45
32	defaults = {	46	defaults = {
@@ -39,9 +53,14 @@ defaults = {
39	'cycles' : 2128, # per usec	53	'cycles' : 2128, # per usec
40	'extent' : 3,	54	'extent' : 3,
41	'cutoff' : None,	55	'cutoff' : None,
		56	'normalize' : False,
42		57
43	# formatting options	58	# formatting options
44	'binsize' : 0.25,	59	'binsize' : 0.25,
		60
		61	'xmax' : None,
		62	'ymax' : None,
		63
45	}	64	}
46		65
47		66
@@ -55,19 +74,6 @@ TXT = {
55	'TICK' : 'timer tick overhead',	74	'TICK' : 'timer tick overhead',
56	}	75	}
57		76
58	def get_data(fname, scale, extend):
59	data = load_binary_file(fname)
60
61	if not scale is None:
62	data *= scale
63
64	data.sort()
65
66	iqr_min, iqr_max = iqr_cutoff(data, extend)
67	min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max])
68
69	return [data, max_idx, min_idx, iqr_max, iqr_min]
70
71		77
72	def get_stats_label(samples):	78	def get_stats_label(samples):
73	avg = numpy.mean(samples)	79	avg = numpy.mean(samples)
@@ -114,6 +120,8 @@ class OverheadPlotter(defapp.App):
114		120
115	def write_histogram(self, samples, name, labels=10):	121	def write_histogram(self, samples, name, labels=10):
116	max = ceil(numpy.amax(samples))	122	max = ceil(numpy.amax(samples))
		123	if self.options.xmax:
		124	max = self.options.xmax
117	bin_size = self.options.binsize	125	bin_size = self.options.binsize
118	num_bins = int(max / bin_size)	126	num_bins = int(max / bin_size)
119	(bins, edges) = numpy.histogram(samples, bins=num_bins,	127	(bins, edges) = numpy.histogram(samples, bins=num_bins,
@@ -127,6 +135,10 @@ class OverheadPlotter(defapp.App):
127	cumulative += bins[i]	135	cumulative += bins[i]
128	data[i, 2] = cumulative	136	data[i, 2] = cumulative
129		137
		138	if self.options.normalize:
		139	data[:, 1] /= len(samples)
		140	data[:, 2] /= len(samples)
		141
130	label_rate = len(bins) / labels	142	label_rate = len(bins) / labels
131	if not label_rate:	143	if not label_rate:
132	label_rate = 1	144	label_rate = 1
@@ -135,7 +147,7 @@ class OverheadPlotter(defapp.App):
135	label = '%.2f' % row[0] if i % label_rate == 0 else ''	147	label = '%.2f' % row[0] if i % label_rate == 0 else ''
136	for_file.append([row[0], row[1], row[2], label])	148	for_file.append([row[0], row[1], row[2], label])
137		149
138	return (data, self.write(for_file, name, ext='hist'))	150	return (data, self.write(for_file, name, ext='hist'), edges)
139		151
140	def render(self, p):	152	def render(self, p):
141	if self.options.save_script:	153	if self.options.save_script:
@@ -151,10 +163,8 @@ class OverheadPlotter(defapp.App):
151		163
152	data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile,	164	data, max_idx, min_idx, iqr_max, iqr_min = get_data(datafile,
153	scale,	165	scale,
154	self.options.extent)	166	self.options.extent,
155		167	self.options.cutoff)
156	if self.options.cutoff and len(data) > self.options.cutoff:
157	data = data[:self.options.cutoff]
158		168
159	samples = data[min_idx:max_idx]	169	samples = data[min_idx:max_idx]
160	discarded = (len(data) - len(samples)) / float(len(data)) * 100	170	discarded = (len(data) - len(samples)) / float(len(data)) * 100
@@ -175,20 +185,24 @@ class OverheadPlotter(defapp.App):
175	label(0.98, 0.95, iqr_label,	185	label(0.98, 0.95, iqr_label,
176	coord=['graph', 'graph'], align='right')]	186	coord=['graph', 'graph'], align='right')]
177		187
178	(hist, fname) = self.write_histogram(samples, name)	188	(hist, fname, edges) = self.write_histogram(samples, name)
179		189
180	p.setup_histogram(gap=1, boxwidth=1.0)	190	p.setup_histogram(gap=1, boxwidth=1.0)
181		191
182	p.title = "%s: measured %s for %s tasks per processor (host=%s)" \	192	p.title = "%s: measured %s for %s tasks per processor (host=%s)" \
183	% (conf['scheduler'], TXT[conf['overhead']], conf['n'], conf['host'])	193	% (conf['scheduler'], TXT[conf['overhead']], conf['n'], conf['host'])
184		194
185	p.ylabel = "number of samples"	195	if self.options.normalize:
		196	p.ylabel = "fraction of samples"
		197	else:
		198	p.ylabel = "number of samples"
186	p.xlabel = "overhead in microseconds (bin size = %.2fus)" \	199	p.xlabel = "overhead in microseconds (bin size = %.2fus)" \
187	% self.options.binsize	200	% self.options.binsize
188	# p.xrange = (0, ceil(max_cost))	201
		202	if self.options.ymax:
		203	p.yrange = (0, self.options.ymax)
		204	# p.yrange = (0, (ceil(numpy.amax(hist[:,1]) / 100.0) * 100))
189	p.xticks = (0, 10)	205	p.xticks = (0, 10)
190	# p.yticks = (0, 1)
191	p.yrange = (0, (ceil(numpy.amax(hist[:,1]) / 100.0) * 100))
192	p.curves = [curve(histogram=fname, col=2, labels_col=4)]	206	p.curves = [curve(histogram=fname, col=2, labels_col=4)]
193		207
194	#### Styling.	208	#### Styling.