about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bjoern Brandenburg <bbb@mpi-sws.org>, 2016-04-19 19:37:15 -0400
committer: Bjoern Brandenburg <bbb@mpi-sws.org>, 2016-04-19 19:37:15 -0400
commit: b2c36fecb42b85f2fe9d6c6ade51aaf00dbbee9d (patch)
tree: 8bd8b7c9e87210d9f581c517f231cee987096bc0
parent: 5cae4926d7084c8b152b1eb48162b48169d86d70 (diff)
ft-compute-stats: add histogram & CDF support
-rwxr-xr-x ft-compute-stats 94
1 files changed, 84 insertions, 10 deletions
diff --git a/ft-compute-stats b/ft-compute-stats
index 45e08cf..6399df4 100755
--- a/ft-compute-stats
+++ b/ft-compute-stats
@@ -8,6 +8,8 @@ import optparse
8import sys 8import sys
9import os 9import os
10 10
11from math import ceil
12
11from os.path import splitext 13from os.path import splitext
12 14
13import itertools as it 15import itertools as it
@@ -23,7 +25,20 @@ def decode_key_value_filename(name):
23 params[k] = v 25 params[k] = v
24 return params 26 return params
25 27
26def print_rows(rows, first_row_prefix='#', other_rows_prefix=' '): 28def print_cols(cols, first_row_prefix='# ', other_rows_prefix=' '):
29 col_widths = [max((len(str(x)) for x in col)) for col in cols]
30
31 prefs = it.chain([first_row_prefix], it.repeat(other_rows_prefix))
32
33 for prefix, row in it.izip(prefs, range(0, max((len(c) for c in cols)))):
34 reached_end = True
35 data = [col[row] if row < len(col) else '' for col in cols]
36 print '%s%s' % (prefix,
37 ", ".join([' ' * (c - len(str(f))) + str(f)
38 for (c, f) in it.izip(col_widths, data)]))
39
40
41def print_rows(rows, first_row_prefix='# ', other_rows_prefix=' '):
27 col = 0 42 col = 0
28 col_widths = [] 43 col_widths = []
29 field_widths = True 44 field_widths = True
@@ -79,10 +94,25 @@ o = optparse.make_option
79opts = [ 94opts = [
80 o('-p', '--cycles-per-usec', action='store', dest='cycles', type='float', 95 o('-p', '--cycles-per-usec', action='store', dest='cycles', type='float',
81 help='how many cycles per usec'), 96 help='how many cycles per usec'),
97 o(None, '--hist', action='store_true', dest='want_hist',
98 help='generate a histogram'),
99 o('-b', '--bin-size', action='store', dest='bin_size', type='float',
100 help='size of each bin in histogram'),
101 o('-n', '--normalize-counts', action='store_true', dest='normalize',
102 help='give relative frequency, not absolute sample counts'),
103 o('-c', '--cumulative', action='store_true', dest='cumulative',
104 help='report cumulative counts (i.e., CDF)'),
105 o(None, '--percent', action='store_true', dest='want_percent',
106 help='give relative frequency as a percentage'),
82 ] 107 ]
83 108
84defaults = { 109defaults = {
85 'cycles' : None, 110 'cycles' : None,
111 'want_hist' : False,
112 'bin_size' : 1000,
113 'normalize' : False,
114 'want_percent' : False,
115 'cumulative' : False,
86 } 116 }
87 117
88options = None 118options = None
@@ -135,6 +165,32 @@ def get_stats(fname):
135 finfo = [fname] 165 finfo = [fname]
136 return [to_str(x) for x in info + stats + finfo] 166 return [to_str(x) for x in info + stats + finfo]
137 167
168def make_bins(max_val):
169 num_bins = int(ceil(max_val / options.bin_size))
170 return [x * options.bin_size for x in range(0, num_bins)]
171
172def hist_file(fname, scale):
173 max_val = 0
174 hist = []
175
176 size = os.stat(fname).st_size
177 if size:
178 samples = numpy.memmap(fname, dtype='float32', mode='c')
179
180 n = len(samples)
181 if n > 0:
182 samples *= scale
183 max_val = numpy.amax(samples)
184 bins = make_bins(max_val)
185 hist, _ = numpy.histogram(samples, bins)
186 if options.cumulative:
187 hist = numpy.cumsum(hist)
188 if options.normalize:
189 hist = [h/n * (100 if options.want_percent else 1) for h in hist]
190
191 return (max_val, hist)
192
193
138if __name__ == '__main__': 194if __name__ == '__main__':
139 # FIXME: would be nicer with argparse 195 # FIXME: would be nicer with argparse
140 parser = optparse.OptionParser(option_list=opts) 196 parser = optparse.OptionParser(option_list=opts)
@@ -142,13 +198,31 @@ if __name__ == '__main__':
142 (options, files) = parser.parse_args() 198 (options, files) = parser.parse_args()
143 199
144 try: 200 try:
145 rows = [] 201 if options.want_hist:
146 rows.append(STATS_HEADERS) 202 cols = []
147 for f in files: 203 max_val = 0
148 try: 204 col_names = []
149 rows.append(get_stats(f)) 205 for i, f in enumerate(files):
150 except IOError, msg: 206 try:
151 print >> sys.stderr, msg 207 (max_in_file, hist) = hist_file(f, 1)
152 print_rows(rows) 208 cols.append([i + 1] + [to_str(x) for x in hist])
209 max_val = max(max_val, max_in_file)
210 col_names.append(f)
211 except IOError, msg:
212 print >> sys.stderr, msg
213 bins = ['Bin'] + make_bins(max_val)
214 print_cols([bins] + cols)
215 print '# Columns:'
216 for i, f in enumerate(col_names):
217 print '# (%d) %s' % (i + 1, f)
218 else:
219 rows = []
220 rows.append(STATS_HEADERS)
221 for f in files:
222 try:
223 rows.append(get_stats(f))
224 except IOError, msg:
225 print >> sys.stderr, msg
226 print_rows(rows)
153 except KeyboardInterrupt: 227 except KeyboardInterrupt:
154 pass 228 pass