diff options
-rwxr-xr-x | ft-compute-stats | 94 |
1 files changed, 84 insertions, 10 deletions
diff --git a/ft-compute-stats b/ft-compute-stats index 45e08cf..6399df4 100755 --- a/ft-compute-stats +++ b/ft-compute-stats | |||
@@ -8,6 +8,8 @@ import optparse | |||
8 | import sys | 8 | import sys |
9 | import os | 9 | import os |
10 | 10 | ||
11 | from math import ceil | ||
12 | |||
11 | from os.path import splitext | 13 | from os.path import splitext |
12 | 14 | ||
13 | import itertools as it | 15 | import itertools as it |
@@ -23,7 +25,20 @@ def decode_key_value_filename(name): | |||
23 | params[k] = v | 25 | params[k] = v |
24 | return params | 26 | return params |
25 | 27 | ||
def print_cols(cols, first_row_prefix='# ', other_rows_prefix=' '):
    """Print column-major data as right-aligned, comma-separated rows.

    cols is a list of columns, each a list of cell values; every cell is
    formatted with str().  Each column is padded on the left to the width
    of its widest cell.  Columns may differ in length: a column that has
    run out of cells contributes a blank (space-only) field.

    The first printed row starts with first_row_prefix, every later row
    with other_rows_prefix.  Nothing is printed when cols is empty.
    """
    if not cols:
        return

    # Stringify once; both the width computation and the output use the text.
    text_cols = [[str(cell) for cell in col] for col in cols]
    # "or [0]" keeps max() from raising on a column without any cells.
    col_widths = [max([len(cell) for cell in col] or [0])
                  for col in text_cols]
    num_rows = max(len(col) for col in text_cols)

    for row in range(num_rows):
        prefix = other_rows_prefix if row else first_row_prefix
        fields = [(col[row] if row < len(col) else '').rjust(width)
                  for (width, col) in zip(col_widths, text_cols)]
        # Single parenthesised argument: same output as a print statement.
        print('%s%s' % (prefix, ", ".join(fields)))
39 | |||
40 | |||
41 | def print_rows(rows, first_row_prefix='# ', other_rows_prefix=' '): | ||
27 | col = 0 | 42 | col = 0 |
28 | col_widths = [] | 43 | col_widths = [] |
29 | field_widths = True | 44 | field_widths = True |
@@ -79,10 +94,25 @@ o = optparse.make_option | |||
# Command-line options; each entry is built with o (optparse.make_option).
opts = [
    # Unit conversion for the summary statistics.
    o('-p', '--cycles-per-usec',
      action='store', dest='cycles', type='float',
      help='how many cycles per usec'),

    # Histogram mode and its tuning knobs.
    o(None, '--hist',
      action='store_true', dest='want_hist',
      help='generate a histogram'),
    o('-b', '--bin-size',
      action='store', dest='bin_size', type='float',
      help='size of each bin in histogram'),
    o('-n', '--normalize-counts',
      action='store_true', dest='normalize',
      help='give relative frequency, not absolute sample counts'),
    o('-c', '--cumulative',
      action='store_true', dest='cumulative',
      help='report cumulative counts (i.e., CDF)'),
    o(None, '--percent',
      action='store_true', dest='want_percent',
      help='give relative frequency as a percentage'),
]
83 | 108 | ||
# Default value for every option destination declared in opts.
defaults = dict(
    cycles=None,
    want_hist=False,
    bin_size=1000,
    normalize=False,
    want_percent=False,
    cumulative=False,
)
87 | 117 | ||
88 | options = None | 118 | options = None |
@@ -135,6 +165,32 @@ def get_stats(fname): | |||
135 | finfo = [fname] | 165 | finfo = [fname] |
136 | return [to_str(x) for x in info + stats + finfo] | 166 | return [to_str(x) for x in info + stats + finfo] |
137 | 167 | ||
def make_bins(max_val):
    """Return the left edges of the histogram bins needed to cover max_val.

    Bins are options.bin_size wide and start at 0, and there are
    ceil(max_val / bin_size) of them, so the result has the form
    [0, bin_size, 2 * bin_size, ...].  With a positive bin size the list
    is empty when max_val <= 0.
    """
    # Force float division: with two integer operands (the default bin
    # size is an int) Python 2 would truncate before ceil() can round up,
    # yielding one bin too few.
    num_bins = int(ceil(float(max_val) / options.bin_size))
    return [x * options.bin_size for x in range(num_bins)]
171 | |||
def hist_file(fname, scale):
    """Compute a histogram of the float32 samples stored in fname.

    The file is read as a flat array of float32 values and every sample is
    multiplied by scale.  Samples are counted in bins that are
    options.bin_size wide and start at 0 (left edges come from make_bins).
    If options.cumulative is set the counts become running totals; if
    options.normalize is set they are divided by the number of samples,
    and additionally multiplied by 100 when options.want_percent is set.

    Returns a tuple (max_val, hist): the largest scaled sample and the
    sequence of per-bin values.  For a file without samples the result is
    (0, []).  Errors from os.stat() or from mapping the file propagate to
    the caller.
    """
    max_val = 0
    hist = []

    # Only map files that actually contain data.
    if not os.stat(fname).st_size:
        return (max_val, hist)

    # mode='c' is copy-on-write, so the in-place scaling below changes
    # only the in-memory copy and never the file on disk.
    samples = numpy.memmap(fname, dtype='float32', mode='c')
    n = len(samples)
    if n == 0:
        return (max_val, hist)

    samples *= scale
    max_val = numpy.amax(samples)

    # make_bins() yields only the left edge of each bin, whereas
    # numpy.histogram() expects every edge including the right-most one.
    # Without the extra edge the last bin -- the one that contains
    # max_val -- would be dropped from the result.
    edges = make_bins(max_val)
    if edges:
        edges.append(len(edges) * options.bin_size)
        hist, _ = numpy.histogram(samples, edges)
        if options.cumulative:
            hist = numpy.cumsum(hist)

    if options.normalize:
        factor = 100 if options.want_percent else 1
        # float() avoids integer division of the counts under Python 2,
        # which would collapse every relative frequency below 1 to 0.
        hist = [float(h) / n * factor for h in hist]

    return (max_val, hist)
192 | |||
193 | |||
138 | if __name__ == '__main__': | 194 | if __name__ == '__main__': |
139 | # FIXME: would be nicer with argparse | 195 | # FIXME: would be nicer with argparse |
140 | parser = optparse.OptionParser(option_list=opts) | 196 | parser = optparse.OptionParser(option_list=opts) |
@@ -142,13 +198,31 @@ if __name__ == '__main__': | |||
142 | (options, files) = parser.parse_args() | 198 | (options, files) = parser.parse_args() |
143 | 199 | ||
144 | try: | 200 | try: |
145 | rows = [] | 201 | if options.want_hist: |
146 | rows.append(STATS_HEADERS) | 202 | cols = [] |
147 | for f in files: | 203 | max_val = 0 |
148 | try: | 204 | col_names = [] |
149 | rows.append(get_stats(f)) | 205 | for i, f in enumerate(files): |
150 | except IOError, msg: | 206 | try: |
151 | print >> sys.stderr, msg | 207 | (max_in_file, hist) = hist_file(f, 1) |
152 | print_rows(rows) | 208 | cols.append([i + 1] + [to_str(x) for x in hist]) |
209 | max_val = max(max_val, max_in_file) | ||
210 | col_names.append(f) | ||
211 | except IOError, msg: | ||
212 | print >> sys.stderr, msg | ||
213 | bins = ['Bin'] + make_bins(max_val) | ||
214 | print_cols([bins] + cols) | ||
215 | print '# Columns:' | ||
216 | for i, f in enumerate(col_names): | ||
217 | print '# (%d) %s' % (i + 1, f) | ||
218 | else: | ||
219 | rows = [] | ||
220 | rows.append(STATS_HEADERS) | ||
221 | for f in files: | ||
222 | try: | ||
223 | rows.append(get_stats(f)) | ||
224 | except IOError, msg: | ||
225 | print >> sys.stderr, msg | ||
226 | print_rows(rows) | ||
153 | except KeyboardInterrupt: | 227 | except KeyboardInterrupt: |
154 | pass | 228 | pass |