aboutsummaryrefslogtreecommitdiffstats
path: root/binary_data.py
diff options
context:
space:
mode:
authorBjoern B. Brandenburg <bbb@cs.unc.edu>2011-02-18 03:04:46 -0500
committerBjoern B. Brandenburg <bbb@cs.unc.edu>2011-02-18 03:04:46 -0500
commit89e5192f1ee83ebb3a7bd87aefc5d23ce4ab2c2b (patch)
tree4cdc41b4750719c13eb8f6534fce879c577dbd88 /binary_data.py
parente37ed32d9b861581942ec5cfb8948f0602c0a481 (diff)
Support for compacting binary data
Diffstat (limited to 'binary_data.py')
-rw-r--r--binary_data.py29
1 files changed, 29 insertions, 0 deletions
diff --git a/binary_data.py b/binary_data.py
new file mode 100644
index 0000000..e5b47aa
--- /dev/null
+++ b/binary_data.py
@@ -0,0 +1,29 @@
1import numpy
2
3from util import load_binary_file
4from stats import iqr_remove_outliers, iqr_cutoff
5
def compact_file(fname, scale=None, extend=1.5):
    """Summarize a binary data file after cutting it to the IQR cutoff range.

    The data is loaded with ``load_binary_file``, optionally multiplied by
    ``scale``, sorted, and then restricted to the range returned by
    ``iqr_cutoff(data, extend)``.  Summary statistics are computed over the
    samples that remain.

    Args:
        fname: Passed unchanged to ``load_binary_file``.
        scale: Optional factor multiplied into the loaded data in place
            before filtering.  ``None`` (the default) skips scaling.
        extend: Passed unchanged to ``iqr_cutoff``.  Presumably the
            multiple of the inter-quartile range by which the cutoffs are
            extended -- confirm against ``stats.iqr_cutoff``.

    Returns:
        A list ``[count, filtered, max, avg, min, med, std, var]`` where
        ``count`` is the number of samples kept, ``filtered`` is the number
        of samples dropped, and the remaining entries are the maximum,
        mean, minimum, median, standard deviation and variance of the
        samples kept.

    Raises:
        IndexError: If no sample lies within the cutoff range.
    """
    data = load_binary_file(fname)

    if scale is not None:
        data *= scale

    # Sorting lets us locate the cutoffs by binary search and read the
    # extremes straight off the ends of the slice.
    data.sort()

    iqr_min, iqr_max = iqr_cutoff(data, extend)

    # NOTE(review): searchsorted defaults to side='left', so samples equal
    # to iqr_min are kept while samples equal to iqr_max are dropped.
    # Confirm that this asymmetry is intended.
    min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max])

    samples = data[min_idx:max_idx]
    filtered = len(data) - len(samples)

    # Indexing an empty slice below would raise a bare IndexError anyway;
    # raise the same exception type with a message that names the file.
    if len(samples) == 0:
        raise IndexError(
            "no samples of %r remain within the IQR cutoffs" % (fname,))

    # samples is a slice of sorted data, so the extremes sit at its ends.
    largest = samples[-1]
    smallest = samples[0]
    median = numpy.median(samples)
    mean = numpy.mean(samples)

    std_dev = numpy.std(samples)
    variance = numpy.var(samples)

    return [len(samples), filtered, largest, mean, smallest, median,
            std_dev, variance]