diff options
Diffstat (limited to 'binary_data.py')
-rw-r--r-- | binary_data.py | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/binary_data.py b/binary_data.py new file mode 100644 index 0000000..e5b47aa --- /dev/null +++ b/binary_data.py | |||
@@ -0,0 +1,29 @@ | |||
1 | import numpy | ||
2 | |||
3 | from util import load_binary_file | ||
4 | from stats import iqr_remove_outliers, iqr_cutoff | ||
5 | |||
def compact_file(fname, scale=None, extend=1.5):
    """Summarise the samples in a binary file after IQR-based filtering.

    The data is loaded with ``load_binary_file``, optionally multiplied by
    ``scale``, sorted, and cut down to the window given by
    ``iqr_cutoff(data, extend)``.  Summary statistics are then computed
    over the samples that remain.

    Arguments:
        fname:  passed unchanged to ``load_binary_file``.
        scale:  optional factor every sample is multiplied by before any
                filtering takes place; ``None`` (the default) skips scaling.
        extend: passed unchanged to ``iqr_cutoff`` (presumably the IQR
                multiplier that widens the accepted range -- confirm
                against ``stats.iqr_cutoff``).

    Returns:
        A list ``[count, filtered, max, avg, min, med, std, var]`` where
        ``count`` is the number of samples kept, ``filtered`` the number of
        samples dropped, and the remaining entries are the maximum, mean,
        minimum, median, standard deviation and variance of the kept
        samples.

    Raises:
        IndexError: if no samples remain after filtering.
    """
    # NOTE(review): assumes load_binary_file returns a 1-D numpy array --
    # confirm against util.load_binary_file.
    data = load_binary_file(fname)

    if scale is not None:
        # Multiply out of place: an in-place ``*=`` on a numpy array raises
        # when the result cannot be cast back to the array's dtype (e.g.
        # integer samples with a float scale) or when the buffer is
        # read-only.
        data = data * scale

    # searchsorted below requires ascending order.
    data.sort()

    iqr_min, iqr_max = iqr_cutoff(data, extend)

    # searchsorted defaults to side='left', so the slice keeps the values v
    # with iqr_min <= v < iqr_max.
    # NOTE(review): samples exactly equal to iqr_max are dropped; confirm
    # that an exclusive upper bound is intended.
    min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max])
    samples = data[min_idx:max_idx]

    # Use len() rather than truthiness: the truth value of a numpy array
    # with more than one element is ambiguous.
    if len(samples) == 0:
        # Same exception type the bare samples[-1] lookup would produce,
        # but with a message that says what actually went wrong.
        raise IndexError(
            "no samples left in %r after IQR filtering" % (fname,))

    filtered = len(data) - len(samples)

    # samples is sorted, so the extremes sit at either end.
    largest = samples[-1]
    smallest = samples[0]
    med = numpy.median(samples)
    avg = numpy.mean(samples)

    std = numpy.std(samples)
    var = numpy.var(samples)

    return [len(samples), filtered, largest, avg, smallest, med, std, var]