aboutsummaryrefslogtreecommitdiffstats
path: root/binary_data.py
diff options
context:
space:
mode:
Diffstat (limited to 'binary_data.py')
-rw-r--r--binary_data.py54
1 files changed, 54 insertions, 0 deletions
diff --git a/binary_data.py b/binary_data.py
new file mode 100644
index 0000000..39d6e05
--- /dev/null
+++ b/binary_data.py
@@ -0,0 +1,54 @@
1import numpy
2
3from util import load_binary_file
4from stats import iqr_remove_outliers, iqr_cutoff
5
6
def get_data(fname, scale, extent, cutoff=None, maxval=1000.0,
             stdev=False, manual=None):
    """Load samples from *fname*, sort them, and locate the outlier bounds.

    The bounds are chosen by one of three strategies:

    * ``stdev`` truthy: ``mean -/+ extent * std`` (sample std, ``ddof=1``).
    * otherwise, ``extent`` truthy: whatever ``iqr_cutoff(data, extent)``
      returns as ``(lower, upper)``.
    * otherwise: the fixed range ``[0, maxval]``.

    Args:
        fname: Passed straight to ``load_binary_file``.
        scale: Factor applied in place to every sample; ``None`` skips
            scaling.
        extent: Width multiplier for the standard-deviation or IQR filter.
        cutoff: If truthy, only the first ``cutoff`` samples (in file
            order, before sorting) are kept.
        maxval: Upper bound used when neither filter is active.
        stdev: Select the standard-deviation filter instead of the IQR one.
        manual: If truthy, number of additional samples to drop from the
            top end by lowering ``max_idx``.

    Returns:
        ``[data, max_idx, min_idx, upper, lower]`` where ``data`` is the
        sorted samples and ``data[min_idx:max_idx]`` is the retained
        window.  Both indices come from ``numpy.searchsorted`` with its
        default ``side='left'``, so samples equal to ``upper`` fall outside
        the window.
    """
    # NOTE(review): presumably returns a NumPy array (the code relies on
    # in-place ``*=`` and ``.sort()``) -- confirm against util.
    data = load_binary_file(fname)

    if cutoff and len(data) > cutoff:
        data = data[:cutoff]

    if scale is not None:
        data *= scale

    # searchsorted below requires ascending order.
    data.sort()

    if stdev:
        # standard deviations filter
        std = numpy.std(data, ddof=1)
        avg = numpy.mean(data)
        lower = avg - std * extent
        upper = avg + std * extent
    elif extent:
        # IQR filter
        lower, upper = iqr_cutoff(data, extent)
    else:
        lower = 0
        upper = maxval

    min_idx, max_idx = numpy.searchsorted(data, [lower, upper])

    if manual:
        # Clamp at min_idx: a negative max_idx would wrap around when used
        # as a slice end and silently re-include the samples being removed.
        max_idx = max(min_idx, max_idx - manual)

    return [data, max_idx, min_idx, upper, lower]
38
39
def compact_file(*args, **kargs):
    """Reduce one data file to a list of summary statistics.

    All arguments are forwarded unchanged to ``get_data``.

    Returns:
        ``[total, filtered, max, avg, min, med, std, var, upper, lower]``
        where ``total`` is the number of loaded samples, ``filtered`` is
        how many fell outside the ``[min_idx, max_idx)`` window, the
        statistics are computed over the retained samples only, and
        ``upper``/``lower`` are the filter bounds reported by ``get_data``.

    Raises:
        IndexError: If no samples remain after filtering.
    """
    data, max_idx, min_idx, upper_bound, lower_bound = get_data(*args, **kargs)

    samples = data[min_idx:max_idx]
    if len(samples) == 0:
        # Same exception type the bare samples[-1] lookup would raise,
        # but with a message that points at the actual cause.
        raise IndexError("no samples left after outlier filtering")

    filtered = len(data) - len(samples)

    # get_data returns the samples sorted, so the extremes sit at the ends.
    largest = samples[-1]
    smallest = samples[0]
    med = numpy.median(samples)
    avg = numpy.mean(samples)

    # NOTE(review): std is the sample estimate (ddof=1) while var uses
    # numpy's default ddof=0, so var != std**2 -- confirm this is intended.
    std = numpy.std(samples, ddof=1)
    var = numpy.var(samples)

    return [len(data), filtered, largest, avg, smallest, med, std, var,
            upper_bound, lower_bound]