aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bjoern B. Brandenburg <bbb@cs.unc.edu> 2011-02-21 17:20:59 -0500
committer Bjoern B. Brandenburg <bbb@cs.unc.edu> 2011-02-21 17:20:59 -0500
commit 56267a16bab916d5baa4fa7344520e395110fd90 (patch)
tree eacd422c5cea09c20cc1c12f481d07110b303e4e
parent edde9793da12398d11909a1be2041db93080d22a (diff)
add data filter based on standard deviation
-rw-r--r-- binary_data.py 22
1 files changed, 15 insertions, 7 deletions
diff --git a/binary_data.py b/binary_data.py
index 6108816..152afb7 100644
--- a/binary_data.py
+++ b/binary_data.py
@@ -4,7 +4,8 @@ from util import load_binary_file
4from stats import iqr_remove_outliers, iqr_cutoff 4from stats import iqr_remove_outliers, iqr_cutoff
5 5
6 6
7def get_data(fname, scale, extent, cutoff=None, maxval=1000.0): 7def get_data(fname, scale, extent, cutoff=None, maxval=1000.0,
8 stdev=False):
8 data = load_binary_file(fname) 9 data = load_binary_file(fname)
9 10
10 if cutoff and len(data) > cutoff: 11 if cutoff and len(data) > cutoff:
@@ -15,15 +16,22 @@ def get_data(fname, scale, extent, cutoff=None, maxval=1000.0):
15 16
16 data.sort() 17 data.sort()
17 18
18 if extent: 19 if stdev:
19 iqr_min, iqr_max = iqr_cutoff(data, extent) 20 # standard deviations filter
21 std = numpy.std(data, ddof=1)
22 avg = numpy.mean(data)
23 lower = avg - std * extent
24 upper = avg + std * extent
25 elif extent:
26 # IQR filter
27 lower, upper = iqr_cutoff(data, extent)
20 else: 28 else:
21 iqr_min = 0 29 lower = 0
22 iqr_max = maxval 30 upper = maxval
23 31
24 min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max]) 32 min_idx, max_idx = numpy.searchsorted(data, [lower, upper])
25 33
26 return [data, max_idx, min_idx, iqr_max, iqr_min] 34 return [data, max_idx, min_idx, upper, lower]
27 35
28 36
29def compact_file(*args, **kargs): 37def compact_file(*args, **kargs):