From 56267a16bab916d5baa4fa7344520e395110fd90 Mon Sep 17 00:00:00 2001 From: "Bjoern B. Brandenburg" Date: Mon, 21 Feb 2011 17:20:59 -0500 Subject: add data filter based on standard deviation --- binary_data.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'binary_data.py') diff --git a/binary_data.py b/binary_data.py index 6108816..152afb7 100644 --- a/binary_data.py +++ b/binary_data.py @@ -4,7 +4,8 @@ from util import load_binary_file from stats import iqr_remove_outliers, iqr_cutoff -def get_data(fname, scale, extent, cutoff=None, maxval=1000.0): +def get_data(fname, scale, extent, cutoff=None, maxval=1000.0, + stdev=False): data = load_binary_file(fname) if cutoff and len(data) > cutoff: @@ -15,15 +16,22 @@ def get_data(fname, scale, extent, cutoff=None, maxval=1000.0): data.sort() - if extent: - iqr_min, iqr_max = iqr_cutoff(data, extent) + if stdev: + # standard deviations filter + std = numpy.std(data, ddof=1) + avg = numpy.mean(data) + lower = avg - std * extent + upper = avg + std * extent + elif extent: + # IQR filter + lower, upper = iqr_cutoff(data, extent) else: - iqr_min = 0 - iqr_max = maxval + lower = 0 + upper = maxval - min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max]) + min_idx, max_idx = numpy.searchsorted(data, [lower, upper]) - return [data, max_idx, min_idx, iqr_max, iqr_min] + return [data, max_idx, min_idx, upper, lower] def compact_file(*args, **kargs): -- cgit v1.2.2