From 5e5fc221d60f46951a360f3e4f637e0edc084daf Mon Sep 17 00:00:00 2001 From: "Bjoern B. Brandenburg" Date: Sat, 19 Feb 2011 21:45:54 -0500 Subject: improve data and visualization mangling in oplot.py Also, avoid duplicating binary data parsing code. --- binary_data.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'binary_data.py') diff --git a/binary_data.py b/binary_data.py index d4be159..6108816 100644 --- a/binary_data.py +++ b/binary_data.py @@ -3,18 +3,32 @@ import numpy from util import load_binary_file from stats import iqr_remove_outliers, iqr_cutoff -def compact_file(fname, scale=None, extend=1.5): + +def get_data(fname, scale, extent, cutoff=None, maxval=1000.0): data = load_binary_file(fname) + if cutoff and len(data) > cutoff: + data = data[:cutoff] + if not scale is None: data *= scale data.sort() - iqr_min, iqr_max = iqr_cutoff(data, extend) + if extent: + iqr_min, iqr_max = iqr_cutoff(data, extent) + else: + iqr_min = 0 + iqr_max = maxval min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max]) + return [data, max_idx, min_idx, iqr_max, iqr_min] + + +def compact_file(*args, **kargs): + data, max_idx, min_idx, iqr_max, iqr_min = get_data(*args, **kargs) + samples = data[min_idx:max_idx] filtered = len(data) - len(samples) -- cgit v1.2.2