import numpy

from util import load_binary_file
from stats import iqr_remove_outliers, iqr_cutoff


def get_data(fname, scale, extent, cutoff=None, maxval=1000.0,
             stdev=False, manual=None):
    """Load, scale and sort the samples in *fname* and locate the filter window.

    The samples are obtained from ``load_binary_file(fname)``, optionally
    truncated and scaled, sorted in place, and then bracketed by a lower and
    upper bound chosen by one of three strategies:

    * ``stdev`` truthy: ``mean -/+ extent * std`` (sample std, ``ddof=1``);
    * otherwise, ``extent`` truthy: the pair returned by
      ``iqr_cutoff(data, extent)``;
    * otherwise: the fixed range ``0 .. maxval``.

    Args:
        fname: Passed unchanged to ``load_binary_file``.
        scale: Factor the samples are multiplied by in place; ``None``
            skips scaling.
        extent: Width multiplier for the standard-deviation filter, or the
            second argument handed to ``iqr_cutoff``. A falsy value (with
            ``stdev`` falsy) selects the fixed ``0 .. maxval`` range.
        cutoff: If truthy, keep only the first ``cutoff`` samples (applied
            before sorting).
        maxval: Upper bound used when no statistical filter is selected.
        stdev: Select the standard-deviation filter instead of the IQR one.
        manual: If truthy, number of additional samples to drop from the
            upper end of the window.

    Returns:
        The list ``[data, max_idx, min_idx, upper, lower]`` where ``data``
        is the sorted sample array and ``data[min_idx:max_idx]`` is the
        window of samples that passed the filter.
    """
    data = load_binary_file(fname)

    if cutoff and len(data) > cutoff:
        data = data[:cutoff]

    if scale is not None:
        data *= scale

    # searchsorted below requires ascending order.
    data.sort()

    if stdev:
        # Standard-deviation filter.
        std = numpy.std(data, ddof=1)
        avg = numpy.mean(data)
        lower = avg - std * extent
        upper = avg + std * extent
    elif extent:
        # IQR filter.
        lower, upper = iqr_cutoff(data, extent)
    else:
        lower = 0
        upper = maxval

    # Default side='left': samples equal to `lower` fall inside the window,
    # samples equal to `upper` fall outside it.
    min_idx, max_idx = numpy.searchsorted(data, [lower, upper])

    if manual:
        # Never let the upper index drop below the lower one. A negative
        # max_idx would wrap around when used as a slice bound and silently
        # re-admit samples from the top of the array.
        max_idx = max(min_idx, max_idx - manual)

    return [data, max_idx, min_idx, upper, lower]


def compact_file(*args, **kargs):
    """Summarise the filtered samples of one file.

    All arguments are forwarded unchanged to ``get_data``.

    Returns:
        The list ``[total, filtered, max, avg, min, med, std, var, upper,
        lower]`` where ``total`` is the number of samples loaded,
        ``filtered`` is how many fell outside the filter window, the six
        statistics are computed over the samples inside the window, and
        ``upper``/``lower`` are the filter bounds reported by ``get_data``.

    Raises:
        IndexError: If no sample is left inside the filter window.
    """
    data, max_idx, min_idx, upper, lower = get_data(*args, **kargs)
    samples = data[min_idx:max_idx]

    # Explicit length test: truthiness of a multi-element array is ambiguous.
    if len(samples) == 0:
        raise IndexError(
            "no samples left after filtering (%d loaded)" % len(data))

    filtered = len(data) - len(samples)

    # `data` is sorted, so the extremes sit at the ends of the window.
    largest = samples[-1]
    smallest = samples[0]

    med = numpy.median(samples)
    avg = numpy.mean(samples)
    std = numpy.std(samples, ddof=1)
    # NOTE(review): population variance (ddof=0) next to a sample standard
    # deviation (ddof=1), so var != std ** 2. Kept as-is because callers may
    # rely on the current values -- confirm which one is intended.
    var = numpy.var(samples, ddof=0)

    return [len(data), filtered, largest, avg, smallest, med, std, var,
            upper, lower]