import numpy

from util import load_binary_file
from stats import iqr_remove_outliers, iqr_cutoff


def compact_file(fname, scale=None, extend=1.5):
    """Load a binary sample file and summarise its values inside the IQR window.

    The values returned by ``load_binary_file(fname)`` are optionally
    multiplied by ``scale``, sorted, and then restricted to the window
    reported by ``iqr_cutoff(data, extend)`` before the statistics are
    computed.

    Args:
        fname: Passed unchanged to ``load_binary_file``.
        scale: Optional multiplier applied to every value before filtering.
            ``None`` (the default) leaves the data unscaled.
        extend: Passed unchanged to ``iqr_cutoff`` (presumably the IQR
            multiplier that widens the accepted window -- confirm against
            ``stats.iqr_cutoff``).

    Returns:
        A list ``[count, filtered, max, mean, min, median, std, var]`` where
        ``count`` is the number of samples kept, ``filtered`` is the number
        of values dropped by the window, and the remaining entries are
        statistics of the kept samples.

    Raises:
        IndexError: If no value falls inside the window, because the
            maximum/minimum are read from the ends of the kept slice.
    """
    data = load_binary_file(fname)
    if scale is not None:
        # Multiply out of place: an in-place ``*=`` on an integer array with a
        # float scale raises a casting error, and it would also modify the
        # object handed back by load_binary_file.
        data = data * scale
    # NOTE(review): still sorts in place; when scale is None this reorders the
    # object returned by load_binary_file -- confirm that loader does not
    # share or cache it.
    data.sort()

    iqr_min, iqr_max = iqr_cutoff(data, extend)
    # searchsorted defaults to side='left', so the kept slice holds values v
    # with iqr_min <= v < iqr_max (a value equal to iqr_max is excluded).
    min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max])
    samples = data[min_idx:max_idx]
    filtered = len(data) - len(samples)

    # The data is sorted, so the extremes sit at the ends of the slice.
    sample_max = samples[-1]
    sample_min = samples[0]

    return [
        len(samples),
        filtered,
        sample_max,
        numpy.mean(samples),
        sample_min,
        numpy.median(samples),
        numpy.std(samples),
        numpy.var(samples),
    ]