# Reconstructed from the patch "Support for compacting binary data"
# (commit 89e5192f1ee83ebb3a7bd87aefc5d23ce4ab2c2b, Bjoern B. Brandenburg,
# Fri, 18 Feb 2011 03:04:46 -0500, exported by cgit v1.2.2), which creates
# binary_data.py.

import numpy

from util import load_binary_file
from stats import iqr_remove_outliers, iqr_cutoff


def compact_file(fname, scale=None, extend=1.5):
    """Load samples from a binary file and summarize them without outliers.

    The samples are optionally scaled, sorted, and then restricted to the
    range between the two cutoffs returned by ``iqr_cutoff(data, extend)``.
    Both cutoffs are treated as inclusive, so a sample that is exactly equal
    to either cutoff is kept.

    Args:
        fname: Passed unchanged to ``load_binary_file``.
        scale: If not ``None``, every sample is multiplied by this factor
            before any statistic is computed.
        extend: Passed unchanged to ``iqr_cutoff``.
            NOTE(review): presumably the multiple of the inter-quartile
            range added beyond the quartiles -- confirm against ``stats``.

    Returns:
        A list ``[count, filtered, max, avg, min, med, std, var]`` where
        ``count`` is the number of retained samples, ``filtered`` is the
        number of discarded samples, and the remaining entries are computed
        over the retained samples only (``std``/``var`` use numpy's default
        ``ddof=0``).

    Raises:
        IndexError: If no sample lies within the cutoffs (e.g. empty input).
    """
    # NOTE(review): assumes load_binary_file returns a 1-D numpy array;
    # the in-place multiply and sort below mutate that array directly.
    data = load_binary_file(fname)

    if scale is not None:
        data *= scale

    # searchsorted requires ascending order; sorting also makes the first and
    # last retained samples the minimum and maximum.
    data.sort()

    iqr_min, iqr_max = iqr_cutoff(data, extend)

    # side='left' keeps samples equal to the lower cutoff; side='right' keeps
    # samples equal to the upper cutoff.  Using 'left' for both (the numpy
    # default) would drop samples sitting exactly on the upper cutoff and
    # leave nothing at all when both cutoffs coincide (zero IQR).
    min_idx = numpy.searchsorted(data, iqr_min, side='left')
    max_idx = numpy.searchsorted(data, iqr_max, side='right')

    samples = data[min_idx:max_idx]
    filtered = len(data) - len(samples)

    # Names avoid shadowing the ``max``/``min`` builtins.
    max_val = samples[-1]
    min_val = samples[0]
    med = numpy.median(samples)
    avg = numpy.mean(samples)

    std = numpy.std(samples)
    var = numpy.var(samples)

    return [len(samples), filtered, max_val, avg, min_val, med, std, var]