diff options
Diffstat (limited to 'binary_data.py')
| -rw-r--r-- | binary_data.py | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/binary_data.py b/binary_data.py new file mode 100644 index 0000000..39d6e05 --- /dev/null +++ b/binary_data.py | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | import numpy | ||
| 2 | |||
| 3 | from util import load_binary_file | ||
| 4 | from stats import iqr_remove_outliers, iqr_cutoff | ||
| 5 | |||
| 6 | |||
def get_data(fname, scale, extent, cutoff=None, maxval=1000.0,
             stdev=False, manual=None):
    """Load samples from a binary file and locate the outlier-filter window.

    The samples are optionally truncated, optionally scaled, sorted in
    ascending order, and then bracketed by a lower/upper bound chosen by
    one of three strategies (see ``stdev`` and ``extent``).

    Args:
        fname: Passed unchanged to ``load_binary_file``.
        scale: Factor multiplied into the samples in place; ``None`` skips
            the scaling step.
        extent: Width of the filter. With ``stdev`` true it is the number
            of standard deviations around the mean; otherwise, when
            truthy, it is handed to ``iqr_cutoff``. When falsy (and
            ``stdev`` is false) the fixed range ``[0, maxval]`` is used.
        cutoff: When truthy, only the first ``cutoff`` samples (in the
            order returned by ``load_binary_file``, i.e. before sorting)
            are kept.
        maxval: Upper bound of the fixed range used when no filter is
            selected.
        stdev: Select the standard-deviation filter instead of the IQR
            filter.
        manual: When truthy, the upper index is moved down by this many
            entries after the bounds have been located.

    Returns:
        list: ``[data, max_idx, min_idx, upper, lower]`` where ``data`` is
        the sorted sample sequence, ``lower``/``upper`` are the filter
        bounds and ``data[min_idx:max_idx]`` is the retained window.
    """
    data = load_binary_file(fname)

    if cutoff and len(data) > cutoff:
        data = data[:cutoff]

    if scale is not None:
        # NOTE(review): in-place multiply; presumably ``data`` is a float
        # numpy array so that a float ``scale`` is accepted -- confirm
        # against load_binary_file.
        data *= scale

    # The index search below requires ascending order.
    data.sort()

    if stdev:
        # Standard-deviation filter: mean +/- extent sample deviations.
        std = numpy.std(data, ddof=1)
        avg = numpy.mean(data)
        lower = avg - std * extent
        upper = avg + std * extent
    elif extent:
        # IQR filter.
        lower, upper = iqr_cutoff(data, extent)
    else:
        # No filter requested: fall back to the fixed range.
        lower = 0
        upper = maxval

    # Default side='left': each index is the first position whose value is
    # >= the bound, so samples equal to ``upper`` fall outside the window.
    min_idx, max_idx = numpy.searchsorted(data, [lower, upper])

    if manual:
        # Clamp at min_idx: an upper index below the lower one (or below
        # zero, which would wrap around when used as a slice end) would
        # make callers select the wrong samples instead of an empty window.
        max_idx = max(min_idx, max_idx - manual)

    return [data, max_idx, min_idx, upper, lower]
| 38 | |||
| 39 | |||
def compact_file(*args, **kargs):
    """Summarise the filtered samples of a binary data file.

    All positional and keyword arguments are forwarded unchanged to
    ``get_data``; the statistics are computed over the window
    ``data[min_idx:max_idx]`` that it reports.

    Returns:
        list: ``[total, filtered, maximum, mean, minimum, median, std,
        var, upper, lower]`` where ``total`` is the number of loaded
        samples, ``filtered`` is how many of them fell outside the window,
        and ``upper``/``lower`` are the filter bounds from ``get_data``.

    Raises:
        IndexError: If no samples remain inside the filter window.
    """
    data, max_idx, min_idx, upper, lower = get_data(*args, **kargs)

    samples = data[min_idx:max_idx]

    # Explicit length test: truthiness of a multi-element numpy array is
    # ambiguous. IndexError matches what indexing the empty window raised
    # before, but with a message that names the actual problem.
    if len(samples) == 0:
        raise IndexError("no samples remain after filtering")

    filtered = len(data) - len(samples)

    # get_data returns the samples sorted ascending, so the extremes sit
    # at the two ends of the window.
    largest = samples[-1]
    smallest = samples[0]
    median = numpy.median(samples)
    mean = numpy.mean(samples)

    std = numpy.std(samples, ddof=1)
    # NOTE(review): var uses numpy's default ddof=0 while std above uses
    # ddof=1, so var != std ** 2. Left unchanged to keep reported values
    # stable -- confirm which estimator is intended.
    var = numpy.var(samples)

    return [len(data), filtered, largest, mean, smallest, median, std, var,
            upper, lower]
