1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
import numpy
from util import load_binary_file
from stats import iqr_remove_outliers, iqr_cutoff
def compact_file(fname, scale=None, extend=1.5):
    """Summarise the samples stored in *fname* after trimming IQR outliers.

    The data is loaded with ``load_binary_file``, optionally multiplied by
    *scale*, sorted, and reduced to the values lying within the bounds
    returned by ``iqr_cutoff(data, extend)`` (both bounds inclusive).

    Args:
        fname: File handed to ``load_binary_file``.
        scale: Optional factor applied to every sample in place; ``None``
            leaves the data untouched.
        extend: Forwarded unchanged to ``iqr_cutoff`` (presumably the IQR
            whisker multiplier -- confirm against ``stats.iqr_cutoff``).

    Returns:
        A list ``[count, filtered, max, mean, min, median, std, var]`` where
        *count* is the number of retained samples and *filtered* is the
        number of samples discarded as outliers.

    Raises:
        IndexError: If no samples remain after trimming (the extremes are
            read by indexing the retained slice).
    """
    data = load_binary_file(fname)
    if scale is not None:
        data *= scale
    data.sort()

    iqr_min, iqr_max = iqr_cutoff(data, extend)

    # The data is sorted, so the retained samples form one contiguous slice.
    # The upper bound uses side="right" so values equal to the cut-off are
    # kept; with the default side="left" they were dropped, which emptied
    # the selection entirely when the lower and upper cut-offs coincide.
    min_idx = numpy.searchsorted(data, iqr_min, side="left")
    max_idx = numpy.searchsorted(data, iqr_max, side="right")
    samples = data[min_idx:max_idx]
    filtered = len(data) - len(samples)

    # Sorted input: the extremes sit at the two ends of the slice.
    smallest = samples[0]
    largest = samples[-1]

    med = numpy.median(samples)
    avg = numpy.mean(samples)
    std = numpy.std(samples)
    var = numpy.var(samples)
    return [len(samples), filtered, largest, avg, smallest, med, std, var]
|