# path: root/binary_data.py
# blob: 6108816a1454902909c7bd43933ec81565d47d2c
import numpy

from util import load_binary_file
from stats import iqr_remove_outliers, iqr_cutoff


def get_data(fname, scale, extent, cutoff=None, maxval=1000.0):
    """Load the samples in ``fname`` and locate the window of values to keep.

    The loaded samples are optionally truncated to the first ``cutoff``
    entries, multiplied in place by ``scale`` (skipped when ``scale`` is
    None) and sorted in place.  The window bounds come from
    ``iqr_cutoff(samples, extent)`` when ``extent`` is truthy; otherwise
    the fixed bounds ``0`` and ``maxval`` are used.

    Returns a list ``[data, max_idx, min_idx, iqr_max, iqr_min]`` (note the
    max-before-min order) where the indices are the left insertion points
    of the bounds in the sorted data, so ``data[min_idx:max_idx]`` holds
    the values ``v`` with ``iqr_min <= v < iqr_max``.

    NOTE(review): presumably load_binary_file() returns a 1-D numpy array;
    confirm against util.load_binary_file.
    """
    samples = load_binary_file(fname)

    # A falsy cutoff (None or 0) disables truncation.
    if cutoff and len(samples) > cutoff:
        samples = samples[:cutoff]

    if scale is not None:
        samples *= scale

    # searchsorted below requires ascending order.
    samples.sort()

    lower, upper = iqr_cutoff(samples, extent) if extent else (0, maxval)

    lower_idx, upper_idx = numpy.searchsorted(samples, [lower, upper])

    return [samples, upper_idx, lower_idx, upper, lower]


def compact_file(*args, **kargs):
    """Summarise the samples of a data file that fall inside the cut-off window.

    All positional and keyword arguments are forwarded unchanged to
    get_data().

    Returns a list::

        [total, filtered, max, avg, min, med, std, var, iqr_max, iqr_min]

    where ``total`` is the number of samples returned by get_data(),
    ``filtered`` is how many of them fall outside the window, and the
    remaining statistics are computed over the samples inside it.  ``std``
    and ``var`` are both sample statistics (``ddof=1``), so ``var`` equals
    ``std ** 2``.

    Raises:
        IndexError: if no samples remain inside the window.
    """
    data, max_idx, min_idx, iqr_max, iqr_min = get_data(*args, **kargs)

    samples = data[min_idx:max_idx]
    if len(samples) == 0:
        # Same exception type that indexing the empty slice would raise,
        # but with a message that explains the cause.
        raise IndexError("no samples left inside the cut-off window")

    filtered = len(data) - len(samples)

    # get_data() returns the data sorted ascending, so the extremes of the
    # window sit at the two ends of the slice.
    largest = samples[-1]
    smallest = samples[0]
    median = numpy.median(samples)
    mean = numpy.mean(samples)

    # Use the same delta degrees of freedom for both so that the reported
    # variance is the square of the reported standard deviation.
    std = numpy.std(samples, ddof=1)
    var = numpy.var(samples, ddof=1)

    return [len(data), filtered, largest, mean, smallest, median,
            std, var, iqr_max, iqr_min]