diff options
author | Bjoern B. Brandenburg <bbb@cs.unc.edu> | 2011-02-19 21:45:54 -0500 |
---|---|---|
committer | Bjoern B. Brandenburg <bbb@cs.unc.edu> | 2011-02-19 21:45:54 -0500 |
commit | 5e5fc221d60f46951a360f3e4f637e0edc084daf (patch) | |
tree | 1b622220d08c64a776ea4bd1d50d0dc39ece4c6b /binary_data.py | |
parent | c69e0012f6845fcfe8d89bc980c14b7993d99cde (diff) |
improve data and visualization mangling in oplot.py
Also, avoid duplicating binary data parsing code.
Diffstat (limited to 'binary_data.py')
-rw-r--r-- | binary_data.py | 18 |
1 file changed, 16 insertions, 2 deletions
diff --git a/binary_data.py b/binary_data.py index d4be159..6108816 100644 --- a/binary_data.py +++ b/binary_data.py | |||
@@ -3,18 +3,32 @@ import numpy | |||
3 | from util import load_binary_file | 3 | from util import load_binary_file |
4 | from stats import iqr_remove_outliers, iqr_cutoff | 4 | from stats import iqr_remove_outliers, iqr_cutoff |
5 | 5 | ||
6 | def compact_file(fname, scale=None, extend=1.5): | 6 | |
7 | def get_data(fname, scale, extent, cutoff=None, maxval=1000.0): | ||
7 | data = load_binary_file(fname) | 8 | data = load_binary_file(fname) |
8 | 9 | ||
10 | if cutoff and len(data) > cutoff: | ||
11 | data = data[:cutoff] | ||
12 | |||
9 | if not scale is None: | 13 | if not scale is None: |
10 | data *= scale | 14 | data *= scale |
11 | 15 | ||
12 | data.sort() | 16 | data.sort() |
13 | 17 | ||
14 | iqr_min, iqr_max = iqr_cutoff(data, extend) | 18 | if extent: |
19 | iqr_min, iqr_max = iqr_cutoff(data, extent) | ||
20 | else: | ||
21 | iqr_min = 0 | ||
22 | iqr_max = maxval | ||
15 | 23 | ||
16 | min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max]) | 24 | min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max]) |
17 | 25 | ||
26 | return [data, max_idx, min_idx, iqr_max, iqr_min] | ||
27 | |||
28 | |||
29 | def compact_file(*args, **kargs): | ||
30 | data, max_idx, min_idx, iqr_max, iqr_min = get_data(*args, **kargs) | ||
31 | |||
18 | samples = data[min_idx:max_idx] | 32 | samples = data[min_idx:max_idx] |
19 | 33 | ||
20 | filtered = len(data) - len(samples) | 34 | filtered = len(data) - len(samples) |