diff options
| author | Bjoern B. Brandenburg <bbb@cs.unc.edu> | 2011-02-19 21:45:54 -0500 |
|---|---|---|
| committer | Bjoern B. Brandenburg <bbb@cs.unc.edu> | 2011-02-19 21:45:54 -0500 |
| commit | 5e5fc221d60f46951a360f3e4f637e0edc084daf (patch) | |
| tree | 1b622220d08c64a776ea4bd1d50d0dc39ece4c6b /binary_data.py | |
| parent | c69e0012f6845fcfe8d89bc980c14b7993d99cde (diff) | |
improve data and visualization mangling in oplot.py
Also, avoid duplicating binary data parsing code.
Diffstat (limited to 'binary_data.py')
| -rw-r--r-- | binary_data.py | 18 |
1 file changed, 16 insertions, 2 deletions
diff --git a/binary_data.py b/binary_data.py index d4be159..6108816 100644 --- a/binary_data.py +++ b/binary_data.py | |||
| @@ -3,18 +3,32 @@ import numpy | |||
| 3 | from util import load_binary_file | 3 | from util import load_binary_file |
| 4 | from stats import iqr_remove_outliers, iqr_cutoff | 4 | from stats import iqr_remove_outliers, iqr_cutoff |
| 5 | 5 | ||
| 6 | def compact_file(fname, scale=None, extend=1.5): | 6 | |
| 7 | def get_data(fname, scale, extent, cutoff=None, maxval=1000.0): | ||
| 7 | data = load_binary_file(fname) | 8 | data = load_binary_file(fname) |
| 8 | 9 | ||
| 10 | if cutoff and len(data) > cutoff: | ||
| 11 | data = data[:cutoff] | ||
| 12 | |||
| 9 | if not scale is None: | 13 | if not scale is None: |
| 10 | data *= scale | 14 | data *= scale |
| 11 | 15 | ||
| 12 | data.sort() | 16 | data.sort() |
| 13 | 17 | ||
| 14 | iqr_min, iqr_max = iqr_cutoff(data, extend) | 18 | if extent: |
| 19 | iqr_min, iqr_max = iqr_cutoff(data, extent) | ||
| 20 | else: | ||
| 21 | iqr_min = 0 | ||
| 22 | iqr_max = maxval | ||
| 15 | 23 | ||
| 16 | min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max]) | 24 | min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max]) |
| 17 | 25 | ||
| 26 | return [data, max_idx, min_idx, iqr_max, iqr_min] | ||
| 27 | |||
| 28 | |||
| 29 | def compact_file(*args, **kargs): | ||
| 30 | data, max_idx, min_idx, iqr_max, iqr_min = get_data(*args, **kargs) | ||
| 31 | |||
| 18 | samples = data[min_idx:max_idx] | 32 | samples = data[min_idx:max_idx] |
| 19 | 33 | ||
| 20 | filtered = len(data) - len(samples) | 34 | filtered = len(data) - len(samples) |
