From 5e5fc221d60f46951a360f3e4f637e0edc084daf Mon Sep 17 00:00:00 2001
From: "Bjoern B. Brandenburg" <bbb@cs.unc.edu>
Date: Sat, 19 Feb 2011 21:45:54 -0500
Subject: improve data and visualization mangling in oplot.py

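Also, avoid duplicating the binary data parsing code: the loading, truncation, scaling, sorting, and cutoff computation move out of compact_file() into a new get_data() helper that compact_file() now calls.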
---
 binary_data.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'binary_data.py')

diff --git a/binary_data.py b/binary_data.py
index d4be159..6108816 100644
--- a/binary_data.py
+++ b/binary_data.py
@@ -3,18 +3,32 @@ import numpy
 from util import load_binary_file
 from stats import iqr_remove_outliers, iqr_cutoff
 
-def compact_file(fname, scale=None, extend=1.5):
+
+def get_data(fname, scale, extent, cutoff=None, maxval=1000.0):
     data = load_binary_file(fname)
 
+    if cutoff and len(data) > cutoff:
+        data = data[:cutoff]
+
     if not scale is None:
         data *= scale
 
     data.sort()
 
-    iqr_min, iqr_max = iqr_cutoff(data, extend)
+    if extent:
+        iqr_min, iqr_max = iqr_cutoff(data, extent)
+    else:
+        iqr_min = 0
+        iqr_max = maxval
 
     min_idx, max_idx = numpy.searchsorted(data, [iqr_min, iqr_max])
 
+    return [data, max_idx, min_idx, iqr_max, iqr_min]
+
+
+def compact_file(*args, **kargs):
+    data, max_idx, min_idx, iqr_max, iqr_min = get_data(*args, **kargs)
+
     samples = data[min_idx:max_idx]
 
     filtered = len(data) - len(samples)
-- 
cgit v1.2.2