aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xpm_data_analysis/pm_data_analyzer.py29
-rw-r--r--pm_data_analysis/statanalyzer.py56
2 files changed, 71 insertions, 14 deletions
diff --git a/pm_data_analysis/pm_data_analyzer.py b/pm_data_analysis/pm_data_analyzer.py
index 6862f98..644a9aa 100755
--- a/pm_data_analysis/pm_data_analyzer.py
+++ b/pm_data_analysis/pm_data_analyzer.py
@@ -13,6 +13,7 @@ import numpy as np
13# preemption and migration C data exchanger 13# preemption and migration C data exchanger
14import pm 14import pm
15import pmserialize as pms 15import pmserialize as pms
16import statanalyzer as pmstat
16 17
17from optparse import OptionParser 18from optparse import OptionParser
18 19
@@ -34,16 +35,6 @@ class Overhead:
34 def add(self, ovd_vector, label): 35 def add(self, ovd_vector, label):
35 self.overheads.append([ovd_vector, label]) 36 self.overheads.append([ovd_vector, label])
36 37
37
38class InterQuartileRange:
39 def __init__(self, low, high):
40 self.low = low
41 self.high = high
42
43 def remOutliers(self, vector):
44 # discard points etc
45 return vector
46
47def read_valid_data(filename, coresL2, valid_ovds): 38def read_valid_data(filename, coresL2, valid_ovds):
48 suff = filename.find('.raw') 39 suff = filename.find('.raw')
49 if suff == -1: 40 if suff == -1:
@@ -78,10 +69,19 @@ def process_raw_data(filename, coresL2, coresC, valid_ovds):
78 print i[0], i[1] 69 print i[0], i[1]
79 70
80 # instance the statistical analizer to remove outliers 71 # instance the statistical analizer to remove outliers
81 sd = InterQuartileRange(25,75) 72 sd = pmstat.InterQuartileRange(25,75, True)
82 73
83 for i in ovds: 74 for i in ovds:
84 valid_ovds.add(sd.remOutliers(i[0]), i[1]) 75 # just add overheads, "forget" preemption length
76 valid_ovds.add(sd.remOutliers(i[0][:,0]), i[1])
77
78 if verbose:
79 # check outliers removals
80 for i in ovds:
81 print i[1], len(i[0])
82 print "\nAfter outliers removal:"
83 for i in valid_ovds:
84 print i[1], len(i[0])
85 85
86 # serialize valid overheads 86 # serialize valid overheads
87 for i in valid_ovds: 87 for i in valid_ovds:
@@ -111,8 +111,9 @@ def analize_data(valid_ovds, filename, tssize_char):
111 for i in valid_ovds: 111 for i in valid_ovds:
112 # overhead type 112 # overhead type
113 pms.cvs_it(csvf, i[1]) 113 pms.cvs_it(csvf, i[1])
114 # data (atm just overhead, not length) 114 # data (valid_ovds already have only overheads, not length)
115 vector = i[0][:,0] 115 # vector = i[0][:,0]
116 vector = i[0]
116 if vector != []: 117 if vector != []:
117 pms.cvs_it(csvf, "%5.5f" % np.max(vector)) 118 pms.cvs_it(csvf, "%5.5f" % np.max(vector))
118 pms.cvs_it(csvf, "%5.5f" % np.average(vector)) 119 pms.cvs_it(csvf, "%5.5f" % np.average(vector))
diff --git a/pm_data_analysis/statanalyzer.py b/pm_data_analysis/statanalyzer.py
new file mode 100644
index 0000000..6e096c4
--- /dev/null
+++ b/pm_data_analysis/statanalyzer.py
@@ -0,0 +1,56 @@
1#!/usr/bin/env python
2
3import numpy as np
4from scipy import stats
5
class InterQuartileRange:
    """Percentile-range filter used to discard outliers from a sample.

    low    -- lower percentile (0-100) of the range, e.g. 25
    high   -- upper percentile (0-100) of the range, e.g. 75
    extend -- when true, widen the range by 1.5 times its width on both
              sides before filtering; otherwise keep only the values
              between the two percentiles.
    """

    def __init__(self, low, high, extend = False):
        self.low = low
        self.high = high
        # extend is 1.5 extension of IQR
        self.extend = extend

    def remOutliers(self, vector):
        """Return the sorted values of `vector` that fall inside the range.

        The input is sorted (a copy is made, `vector` is not modified) and
        the percentiles `self.low` / `self.high` are computed on it.  Both
        bounds are inclusive, so a sample that sits exactly on a bound is
        kept.  An empty input yields an empty array.
        """
        svect = np.sort(vector)
        if svect.size == 0:
            # nothing to filter; a percentile of no data is undefined
            return svect

        q1 = np.percentile(svect, self.low)
        q3 = np.percentile(svect, self.high)

        lower = q1
        upper = q3
        if self.extend:
            # 1.5 IQR outliers elimination
            eiqr = (q3 - q1) * 1.5
            lower = q1 - eiqr
            upper = q3 + eiqr

        # svect is sorted, so the surviving values form one contiguous slice.
        # side='left' / side='right' make both bounds inclusive; the previous
        # strict comparisons with a -1 sentinel dropped the smallest and the
        # largest sample whenever a bound reached the end of the data.
        first = np.searchsorted(svect, lower, side='left')
        last = np.searchsorted(svect, upper, side='right')
        return svect[first:last]