diff options
| -rwxr-xr-x | pm_data_analysis/pm_data_analyzer.py | 29 | ||||
| -rw-r--r-- | pm_data_analysis/statanalyzer.py | 56 |
2 files changed, 71 insertions, 14 deletions
diff --git a/pm_data_analysis/pm_data_analyzer.py b/pm_data_analysis/pm_data_analyzer.py index 6862f98..644a9aa 100755 --- a/pm_data_analysis/pm_data_analyzer.py +++ b/pm_data_analysis/pm_data_analyzer.py | |||
| @@ -13,6 +13,7 @@ import numpy as np | |||
| 13 | # preemption and migration C data exchanger | 13 | # preemption and migration C data exchanger |
| 14 | import pm | 14 | import pm |
| 15 | import pmserialize as pms | 15 | import pmserialize as pms |
| 16 | import statanalyzer as pmstat | ||
| 16 | 17 | ||
| 17 | from optparse import OptionParser | 18 | from optparse import OptionParser |
| 18 | 19 | ||
| @@ -34,16 +35,6 @@ class Overhead: | |||
| 34 | def add(self, ovd_vector, label): | 35 | def add(self, ovd_vector, label): |
| 35 | self.overheads.append([ovd_vector, label]) | 36 | self.overheads.append([ovd_vector, label]) |
| 36 | 37 | ||
| 37 | |||
class InterQuartileRange:
    """Placeholder outlier filter: stores two bounds and returns data untouched.

    NOTE(review): the only visible caller constructs this with (25, 75),
    so low/high are presumably percentiles -- confirm before relying on it.
    """

    def __init__(self, low, high):
        # Both bounds are only stored; nothing in this class reads them.
        self.low = low
        self.high = high

    def remOutliers(self, vector):
        """Return ``vector`` unchanged (no outlier removal is implemented)."""
        # discard points etc
        return vector
| 46 | |||
| 47 | def read_valid_data(filename, coresL2, valid_ovds): | 38 | def read_valid_data(filename, coresL2, valid_ovds): |
| 48 | suff = filename.find('.raw') | 39 | suff = filename.find('.raw') |
| 49 | if suff == -1: | 40 | if suff == -1: |
| @@ -78,10 +69,19 @@ def process_raw_data(filename, coresL2, coresC, valid_ovds): | |||
| 78 | print i[0], i[1] | 69 | print i[0], i[1] |
| 79 | 70 | ||
| 80 | # instantiate the statistical analyzer to remove outliers | 71 | # instantiate the statistical analyzer to remove outliers |
| 81 | sd = InterQuartileRange(25,75) | 72 | sd = pmstat.InterQuartileRange(25,75, True) |
| 82 | 73 | ||
| 83 | for i in ovds: | 74 | for i in ovds: |
| 84 | valid_ovds.add(sd.remOutliers(i[0]), i[1]) | 75 | # just add overheads, "forget" preemption length |
| 76 | valid_ovds.add(sd.remOutliers(i[0][:,0]), i[1]) | ||
| 77 | |||
| 78 | if verbose: | ||
| 79 | # check outliers removals | ||
| 80 | for i in ovds: | ||
| 81 | print i[1], len(i[0]) | ||
| 82 | print "\nAfter outliers removal:" | ||
| 83 | for i in valid_ovds: | ||
| 84 | print i[1], len(i[0]) | ||
| 85 | 85 | ||
| 86 | # serialize valid overheads | 86 | # serialize valid overheads |
| 87 | for i in valid_ovds: | 87 | for i in valid_ovds: |
| @@ -111,8 +111,9 @@ def analize_data(valid_ovds, filename, tssize_char): | |||
| 111 | for i in valid_ovds: | 111 | for i in valid_ovds: |
| 112 | # overhead type | 112 | # overhead type |
| 113 | pms.cvs_it(csvf, i[1]) | 113 | pms.cvs_it(csvf, i[1]) |
| 114 | # data (atm just overhead, not length) | 114 | # data (valid_ovds already have only overheads, not length) |
| 115 | vector = i[0][:,0] | 115 | # vector = i[0][:,0] |
| 116 | vector = i[0] | ||
| 116 | if vector != []: | 117 | if vector != []: |
| 117 | pms.cvs_it(csvf, "%5.5f" % np.max(vector)) | 118 | pms.cvs_it(csvf, "%5.5f" % np.max(vector)) |
| 118 | pms.cvs_it(csvf, "%5.5f" % np.average(vector)) | 119 | pms.cvs_it(csvf, "%5.5f" % np.average(vector)) |
diff --git a/pm_data_analysis/statanalyzer.py b/pm_data_analysis/statanalyzer.py new file mode 100644 index 0000000..6e096c4 --- /dev/null +++ b/pm_data_analysis/statanalyzer.py | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | #!/usr/bin/env python | ||
| 2 | |||
| 3 | import numpy as np | ||
| 4 | from scipy import stats | ||
| 5 | |||
class InterQuartileRange:
    """Percentile-based outlier filter.

    low, high -- percentiles (0-100) delimiting the range to keep,
                 e.g. 25 and 75 for the classic inter-quartile range.
    extend    -- when true, widen the kept range by 1.5 times the
                 distance between the two percentile scores on each side
                 (the usual "1.5 IQR" outlier rule).
    """

    def __init__(self, low, high, extend = False):
        self.low = low
        self.high = high
        # extend is 1.5 extension of IQR
        self.extend = extend

    def remOutliers(self, vector):
        """Return the sorted samples of ``vector`` lying inside the range.

        Both bounds are inclusive, so samples equal to a bound are kept.
        An empty input yields an empty array.  The input is not modified;
        the result is a slice of a sorted copy.
        """
        svect = np.sort(vector)
        if len(svect) == 0:
            # no percentile can be computed on an empty sample
            return svect

        low_bound = stats.scoreatpercentile(svect, self.low)
        high_bound = stats.scoreatpercentile(svect, self.high)

        if self.extend:
            # 1.5 IQR outliers elimination
            margin = (high_bound - low_bound) * 1.5
            low_bound = low_bound - margin
            high_bound = high_bound + margin

        # svect is sorted, so the samples to keep form one contiguous run:
        # 'left' finds the first sample >= low_bound and 'right' the
        # position just past the last sample <= high_bound.  Unlike a
        # -1 "not found" sentinel, a bound beyond the data simply maps to
        # 0 or len(svect), so no valid sample is dropped by the slice.
        first = np.searchsorted(svect, low_bound, side='left')
        last = np.searchsorted(svect, high_bound, side='right')
        return svect[first:last]
