1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
|
import scipy.stats as s
import numpy as np
from util import select
def iqr(vect):
    """Return the inter-quartile range of a vector.

    vect -- array-like of numeric samples, handed unchanged to
            ``scipy.stats.scoreatpercentile``.

    Returns a 3-tuple ``(spread, q25, q75)`` where ``q25`` and ``q75`` are
    the scores at the 25th and 75th percentiles and ``spread`` is
    ``q75 - q25``.
    """
    # Ask for both percentiles in one call so the data is sorted only once.
    q25, q75 = s.scoreatpercentile(vect, [25, 75])
    return (q75 - q25, q25, q75)
def cutoff_max(vect, percentile=99):
    """Return the score at ``percentile`` of ``vect`` (99th by default).

    vect       -- array-like of numeric samples.
    percentile -- percentile to evaluate, passed to
                  ``scipy.stats.scoreatpercentile``.
    """
    return s.scoreatpercentile(vect, percentile)
def iqr_cutoff(vect, extend=1.5):
    """Return the ``(min_val, max_val)`` IQR fences for a vector.

    vect   -- array-like of numeric samples, forwarded to ``iqr``.
    extend -- multiple of the inter-quartile spread added beyond each
              quartile; defaults to 1.5 to match the other IQR helpers
              in this module.

    The lower fence is ``q25 - extend * spread`` and the upper fence is
    ``q75 + extend * spread``.
    """
    spread, low, high = iqr(vect)
    min_val = low - extend * spread
    max_val = high + extend * spread
    return min_val, max_val
def iqr_is_not_outlier(table, col=1, extend=1.5):
    """Create a row predicate that is true for rows that are NOT outliers.

    table  -- 2-D data that supports ``table[:, col]`` indexing (for
              example a NumPy array); that column is used to derive
              the fences.
    col    -- index of the column to test (default 1).
    extend -- multiple of the inter-quartile spread allowed beyond each
              quartile (default 1.5).

    Returns a function ``f(row)`` that is true when ``row[col]`` lies
    within the closed interval ``[min_val, max_val]``. The fences are
    computed once here, not on every call of the predicate.
    """
    # Reuse the shared fence computation instead of repeating the arithmetic.
    min_val, max_val = iqr_cutoff(table[:, col], extend)
    return lambda row: min_val <= row[col] <= max_val
def iqr_remove_outliers(table, col=1, extend=1.5):
    """Keep only the rows of ``table`` that fall within the IQR-based window.

    table  -- 2-D data that supports ``table[:, col]`` indexing.
    col    -- index of the column used for the outlier test (default 1).
    extend -- multiple of the inter-quartile spread allowed beyond each
              quartile (default 1.5).

    The filtering itself is delegated to ``util.select``, called as
    ``select(predicate, table)``.
    NOTE(review): presumably ``select`` returns a new filtered table and
    leaves ``table`` untouched -- confirm against ``util.select``.
    """
    valid = iqr_is_not_outlier(table, col, extend)
    return select(valid, table)
|