diff options
author | Andrea Bastoni <bastoni@sprg.uniroma2.it> | 2010-04-09 00:30:04 -0400 |
---|---|---|
committer | Andrea Bastoni <bastoni@sprg.uniroma2.it> | 2010-04-09 00:30:04 -0400 |
commit | cf47870875e03488442ed39d96acfddfa2993f02 (patch) | |
tree | 02c1957b94dafc20cd19c3d2ed1d3c82a3111c43 /stats.py | |
parent | 36b6614fa9ea3c2656aedb385c5c4154917aa618 (diff) | |
parent | 5a908690888395010b8a6615bc6ee3185920f2dc (diff) |
Merge branch 'master' of cvs.cs.unc.edu:/cvs/proj/litmus/repo/simple-gnuplot-wrapper
Conflicts:
gnuplot.py
- Merge to add my "non clean" (ehm.. crappy) support to errorbars in plot_pm.py
Diffstat (limited to 'stats.py')
-rw-r--r-- | stats.py | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/stats.py b/stats.py new file mode 100644 index 0000000..f6c4401 --- /dev/null +++ b/stats.py | |||
@@ -0,0 +1,29 @@ | |||
1 | import scipy.stats as s | ||
2 | import numpy as np | ||
3 | |||
4 | from util import select | ||
5 | |||
6 | |||
def iqr(vect):
    """Return the inter-quartile range of a vector.

    The result is a tuple ``(spread, q25, q75)`` where ``q25`` and ``q75``
    are the scores at the 25th and 75th percentile of ``vect`` and
    ``spread`` is ``q75 - q25``.
    """
    # Ask for both quartiles in a single call so that vect is only
    # processed once instead of once per quartile.
    q25, q75 = s.scoreatpercentile(vect, [25, 75])
    return (q75 - q25, q25, q75)
12 | |||
def cutoff_max(vect, percentile=99):
    """Return the score of vect at the given percentile (99 by default)."""
    threshold = s.scoreatpercentile(vect, per=percentile)
    return threshold
15 | |||
def iqr_is_not_outlier(table, col=1, extend=1.5):
    """Create a filter function that accepts rows that are not outliers.

    The quartiles of column ``col`` of ``table`` are obtained from iqr().
    The returned predicate takes a single row and is true when
    ``row[col]`` lies inside the inclusive window
    ``[q25 - extend * spread, q75 + extend * spread]``; it is false for
    outliers.

    ``table`` may be a 2-D array or anything np.asarray() converts into
    one, such as a list of equal-length rows.
    """
    # Plain sequences of rows do not support [:, col] indexing; going
    # through np.asarray() lets them be used as well (arrays pass through).
    column = np.asarray(table)[:, col]
    (spread, low, high) = iqr(column)
    min_val = low - extend * spread
    max_val = high + extend * spread

    def is_not_outlier(row):
        return min_val <= row[col] <= max_val

    return is_not_outlier
22 | |||
def iqr_remove_outliers(table, col=1, extend=1.5):
    """Keep only the rows of table inside the IQR-based window.

    The predicate built by iqr_is_not_outlier() for column ``col`` and
    factor ``extend`` is handed to select() together with the table, and
    whatever select() produces is returned.
    """
    return select(iqr_is_not_outlier(table, col, extend), table)
29 | |||