Diffstat (limited to 'litmus/gpu_affinity.c')
-rw-r--r--	litmus/gpu_affinity.c	67
1 file changed, 64 insertions(+), 3 deletions(-)
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
index 2cdf18bc7dd6..896f3248b8a2 100644
--- a/litmus/gpu_affinity.c
+++ b/litmus/gpu_affinity.c
@@ -11,8 +11,10 @@
 
 // reason for skew: high outliers are less
 // frequent and way out of bounds
-#define HI_THRESHOLD 2
-#define LO_THRESHOLD 4
+//#define HI_THRESHOLD 2
+//#define LO_THRESHOLD 4
+
+#define NUM_STDEV 2
 
 #define MIN(a, b) ((a < b) ? a : b)
 
@@ -33,6 +35,44 @@ static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
 	return relative_err;
 }
 
+lt_t variance(lt_t nums[], const lt_t avg, const uint16_t count)
+{
+	/* Brute force: takes about as much time as incremental running methods
+	 * when count < 50 (on Bonham). Brute force is also less prone to overflow.
+	 */
+	lt_t sqdeviations = 0;
+	uint16_t i;
+	for (i = 0; i < count; ++i)
+	{
+		lt_t temp = (int64_t)nums[i] - (int64_t)avg;
+		sqdeviations += temp * temp;
+	}
+	return sqdeviations/count;
+}
+
+lt_t isqrt(lt_t n)
+{
+	/* integer square root using the bitwise digit-by-digit method
+	 * (algorithm taken from Wikipedia) */
+	lt_t res = 0;
+	lt_t bit = ((lt_t)1) << (sizeof(n)*8 - 2);
+	while (bit > n) {
+		bit >>= 2;
+	}
+
+	while (bit != 0) {
+		if (n >= res + bit) {
+			n -= res + bit;
+			res = (res >> 1) + bit;
+		}
+		else {
+			res >>= 1;
+		}
+		bit >>= 2;
+	}
+	return res;
+}
+
 void update_gpu_estimate(struct task_struct *t, lt_t observed)
 {
 	//feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
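The comment in variance() above alludes to incremental running methods. For comparison, a minimal user-space sketch of one such method (Welford's algorithm) in integer arithmetic; the welford struct and function names are illustrative, not part of this patch, and the truncating integer division makes the running mean approximate:

#include <stdint.h>

/* Illustrative only: incremental (Welford-style) running variance.
 * The integer division truncates the mean, so the result is approximate. */
struct welford {
	uint32_t count;
	int64_t mean; /* running mean */
	int64_t m2;   /* running sum of squared deviations */
};

static void welford_add(struct welford *w, int64_t x)
{
	int64_t d1, d2;
	w->count++;
	d1 = x - w->mean;
	w->mean += d1 / (int64_t)w->count;
	d2 = x - w->mean; /* uses the updated mean */
	w->m2 += d1 * d2;
}

static int64_t welford_variance(const struct welford *w)
{
	return w->count ? w->m2 / (int64_t)w->count : 0; /* population variance */
}

The brute-force loop in the patch avoids this truncation drift at the cost of an O(count) pass per update, which the comment argues is acceptable for windows under about 50 samples.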
@@ -65,8 +105,28 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed)
 			observed);
 		return;
 	}
-}
 #endif
+	// Filter out values more than NUM_STDEV standard deviations from
+	// the mean, but only once enough samples have been taken.
+	if (likely(est->count > MIN(10, AVG_EST_WINDOW_SIZE/2))) {
+		lt_t lower, upper;
+
+		lt_t range = est->std*NUM_STDEV;
+		lower = est->avg - MIN(range, est->avg); // no underflow.
+
+		if (unlikely(observed < lower)) {
+			TRACE_TASK(t, "Observation is too small: %llu\n", observed);
+			return;
+		}
+
+		upper = est->avg + range;
+		if (unlikely(observed > upper)) {
+			TRACE_TASK(t, "Observation is too large: %llu\n", observed);
+			return;
+		}
+	}
+
+
 
 	if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) {
 		++est->count;
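As a stand-alone illustration of the acceptance test above, the following user-space sketch assumes lt_t is a 64-bit unsigned time type (as in LITMUS^RT); accept_observation() is a hypothetical helper used only for this example:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t lt_t; /* assumption: lt_t is a 64-bit unsigned time type */
#define NUM_STDEV 2

/* Mirrors the filter above: accept only observations within
 * NUM_STDEV standard deviations of the running average. */
static int accept_observation(lt_t avg, lt_t std, lt_t observed)
{
	lt_t range = std * NUM_STDEV;
	lt_t lower = avg - (range < avg ? range : avg); /* clamp: no underflow */
	lt_t upper = avg + range;
	return observed >= lower && observed <= upper;
}

int main(void)
{
	/* avg = 1000, std = 100 -> accept [800, 1200] */
	printf("%d %d %d\n",
	       accept_observation(1000, 100, 900),  /* 1: within range */
	       accept_observation(1000, 100, 1500), /* 0: too large */
	       accept_observation(1000, 100, 100)); /* 0: too small */
	return 0;
}

Because range is clamped to avg on the low side, the lower bound can never underflow the unsigned lt_t, matching the "no underflow" note in the patch.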
@@ -84,6 +144,7 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed)
 	est->history[est->idx] = observed;
 	est->sum += observed;
 	est->avg = est->sum/est->count;
+	est->std = isqrt(variance(est->history, est->avg, est->count));
 	est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;
 
 
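As the last hunk shows, the standard deviation is maintained as the integer square root of the window variance. A stand-alone sanity check, again assuming lt_t maps to uint64_t, with the helper bodies taken from the patch:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t lt_t; /* assumption: lt_t is a 64-bit unsigned time type */

lt_t variance(lt_t nums[], const lt_t avg, const uint16_t count)
{
	lt_t sqdeviations = 0;
	uint16_t i;
	for (i = 0; i < count; ++i)
	{
		lt_t temp = (int64_t)nums[i] - (int64_t)avg;
		sqdeviations += temp * temp;
	}
	return sqdeviations/count;
}

lt_t isqrt(lt_t n)
{
	lt_t res = 0;
	lt_t bit = ((lt_t)1) << (sizeof(n)*8 - 2);
	while (bit > n) {
		bit >>= 2;
	}
	while (bit != 0) {
		if (n >= res + bit) {
			n -= res + bit;
			res = (res >> 1) + bit;
		}
		else {
			res >>= 1;
		}
		bit >>= 2;
	}
	return res;
}

int main(void)
{
	lt_t nums[] = { 2, 4, 4, 4, 5, 5, 7, 9 }; /* sum 40, avg 5 */
	lt_t var = variance(nums, 5, 8);           /* 32 / 8 = 4 */
	printf("var=%llu std=%llu\n",
	       (unsigned long long)var,
	       (unsigned long long)isqrt(var));    /* prints var=4 std=2 */
	return 0;
}

With this classic sample the population variance is 4 and the standard deviation is exactly 2, so the integer arithmetic is exact here; in general isqrt() rounds the true square root down.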