Diffstat (limited to 'litmus/gpu_affinity.c')
-rw-r--r--	litmus/gpu_affinity.c	67
1 file changed, 64 insertions(+), 3 deletions(-)
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
index 2cdf18bc7dd6..896f3248b8a2 100644
--- a/litmus/gpu_affinity.c
+++ b/litmus/gpu_affinity.c
@@ -11,8 +11,10 @@
 
 // reason for skew: high outliers are less
 // frequent and way out of bounds
-#define HI_THRESHOLD 2
-#define LO_THRESHOLD 4
+//#define HI_THRESHOLD 2
+//#define LO_THRESHOLD 4
+
+#define NUM_STDEV 2
 
 #define MIN(a, b) ((a < b) ? a : b)
 
@@ -33,6 +35,44 @@ static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
 	return relative_err;
 }
 
+lt_t variance(lt_t nums[], const lt_t avg, const uint16_t count)
+{
+	/* Brute force: takes about as much time as incremental running methods
+	 * when count < 50 (on Bonham). Brute force is also less prone to overflow.
+	 */
+	lt_t sqdeviations = 0;
+	uint16_t i;
+	for (i = 0; i < count; ++i)
+	{
+		lt_t temp = (int64_t)nums[i] - (int64_t)avg;
+		sqdeviations += temp * temp;
+	}
+	return sqdeviations/count;
+}
+
+lt_t isqrt(lt_t n)
+{
+	/* integer square root using the bitwise digit-by-digit method
+	 * (algorithm taken from Wikipedia) */
+	lt_t res = 0;
+	lt_t bit = ((lt_t)1) << (sizeof(n)*8 - 2);
+	while (bit > n) {
+		bit >>= 2;
+	}
+
+	while (bit != 0) {
+		if (n >= res + bit) {
+			n -= res + bit;
+			res = (res >> 1) + bit;
+		}
+		else {
+			res >>= 1;
+		}
+		bit >>= 2;
+	}
+	return res;
+}
+
 void update_gpu_estimate(struct task_struct *t, lt_t observed)
 {
 	//feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
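The comment in variance() above alludes to incremental running methods. For comparison, a minimal user-space sketch of one such method (Welford's algorithm) in integer arithmetic; the welford struct and function names are illustrative, not part of this patch, and the truncating integer division makes the running mean approximate:

#include <stdint.h>

/* Illustrative only: incremental (Welford-style) running variance.
 * The integer division truncates the mean, so the result is approximate. */
struct welford {
	uint32_t count;
	int64_t mean; /* running mean */
	int64_t m2;   /* running sum of squared deviations */
};

static void welford_add(struct welford *w, int64_t x)
{
	int64_t d1, d2;
	w->count++;
	d1 = x - w->mean;
	w->mean += d1 / (int64_t)w->count;
	d2 = x - w->mean; /* uses the updated mean */
	w->m2 += d1 * d2;
}

static int64_t welford_variance(const struct welford *w)
{
	return w->count ? w->m2 / (int64_t)w->count : 0; /* population variance */
}

The brute-force loop in the patch avoids this truncation drift at the cost of an O(count) pass per update, which the comment argues is acceptable for windows under about 50 samples.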
@@ -65,8 +105,28 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed)
 			observed);
 		return;
 	}
-}
 #endif
+	// Filter out values more than NUM_STDEV standard deviations from
+	// the mean, but only once enough samples have been taken.
+	if (likely(est->count > MIN(10, AVG_EST_WINDOW_SIZE/2))) {
+		lt_t lower, upper;
+
+		lt_t range = est->std*NUM_STDEV;
+		lower = est->avg - MIN(range, est->avg); // no underflow.
+
+		if (unlikely(observed < lower)) {
+			TRACE_TASK(t, "Observation is too small: %llu\n", observed);
+			return;
+		}
+
+		upper = est->avg + range;
+		if (unlikely(observed > upper)) {
+			TRACE_TASK(t, "Observation is too large: %llu\n", observed);
+			return;
+		}
+	}
+
+
 
 	if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) {
 		++est->count;
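As a stand-alone illustration of the acceptance test above, the following user-space sketch assumes lt_t is a 64-bit unsigned time type (as in LITMUS^RT); accept_observation() is a hypothetical helper used only for this example:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t lt_t; /* assumption: lt_t is a 64-bit unsigned time type */
#define NUM_STDEV 2

/* Mirrors the filter above: accept only observations within
 * NUM_STDEV standard deviations of the running average. */
static int accept_observation(lt_t avg, lt_t std, lt_t observed)
{
	lt_t range = std * NUM_STDEV;
	lt_t lower = avg - (range < avg ? range : avg); /* clamp: no underflow */
	lt_t upper = avg + range;
	return observed >= lower && observed <= upper;
}

int main(void)
{
	/* avg = 1000, std = 100 -> accept [800, 1200] */
	printf("%d %d %d\n",
	       accept_observation(1000, 100, 900),  /* 1: within range */
	       accept_observation(1000, 100, 1500), /* 0: too large */
	       accept_observation(1000, 100, 100)); /* 0: too small */
	return 0;
}

Because range is clamped to avg on the low side, the lower bound can never underflow the unsigned lt_t, matching the "no underflow" note in the patch.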
@@ -84,6 +144,7 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed)
 	est->history[est->idx] = observed;
 	est->sum += observed;
 	est->avg = est->sum/est->count;
+	est->std = isqrt(variance(est->history, est->avg, est->count));
 	est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;
 
 
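As the last hunk shows, the standard deviation is maintained as the integer square root of the window variance. A stand-alone sanity check, again assuming lt_t maps to uint64_t, with the helper bodies taken from the patch:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t lt_t; /* assumption: lt_t is a 64-bit unsigned time type */

lt_t variance(lt_t nums[], const lt_t avg, const uint16_t count)
{
	lt_t sqdeviations = 0;
	uint16_t i;
	for (i = 0; i < count; ++i)
	{
		lt_t temp = (int64_t)nums[i] - (int64_t)avg;
		sqdeviations += temp * temp;
	}
	return sqdeviations/count;
}

lt_t isqrt(lt_t n)
{
	lt_t res = 0;
	lt_t bit = ((lt_t)1) << (sizeof(n)*8 - 2);
	while (bit > n) {
		bit >>= 2;
	}
	while (bit != 0) {
		if (n >= res + bit) {
			n -= res + bit;
			res = (res >> 1) + bit;
		}
		else {
			res >>= 1;
		}
		bit >>= 2;
	}
	return res;
}

int main(void)
{
	lt_t nums[] = { 2, 4, 4, 4, 5, 5, 7, 9 }; /* sum 40, avg 5 */
	lt_t var = variance(nums, 5, 8);           /* 32 / 8 = 4 */
	printf("var=%llu std=%llu\n",
	       (unsigned long long)var,
	       (unsigned long long)isqrt(var));    /* prints var=4 std=2 */
	return 0;
}

With this classic sample the population variance is 4 and the standard deviation is exactly 2, so the integer arithmetic is exact here; in general isqrt() rounds the true square root down.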