Diffstat (limited to 'litmus/gpu_affinity.c')
-rw-r--r--  litmus/gpu_affinity.c  231
1 files changed, 231 insertions, 0 deletions
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
new file mode 100644
index 000000000000..7d73105b4181
--- /dev/null
+++ b/litmus/gpu_affinity.c
@@ -0,0 +1,231 @@

#ifdef CONFIG_LITMUS_NVIDIA

#include <linux/sched.h>
#include <litmus/litmus.h>
#include <litmus/gpu_affinity.h>

#include <litmus/sched_trace.h>

#define OBSERVATION_CAP ((lt_t)(2e9))

// reason for skew: high outliers are less
// frequent and way out of bounds
//#define HI_THRESHOLD 2
//#define LO_THRESHOLD 4

#define NUM_STDEV_NUM	1
#define NUM_STDEV_DENOM	2
// parenthesize the arguments to avoid operator-precedence surprises
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
{
	fp_t relative_err;
	fp_t err, new;
	fp_t actual = _integer_to_fp(observed);

	err = _sub(actual, fb->est);
	new = _add(_mul(a, err), _mul(b, fb->accum_err));

	relative_err = _div(err, actual);

	fb->est = new;
	fb->accum_err = _add(fb->accum_err, err);

	return relative_err;
}
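
/* Illustrative sketch, not part of the build: one way the feedback estimator
 * above could be driven. The function and parameter names here are
 * hypothetical; in this file the real gains would come from
 * tsk_rt(t)->gpu_fb_param_a/b (see the disabled block further down). */
#if 0
static void example_feedback_loop(feedback_est_t *fb, fp_t a, fp_t b,
				lt_t samples[], int n)
{
	int i;
	for (i = 0; i < n; ++i) {
		/* each call recomputes fb->est from the proportional term
		 * a*err and the integral term b*accum_err */
		fp_t rel_err = update_estimate(fb, a, b, samples[i]);
		(void)rel_err; /* a caller might trace or threshold this */
	}
}
#endif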

lt_t variance(lt_t nums[], const lt_t avg, const uint16_t count)
{
	/* Brute force: takes about as much time as incremental running methods
	 * when count < 50 (on Bonham). Brute force is also less prone to
	 * overflow.
	 */
	lt_t sqdeviations = 0;
	uint16_t i;
	for(i = 0; i < count; ++i)
	{
		int64_t temp = (int64_t)nums[i] - (int64_t)avg;
		sqdeviations += (lt_t)(temp * temp);
	}
	return sqdeviations/count;
}
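
/* Illustrative check, not part of the build: for a window {2, 4, 4, 4, 5, 5,
 * 7, 9} with avg = 5, the squared deviations sum to 32, so variance()
 * returns 32/8 = 4. The sample values are made up for the example. */
#if 0
static void example_variance(void)
{
	lt_t samples[] = {2, 4, 4, 4, 5, 5, 7, 9};
	lt_t var = variance(samples, 5, 8); /* (9+1+1+1+0+0+4+16)/8 = 4 */
	(void)var;
}
#endif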

lt_t isqrt(lt_t n)
{
	/* integer square root (rounding down) using the binary
	 * digit-by-digit method (algorithm taken from Wikipedia) */
	lt_t res = 0;
	lt_t bit = ((lt_t)1) << (sizeof(n)*8-2);
	while (bit > n) {
		bit >>= 2;
	}

	while (bit != 0) {
		if (n >= res + bit) {
			n -= res + bit;
			res = (res >> 1) + bit;
		}
		else {
			res >>= 1;
		}
		bit >>= 2;
	}
	return res;
}
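
/* Illustrative check, not part of the build: isqrt() rounds down, so
 * isqrt(4) = 2, isqrt(10) = 3, and isqrt(1000000) = 1000. Combined with
 * variance() above, isqrt(variance(...)) yields the standard deviation
 * used by the outlier filter below. */
#if 0
static void example_isqrt(void)
{
	lt_t samples[] = {2, 4, 4, 4, 5, 5, 7, 9};
	lt_t std = isqrt(variance(samples, 5, 8)); /* isqrt(4) = 2 */
	(void)std;
}
#endif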

void update_gpu_estimate(struct task_struct *t, lt_t observed)
{
	//feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
	avg_est_t *est;
	struct migration_info mig_info;

	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);

	est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);

	if (unlikely(observed > OBSERVATION_CAP)) {
		TRACE_TASK(t, "Crazy observation was dropped: %llu > %llu\n",
			observed,
			OBSERVATION_CAP);
		return;
	}
#if 0
	// filter out values that are HI_THRESHOLDx or (1/LO_THRESHOLD)x out
	// of range of the average, but only filter if enough samples
	// have been taken.
	if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
		if (unlikely(observed < est->avg/LO_THRESHOLD)) {
			TRACE_TASK(t, "Observation is too small: %llu\n",
				observed);
			return;
		}
		else if (unlikely(observed > est->avg*HI_THRESHOLD)) {
			TRACE_TASK(t, "Observation is too large: %llu\n",
				observed);
			return;
		}
	}
#endif
	// filter out values outside (NUM_STDEV_NUM/NUM_STDEV_DENOM)x the
	// standard deviation, but only filter if enough samples have
	// been taken.
	if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
		lt_t lower, upper;

		lt_t range = (est->std*NUM_STDEV_NUM)/NUM_STDEV_DENOM;
		lower = est->avg - MIN(range, est->avg); // no underflow.

		if (unlikely(observed < lower)) {
			TRACE_TASK(t, "Observation is too small: %llu\n", observed);
			return;
		}

		upper = est->avg + range;
		if (unlikely(observed > upper)) {
			TRACE_TASK(t, "Observation is too large: %llu\n", observed);
			return;
		}
	}

	// slide the window: grow until full, then evict the oldest
	// sample from the running sum before overwriting its slot
	if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) {
		++est->count;
	}
	else {
		est->sum -= est->history[est->idx];
	}

	// record the estimate that was in effect when this sample was taken
	mig_info.observed = observed;
	mig_info.estimated = est->avg;
	mig_info.distance = tsk_rt(t)->gpu_migration;
	sched_trace_migration(t, &mig_info);

	est->history[est->idx] = observed;
	est->sum += observed;
	est->avg = est->sum/est->count;
	est->std = isqrt(variance(est->history, est->avg, est->count));
	est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;

#if 0
	if(unlikely(fb->est.val == 0)) {
		// kludge-- cap observed values to prevent whacky estimations.
		// whacky stuff happens during the first few jobs.
		if(unlikely(observed > OBSERVATION_CAP)) {
			TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
				observed, OBSERVATION_CAP);
			observed = OBSERVATION_CAP;
		}

		// take the first observation as our estimate
		// (initial value of 0 was bogus anyhow)
		fb->est = _integer_to_fp(observed);
		fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
	}
	else {
		fp_t rel_err = update_estimate(fb,
			tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
			tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
			observed);

		if(unlikely(_fp_to_integer(fb->est) <= 0)) {
			TRACE_TASK(t, "Invalid estimate. Patching.\n");
			fb->est = _integer_to_fp(observed);
			fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
		}
		else {
			struct migration_info mig_info;

			sched_trace_prediction_err(t,
				&(tsk_rt(t)->gpu_migration),
				&rel_err);

			mig_info.observed = observed;
			mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
			mig_info.distance = tsk_rt(t)->gpu_migration;

			sched_trace_migration(t, &mig_info);
		}
	}
#endif

	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n",
		tsk_rt(t)->gpu_migration,
		observed,
		est->avg);
}
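
/* Illustrative sketch, not part of the build: a layout for avg_est_t that
 * is consistent with its usage above. The authoritative definition lives
 * in <litmus/gpu_affinity.h>; the field types shown here are assumptions. */
#if 0
typedef struct {
	lt_t     history[AVG_EST_WINDOW_SIZE]; /* ring buffer of samples */
	uint16_t count; /* samples seen; saturates at AVG_EST_WINDOW_SIZE */
	uint16_t idx;   /* next ring-buffer slot to overwrite */
	lt_t     sum;   /* running sum of the samples in the window */
	lt_t     avg;   /* sum/count; the estimate reported to callers */
	lt_t     std;   /* isqrt(variance); drives the outlier filter */
} avg_est_t;
#endif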

gpu_migration_dist_t gpu_migration_distance(int a, int b)
{
	// GPUs are organized in a binary hierarchy of no more than 2^MIG_FAR GPUs
	int i;
	int dist;

	if(likely(a >= 0 && b >= 0)) {
		for(i = 0; i <= MIG_FAR; ++i) {
			if(a>>i == b>>i) {
				dist = i;
				goto out;
			}
		}
		dist = MIG_NONE; // hopefully never reached.
		TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b);
	}
	else {
		dist = MIG_NONE;
	}

out:
	TRACE_CUR("Distance %d -> %d is %d\n",
		a, b, dist);

	return dist;
}
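
/* Illustrative walk-through, not part of the build (assumes MIG_FAR >= 3,
 * i.e. a tree of up to eight GPUs): siblings 2 and 3 first match at
 * 2>>1 == 3>>1, giving distance 1; GPUs 0 and 7 only match at shift 3,
 * giving distance 3; a negative ID short-circuits to MIG_NONE. */
#if 0
static void example_distances(void)
{
	gpu_migration_dist_t near = gpu_migration_distance(2, 3);  /* 1 */
	gpu_migration_dist_t far  = gpu_migration_distance(0, 7);  /* 3 */
	gpu_migration_dist_t none = gpu_migration_distance(-1, 4); /* MIG_NONE */
	(void)near; (void)far; (void)none;
}
#endif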


#endif /* CONFIG_LITMUS_NVIDIA */