author     Glenn Elliott <gelliott@cs.unc.edu>  2012-09-20 18:57:20 -0400
committer  Glenn Elliott <gelliott@cs.unc.edu>  2012-09-20 18:57:20 -0400
commit     150c1d5ed8541f3a2bfcde3d5f3174b9af4ab4fc (patch)
tree       7970735e676afc237e40e0971119001429bf56ae
parent     33cb64c787070d6b60a02ea40064d717d3b9dc07 (diff)

Generalized GPU cost predictors + EWMA. (untested)  [branch: wip-gpu-rtas12]

-rw-r--r--  include/litmus/gpu_affinity.h |  26
-rw-r--r--  include/litmus/rt_param.h     |  77
-rw-r--r--  include/litmus/unistd_32.h    |   5
-rw-r--r--  include/litmus/unistd_64.h    |   7
-rw-r--r--  litmus/gpu_affinity.c         | 238
-rw-r--r--  litmus/litmus.c               | 194

6 files changed, 381 insertions, 166 deletions
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
index d64a15cbf2a5..a2abc4c1fd7c 100644
--- a/include/litmus/gpu_affinity.h
+++ b/include/litmus/gpu_affinity.h
@@ -31,18 +31,20 @@ static inline lt_t get_gpu_time(struct task_struct* t)
 
 static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist)
 {
-	// int i;
-	// fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est);
-	// lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates...
-	lt_t val = t->rt_param.gpu_migration_est[dist].avg;
-
-	// WARN_ON(temp < 0);
-
-	// lower-bound a distant migration to be at least equal to the level
-	// below it.
-	// for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) {
-	//	val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est);
-	// }
+	lt_t val = 0;
+
+	switch(t->rt_param.prediction_mode) {
+	case SIMPLE_AVG:
+	case CC_BRUTE_AVG:
+	case CC_MR_AVG:
+		val = t->rt_param.gpu_avg_est[dist].center_line;
+		break;
+	case SIMPLE_EWMA:
+	case CC_BRUTE_EWMA:
+	case CC_MR_EWMA:
+		val = t->rt_param.gpu_ewma_est[dist].center_line;
+		break;
+	}
 
 	return ((val > 0) ? val : dist+1);
 }
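Note: until a predictor has accumulated observations, center_line is zero and get_gpu_estimate() falls back to dist+1. A minimal sketch of the resulting ordering (illustrative only; it relies on the enum ordering implied by MIG_LAST = MIG_NONE in rt_param.h below):

    /* With an empty history, center_line == 0 for every distance, so
     * get_gpu_estimate() returns dist+1 -- tiny, but monotone in dist. */
    static void untrained_ordering_example(struct task_struct* t)
    {
        lt_t near = get_gpu_estimate(t, MIG_LOCAL); /* == MIG_LOCAL + 1 */
        lt_t far  = get_gpu_estimate(t, MIG_NONE);  /* == MIG_NONE + 1  */
        /* near < far: nearer migration distances still look cheaper
         * before any observations have been recorded. */
        (void)near; (void)far;
    }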
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 44f85a366574..6da36f50240d 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -41,6 +41,32 @@ typedef enum {
 	PRECISE_SIGNALS, /* budget signals are triggered with hrtimers */
 } budget_signal_policy_t;
 
+typedef enum
+{
+	/* simple - all observations counted */
+	SIMPLE_AVG = 1,
+	SIMPLE_EWMA,
+
+	/* control chart, brute - std deviation fully calculated */
+	CC_BRUTE_AVG,
+	CC_BRUTE_EWMA,
+
+	/* control chart, moving range - std deviation est. from moving range */
+	CC_MR_AVG,
+	CC_MR_EWMA,
+} predictor_type;
+
+typedef struct predictor
+{
+	predictor_type type;
+	uint16_t window_size;
+
+	uint8_t sigmas;
+	uint16_t alpha_num;
+	uint16_t alpha_denom;
+} predictor_t;
+
+
 /* We use the common priority interpretation "lower index == higher priority",
  * which is commonly used in fixed-priority schedulability analysis papers.
  * So, a numerically lower priority value implies higher scheduling priority,
@@ -180,23 +206,48 @@ typedef enum gpu_migration_dist
 	MIG_LAST = MIG_NONE
 } gpu_migration_dist_t;
 
-typedef struct feedback_est{
-	fp_t est;
-	fp_t accum_err;
-} feedback_est_t;
-
-
 #define AVG_EST_WINDOW_SIZE 20
 
-typedef struct avg_est{
-	lt_t history[AVG_EST_WINDOW_SIZE];
+typedef struct obs_history
+{
+	uint16_t window_size;
 	uint16_t count;
 	uint16_t idx;
+	uint16_t ridx;
+
+	lt_t last_observed;
+	lt_t *observations;
+	lt_t *range;
+} obs_history_t;
+
+typedef struct avg_est {
+	uint8_t sigmas;
+
+	obs_history_t history;
+
 	lt_t sum;
+	lt_t rsum;
+
 	lt_t std;
-	lt_t avg;
+	lt_t center_line;
 } avg_est_t;
 
+
+typedef struct ewma_est {
+	uint8_t sigmas;
+	uint16_t alpha_num;
+	uint16_t alpha_denom;
+
+	obs_history_t history;
+
+	lt_t rsum;
+
+	lt_t std;
+	lt_t center_line;
+} ewma_est_t;
+
+
+
 /* RT task parameters for scheduling extensions
  * These parameters are inherited during clone and therefore must
  * be explicitly set up before the task set is launched.
@@ -243,7 +294,13 @@ struct rt_param {
 	long unsigned int held_gpus; // bitmap of held GPUs.
 
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
-	avg_est_t gpu_migration_est[MIG_LAST+1];
+	// avg_est_t gpu_migration_est[MIG_LAST+1];
+	predictor_type prediction_mode;
+	union
+	{
+		avg_est_t gpu_avg_est[MIG_LAST+1];
+		ewma_est_t gpu_ewma_est[MIG_LAST+1];
+	};
 
 	gpu_migration_dist_t gpu_migration;
 	int last_gpu;
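Note: alpha_num/alpha_denom parameterize the integer EWMA recurrence implemented below in litmus/gpu_affinity.c, where the alpha fraction is the weight kept on the previous center line. A worked step, assuming alpha_num = 3 and alpha_denom = 4:

    center_line' = (alpha_num * center_line) / alpha_denom
                 + ((alpha_denom - alpha_num) * observed) / alpha_denom

    e.g. center_line = 1000, observed = 2000:
         center_line' = (3 * 1000)/4 + (1 * 2000)/4 = 750 + 500 = 1250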
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
index c86b743408ed..964f02082275 100644
--- a/include/litmus/unistd_32.h
+++ b/include/litmus/unistd_32.h
@@ -21,6 +21,7 @@
 #define __NR_litmus_dgl_unlock	__LSC(13)
 #define __NR_register_nv_device	__LSC(14)
 
-#define __NR_set_aux_tasks	_LSC(15)
+#define __NR_config_gpu_affinity_predictor	__LSC(15)
+#define __NR_set_aux_tasks	_LSC(16)
 
-#define NR_litmus_syscalls 16
+#define NR_litmus_syscalls 17
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
index 3825bc129dbd..70be98d4f1e7 100644
--- a/include/litmus/unistd_64.h
+++ b/include/litmus/unistd_64.h
@@ -35,8 +35,9 @@ __SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock)
 __SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock)
 #define __NR_register_nv_device	__LSC(14)
 __SYSCALL(__NR_register_nv_device, sys_register_nv_device)
-
-#define __NR_set_aux_tasks	__LSC(15)
+#define __NR_config_gpu_affinity_predictor	__LSC(15)
+__SYSCALL(__NR_config_gpu_affinity_predictor, sys_config_gpu_affinity_predictor)
+#define __NR_set_aux_tasks	__LSC(16)
 __SYSCALL(__NR_set_aux_tasks, sys_set_aux_tasks)
 
-#define NR_litmus_syscalls 16
+#define NR_litmus_syscalls 17
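Note: a hypothetical user-space sketch of the new syscall (assumes the kernel headers above are reachable from user space and that the caller is already a LITMUS^RT task; liblitmus may eventually provide its own wrapper):

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <litmus/rt_param.h>  /* predictor_t, CC_MR_EWMA, ... */
    #include <litmus/unistd_64.h> /* __NR_config_gpu_affinity_predictor */

    int config_predictor_example(void)
    {
        /* 3-sigma moving-range EWMA; 9/10 of the weight stays on the old
         * estimate. window_size must be > 10, per the validation in
         * sys_config_gpu_affinity_predictor(). */
        predictor_t p = {
            .type        = CC_MR_EWMA,
            .window_size = 20,
            .sigmas      = 3,
            .alpha_num   = 9,
            .alpha_denom = 10,
        };
        return syscall(__NR_config_gpu_affinity_predictor, &p);
    }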
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
index 7d73105b4181..7a2b5005be45 100644
--- a/litmus/gpu_affinity.c
+++ b/litmus/gpu_affinity.c
@@ -9,33 +9,10 @@
 
 #define OBSERVATION_CAP ((lt_t)(2e9))
 
-// reason for skew: high outliers are less
-// frequent and way out of bounds
-//#define HI_THRESHOLD 2
-//#define LO_THRESHOLD 4
-
-#define NUM_STDEV_NUM 1
-#define NUM_STDEV_DENOM 2
+#define MIN_OBSERVATIONS 10
 
 #define MIN(a, b) ((a < b) ? a : b)
 
-static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
-{
-	fp_t relative_err;
-	fp_t err, new;
-	fp_t actual = _integer_to_fp(observed);
-
-	err = _sub(actual, fb->est);
-	new = _add(_mul(a, err), _mul(b, fb->accum_err));
-
-	relative_err = _div(err, actual);
-
-	fb->est = new;
-	fb->accum_err = _add(fb->accum_err, err);
-
-	return relative_err;
-}
-
 lt_t varience(lt_t nums[], const lt_t avg, const uint16_t count)
 {
 	/* brute force: takes about as much time as incremental running methods when
@@ -74,127 +51,154 @@ lt_t isqrt(lt_t n)
 	return res;
 }
 
-void update_gpu_estimate(struct task_struct *t, lt_t observed)
-{
-	//feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
-	avg_est_t *est;
-	struct migration_info mig_info;
-
-	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
-
-	est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
-
-	if (unlikely(observed > OBSERVATION_CAP)) {
-		TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n",
-			observed,
-			OBSERVATION_CAP);
-		return;
-	}
-
-#if 0
-	// filter out values that are HI_THRESHOLDx or (1/LO_THRESHOLD)x out
-	// of range of the average, but only filter if enough samples
-	// have been taken.
-	if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
-		if (unlikely(observed < est->avg/LO_THRESHOLD)) {
-			TRACE_TASK(t, "Observation is too small: %llu\n",
-				observed);
-			return;
-		}
-		else if (unlikely(observed > est->avg*HI_THRESHOLD)) {
-			TRACE_TASK(t, "Observation is too large: %llu\n",
-				observed);
-			return;
-		}
-	}
-#endif
-	// filter values outside NUM_STDEVx the standard deviation,
-	// but only filter if enough samples have been taken.
-	if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
-		lt_t lower, upper;
-
-		lt_t range = (est->std*NUM_STDEV_NUM)/NUM_STDEV_DENOM;
-		lower = est->avg - MIN(range, est->avg); // no underflow.
-
-		if (unlikely(observed < lower)) {
-			TRACE_TASK(t, "Observation is too small: %llu\n", observed);
-			return;
-		}
-
-		upper = est->avg + range;
-		if (unlikely(observed > upper)) {
-			TRACE_TASK(t, "Observation is too large: %llu\n", observed);
-			return;
-		}
-	}
-
-
-
-	if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) {
-		++est->count;
-	}
-	else {
-		est->sum -= est->history[est->idx];
-	}
-
-	mig_info.observed = observed;
-	mig_info.estimated = est->avg;
-	mig_info.distance = tsk_rt(t)->gpu_migration;
-	sched_trace_migration(t, &mig_info);
-
-
-	est->history[est->idx] = observed;
-	est->sum += observed;
-	est->avg = est->sum/est->count;
-	est->std = isqrt(varience(est->history, est->avg, est->count));
-	est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;
-
-
-#if 0
-	if(unlikely(fb->est.val == 0)) {
-		// kludge-- cap observed values to prevent whacky estimations.
-		// whacky stuff happens during the first few jobs.
-		if(unlikely(observed > OBSERVATION_CAP)) {
-			TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
-				observed, OBSERVATION_CAP);
-			observed = OBSERVATION_CAP;
-		}
-
-		// take the first observation as our estimate
-		// (initial value of 0 was bogus anyhow)
-		fb->est = _integer_to_fp(observed);
-		fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
-	}
-	else {
-		fp_t rel_err = update_estimate(fb,
-			tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
-			tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
-			observed);
-
-		if(unlikely(_fp_to_integer(fb->est) <= 0)) {
-			TRACE_TASK(t, "Invalid estimate. Patching.\n");
-			fb->est = _integer_to_fp(observed);
-			fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
-		}
-		else {
-			struct migration_info mig_info;
-
-			sched_trace_prediction_err(t,
-				&(tsk_rt(t)->gpu_migration),
-				&rel_err);
-
-			mig_info.observed = observed;
-			mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
-			mig_info.distance = tsk_rt(t)->gpu_migration;
-
-			sched_trace_migration(t, &mig_info);
-		}
-	}
-#endif
+static lt_t avg_method(struct task_struct *t, lt_t observed)
+{
+	avg_est_t *est = &(tsk_rt(t)->gpu_avg_est[tsk_rt(t)->gpu_migration]);
+
+	/* cut-off */
+	if (tsk_rt(t)->prediction_mode != SIMPLE_AVG &&
+	    est->history.count >= MIN_OBSERVATIONS) {
+		lt_t upper = est->center_line + est->std * est->sigmas;
+		lt_t lower = est->center_line - est->std * est->sigmas;
+		if (unlikely(observed < lower || observed > upper)) {
+			TRACE_TASK(t, "%llu out of range [%llu, %llu]\n",
+				observed, lower, upper);
+			return 0;
+		}
+	}
+
+	if (unlikely(est->history.count < est->history.window_size)) {
+		++est->history.count;
+	}
+	else {
+		est->sum -= est->history.observations[est->history.idx];
+
+		if (tsk_rt(t)->prediction_mode == CC_MR_AVG) {
+			est->rsum -= est->history.range[est->history.ridx];
+		}
+	}
+
+	est->history.observations[est->history.idx] = observed;
+	est->sum += observed;
+	est->center_line = est->sum/est->history.count;
+
+	if (tsk_rt(t)->prediction_mode == CC_BRUTE_AVG) {
+		if (likely(est->history.count > 1)) {
+			est->std = isqrt(varience(est->history.observations, est->center_line, est->history.count));
+		}
+	}
+	else if (tsk_rt(t)->prediction_mode == CC_MR_AVG) {
+		if (likely(est->history.count > 1)) {
+			lt_t mr;
+			est->history.range[est->history.ridx] = (observed > est->history.last_observed) ?
+				observed - est->history.last_observed :
+				est->history.last_observed - observed;
+			est->rsum += est->history.range[est->history.ridx];
+			mr = est->rsum / (est->history.count - 1);
+			est->std = est->rsum + (128*est->rsum)/1000; // std = mr/d_2, w/ d_2 = 1.128
+			est->history.ridx = (est->history.ridx + 1) % (est->history.window_size - 1);
+		}
+		est->history.last_observed = observed;
+	}
+
+	est->history.idx = (est->history.idx + 1) % est->history.window_size;
+
+	return est->center_line;
+}
+
+static lt_t ewma_method(struct task_struct *t, lt_t observed)
+{
+	ewma_est_t *est = &(tsk_rt(t)->gpu_ewma_est[tsk_rt(t)->gpu_migration]);
+
+	/* cut-off */
+	if (tsk_rt(t)->prediction_mode != SIMPLE_EWMA &&
+	    est->history.count >= MIN_OBSERVATIONS) {
+		lt_t upper = est->center_line + est->std * est->sigmas;
+		lt_t lower = est->center_line - est->std * est->sigmas;
+		if (unlikely(observed < lower || observed > upper)) {
+			TRACE_TASK(t, "%llu out of range [%llu, %llu]\n",
+				observed, lower, upper);
+			return 0;
+		}
+	}
+
+	if (unlikely(est->history.count < est->history.window_size)) {
+		++est->history.count;
+	}
+	else if (tsk_rt(t)->prediction_mode == CC_MR_EWMA) {
+		est->rsum -= est->history.range[est->history.ridx];
+	}
+
+	if (unlikely(est->history.count == 1)) {
+		est->center_line = observed;
+	}
+	else {
+		est->center_line =
+			((est->alpha_num * est->center_line)/est->alpha_denom) +
+			(((est->alpha_denom - est->alpha_num) * observed)/est->alpha_denom);
+	}
+
+	if (tsk_rt(t)->prediction_mode == CC_BRUTE_EWMA) {
+		est->history.observations[est->history.idx] = observed;
+		est->std = isqrt(varience(est->history.observations, est->center_line, est->history.count));
+		est->history.idx = (est->history.idx + 1) % est->history.window_size;
+	}
+	else if (tsk_rt(t)->prediction_mode == CC_MR_EWMA) {
+		if (likely(est->history.count > 1)) {
+			lt_t mr;
+			est->history.range[est->history.ridx] = (observed > est->history.last_observed) ?
+				observed - est->history.last_observed :
+				est->history.last_observed - observed;
+			est->rsum += est->history.range[est->history.ridx];
+			mr = est->rsum / (est->history.count - 1);
+			est->std = est->rsum + (128*est->rsum)/1000; // std = mr/d_2, w/ d_2 = 1.128
+			est->history.ridx = (est->history.ridx + 1) % (est->history.window_size - 1);
+		}
+		est->history.last_observed = observed;
+	}
+
+	return est->center_line;
+}
+
+void update_gpu_estimate(struct task_struct *t, lt_t observed)
+{
+	lt_t new_est;
+	struct migration_info mig_info;
+
+	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
+
+	if (unlikely(observed > OBSERVATION_CAP)) {
+		TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n",
+			observed,
+			OBSERVATION_CAP);
+		return;
+	}
+
+	switch (tsk_rt(t)->prediction_mode) {
+	case SIMPLE_AVG:
+	case CC_BRUTE_AVG:
+	case CC_MR_AVG:
+		new_est = avg_method(t, observed);
+		break;
+	case SIMPLE_EWMA:
+	case CC_BRUTE_EWMA:
+	case CC_MR_EWMA:
+		new_est = ewma_method(t, observed);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	mig_info.observed = observed;
+	mig_info.estimated = new_est;
+	mig_info.distance = tsk_rt(t)->gpu_migration;
+	sched_trace_migration(t, &mig_info);
 
 	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n",
 		tsk_rt(t)->gpu_migration,
 		observed,
-		est->avg);
+		new_est);
 }
 
 gpu_migration_dist_t gpu_migration_distance(int a, int b)
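Note: in both CC_MR_* paths the comment says std = mr/d_2 with d_2 = 1.128 (the standard XmR control-chart estimate), but the committed line computes est->rsum + (128*est->rsum)/1000, i.e. roughly 1.128*rsum, and the mean moving range mr is computed but never used. The textbook form in integer arithmetic would be (a sketch, not the committed code):

    /* Sigma estimated from the mean moving range, XmR-style:
     * d_2 = 1.128 for subgroups of size 2, so sigma ~= mr / 1.128. */
    lt_t mr  = est->rsum / (est->history.count - 1); /* mean moving range */
    lt_t std = (mr * 1000) / 1128;                   /* mr / 1.128, integer form */

Separately, lower = est->center_line - est->std * est->sigmas can wrap around zero for the unsigned lt_t when std*sigmas exceeds the center line; the replaced code guarded against this with MIN(range, est->avg).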
diff --git a/litmus/litmus.c b/litmus/litmus.c
index d368202ab8c3..8380c0f6a393 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -332,28 +332,183 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
 }
 
 #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
-void init_gpu_affinity_state(struct task_struct* p)
-{
-	// under-damped
-	//p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
-	//p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
-
-#if 0
-	// emperical;
-	p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
-	p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
-
-	p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
-	p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
-
-	p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
-	p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
-
-	p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
-	p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);
-#endif
+static void create_gpu_cc_mr_avg(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = window_size;
+	obs->observations = kmalloc(sizeof(lt_t)*window_size, GFP_KERNEL);
+	obs->range = kmalloc(sizeof(lt_t)*(window_size-1), GFP_KERNEL);
+}
+
+static void create_gpu_cc_brute_avg(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = window_size;
+	obs->observations = kmalloc(sizeof(lt_t)*window_size, GFP_KERNEL);
+	obs->range = NULL;
+}
+
+static void create_gpu_simple_avg(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = 0;
+	obs->observations = NULL;
+	obs->range = NULL;
+}
+
+static void destroy_obs_history(obs_history_t* obs)
+{
+	if (obs->observations)
+		kfree(obs->observations);
+	if (obs->range)
+		kfree(obs->range);
+}
+
+static void create_gpu_cc_mr_ewma(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = window_size;
+	obs->observations = NULL;
+	obs->range = kmalloc(sizeof(lt_t)*(window_size-1), GFP_KERNEL);
+}
+
+static void create_gpu_cc_brute_ewma(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = window_size;
+	obs->observations = kmalloc(sizeof(lt_t)*window_size, GFP_KERNEL);
+	obs->range = NULL;
+}
+
+static void create_gpu_simple_ewma(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = 0;
+	obs->observations = NULL;
+	obs->range = NULL;
+}
+
+
+static void create_gpu_affinity_state(struct task_struct* p, uint8_t sigmas, uint16_t window_size)
+{
+	int i;
+
 	p->rt_param.gpu_migration = MIG_NONE;
 	p->rt_param.last_gpu = -1;
+
+	switch (p->rt_param.prediction_mode) {
+	case SIMPLE_AVG:
+	case CC_BRUTE_AVG:
+	case CC_MR_AVG:
+		memset(&p->rt_param.gpu_avg_est, 0, sizeof(p->rt_param.gpu_avg_est));
+		break;
+	case SIMPLE_EWMA:
+	case CC_BRUTE_EWMA:
+	case CC_MR_EWMA:
+		memset(&p->rt_param.gpu_ewma_est, 0, sizeof(p->rt_param.gpu_ewma_est));
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	for (i = 0; i <= MIG_LAST; ++i) {
+		switch (p->rt_param.prediction_mode) {
+		case SIMPLE_AVG:
+			create_gpu_simple_avg(&p->rt_param.gpu_avg_est[i].history, window_size);
+			break;
+		case CC_BRUTE_AVG:
+			p->rt_param.gpu_avg_est[i].sigmas = sigmas;
+			create_gpu_cc_brute_avg(&p->rt_param.gpu_avg_est[i].history, window_size);
+			break;
+		case CC_MR_AVG:
+			p->rt_param.gpu_avg_est[i].sigmas = sigmas;
+			create_gpu_cc_mr_avg(&p->rt_param.gpu_avg_est[i].history, window_size);
+			break;
+		case SIMPLE_EWMA:
+			create_gpu_simple_ewma(&p->rt_param.gpu_avg_est[i].history, window_size);
+			break;
+		case CC_BRUTE_EWMA:
+			p->rt_param.gpu_ewma_est[i].sigmas = sigmas;
+			create_gpu_cc_brute_ewma(&p->rt_param.gpu_ewma_est[i].history, window_size);
+			break;
+		case CC_MR_EWMA:
+			p->rt_param.gpu_ewma_est[i].sigmas = sigmas;
+			create_gpu_cc_mr_ewma(&p->rt_param.gpu_ewma_est[i].history, window_size);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+}
+
+static void destroy_gpu_affinity_state(struct task_struct* p)
+{
+	int i;
+
+	if (p->rt_param.prediction_mode == CC_BRUTE_AVG ||
+	    p->rt_param.prediction_mode == CC_MR_AVG) {
+		for (i = 0; i <= MIG_LAST; ++i) {
+			destroy_obs_history(&p->rt_param.gpu_avg_est[i].history);
+		}
+	}
+	else if (p->rt_param.prediction_mode == CC_BRUTE_EWMA ||
+	         p->rt_param.prediction_mode == CC_MR_EWMA) {
+		for (i = 0; i <= MIG_LAST; ++i) {
+			destroy_obs_history(&p->rt_param.gpu_ewma_est[i].history);
+		}
+	}
+}
+
+asmlinkage long sys_config_gpu_affinity_predictor(void* __user arg)
+{
+	struct task_struct *t = current;
+	predictor_t params;
+
+	if (!access_ok(VERIFY_READ, arg, sizeof(params))) {
+		return -EINVAL;
+	}
+	if (__copy_from_user(&params, arg, sizeof(params))) {
+		return -EINVAL;
+	}
+
+	if (params.type != SIMPLE_AVG &&
+	    params.type != SIMPLE_EWMA &&
+	    params.type != CC_BRUTE_AVG &&
+	    params.type != CC_BRUTE_EWMA &&
+	    params.type != CC_MR_AVG &&
+	    params.type != CC_MR_EWMA) {
+		return -EINVAL;
+	}
+
+	if (params.type != SIMPLE_AVG && params.type != SIMPLE_EWMA) {
+		if (params.window_size <= 10) {
+			return -EINVAL;
+		}
+	}
+
+	if (params.type == SIMPLE_EWMA ||
+	    params.type == CC_BRUTE_EWMA ||
+	    params.type == CC_MR_EWMA) {
+		if (params.alpha_num > params.alpha_denom) {
+			return -EINVAL;
+		}
+		if (params.alpha_num == 0) {
+			return -EINVAL;
+		}
+		if (params.alpha_denom == 0) {
+			params.alpha_denom = 1;
+		}
+	}
+
+	if (t->rt_param.prediction_mode != 0) {
+		destroy_gpu_affinity_state(t);
+	}
+
+	t->rt_param.prediction_mode = params.type;
+	create_gpu_affinity_state(t, params.sigmas, params.window_size);
+
+	return 0;
+}
+#else
+asmlinkage long sys_config_gpu_affinity_predictor(void* __user arg)
+{
+	return -EINVAL;
 }
 #endif
 
@@ -426,10 +581,6 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
 		p->rt_param.ctrl_page = ctrl_page;
 	}
 
-#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
-	init_gpu_affinity_state(p);
-#endif
-
 #ifdef CONFIG_LITMUS_NESTED_LOCKING
 	INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order);
 	raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock);
@@ -467,9 +618,6 @@ long __litmus_admit_task(struct task_struct* tsk)
 #ifdef CONFIG_LITMUS_NVIDIA
 	atomic_set(&tsk_rt(tsk)->nv_int_count, 0);
 #endif
-#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
-	init_gpu_affinity_state(tsk);
-#endif
 #ifdef CONFIG_LITMUS_NESTED_LOCKING
 	tsk_rt(tsk)->blocked_lock = NULL;
 	raw_spin_lock_init(&tsk_rt(tsk)->hp_blocked_tasks_lock);
@@ -539,6 +687,8 @@ void litmus_exit_task(struct task_struct* tsk)
 		bheap_node_free(tsk_rt(tsk)->heap_node);
 		release_heap_free(tsk_rt(tsk)->rel_heap);
 
+		destroy_gpu_affinity_state(tsk);
+
 		atomic_dec(&rt_task_count);
 		reinit_litmus_state(tsk, 1);
 	}
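Note: the create_gpu_* helpers do not check kmalloc() results, so an admitted task could be left with NULL observation buffers; a hardened variant might look like this sketch (hypothetical, not part of this commit):

    #include <linux/errno.h>
    #include <linux/slab.h>
    #include <linux/string.h>

    /* Hypothetical allocator that reports -ENOMEM instead of silently
     * leaving NULL buffers behind. */
    static int create_obs_history(obs_history_t* obs, uint16_t window_size,
                                  int need_observations, int need_range)
    {
        memset(obs, 0, sizeof(*obs));
        obs->window_size = window_size;

        if (need_observations) {
            obs->observations = kmalloc(sizeof(lt_t)*window_size, GFP_KERNEL);
            if (!obs->observations)
                return -ENOMEM;
        }
        if (need_range) {
            obs->range = kmalloc(sizeof(lt_t)*(window_size-1), GFP_KERNEL);
            if (!obs->range) {
                kfree(obs->observations);
                obs->observations = NULL;
                return -ENOMEM;
            }
        }
        return 0;
    }

Note also that the SIMPLE_EWMA arm of create_gpu_affinity_state() passes &p->rt_param.gpu_avg_est[i].history rather than &p->rt_param.gpu_ewma_est[i].history; the two union members place .history at different offsets, although the preceding memset makes this harmless for the all-NULL simple case.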