author	Glenn Elliott <gelliott@cs.unc.edu>	2012-09-20 18:57:20 -0400
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-09-20 18:57:20 -0400
commit	150c1d5ed8541f3a2bfcde3d5f3174b9af4ab4fc (patch)
tree	7970735e676afc237e40e0971119001429bf56ae
parent	33cb64c787070d6b60a02ea40064d717d3b9dc07 (diff)
Generalized GPU cost predictors + EWMA. (untested) (wip-gpu-rtas12)
-rw-r--r--	include/litmus/gpu_affinity.h	26
-rw-r--r--	include/litmus/rt_param.h	77
-rw-r--r--	include/litmus/unistd_32.h	5
-rw-r--r--	include/litmus/unistd_64.h	7
-rw-r--r--	litmus/gpu_affinity.c	238
-rw-r--r--	litmus/litmus.c	194
6 files changed, 381 insertions, 166 deletions
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
index d64a15cbf2a5..a2abc4c1fd7c 100644
--- a/include/litmus/gpu_affinity.h
+++ b/include/litmus/gpu_affinity.h
@@ -31,18 +31,20 @@ static inline lt_t get_gpu_time(struct task_struct* t)
 
 static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist)
 {
-//	int i;
-//	fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est);
-//	lt_t val = (temp >= 0) ? temp : 0;  // never allow negative estimates...
-	lt_t val = t->rt_param.gpu_migration_est[dist].avg;
-
-//	WARN_ON(temp < 0);
-
-	// lower-bound a distant migration to be at least equal to the level
-	// below it.
-//	for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) {
-//		val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est);
-//	}
+	lt_t val = 0;
+
+	switch(t->rt_param.prediction_mode) {
+		case SIMPLE_AVG:
+		case CC_BRUTE_AVG:
+		case CC_MR_AVG:
+			val = t->rt_param.gpu_avg_est[dist].center_line;
+			break;
+		case SIMPLE_EWMA:
+		case CC_BRUTE_EWMA:
+		case CC_MR_EWMA:
+			val = t->rt_param.gpu_ewma_est[dist].center_line;
+			break;
+	}
 
 	return ((val > 0) ? val : dist+1);
 }
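The lookup above now dispatches on the task's prediction_mode and reads the center line of whichever union member is active, with the dist+1 fallback keeping shorter migration distances preferred until a real estimate exists. A minimal userspace model of that lookup (illustrative only; the distance names and the sample value below are stand-ins, not definitions from this commit):

#include <stdio.h>

typedef unsigned long long lt_t;

/* Stand-in distance levels; the real gpu_migration_dist_t lives in litmus/rt_param.h. */
enum { DIST_LOCAL = 0, DIST_NEAR, DIST_MED, DIST_FAR, DIST_NONE, NUM_DIST };

static lt_t center_line[NUM_DIST];  /* filled in by whichever predictor is active */

static lt_t model_get_gpu_estimate(int dist)
{
	lt_t val = center_line[dist];
	/* no data yet: fall back to dist+1 so nearer GPUs still compare as cheaper */
	return (val > 0) ? val : (lt_t)(dist + 1);
}

int main(void)
{
	center_line[DIST_NEAR] = 120000;  /* pretend ~120us has been learned for NEAR */
	for (int d = DIST_LOCAL; d < NUM_DIST; ++d)
		printf("dist %d -> estimate %llu\n", d, model_get_gpu_estimate(d));
	return 0;
}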
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 44f85a366574..6da36f50240d 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -41,6 +41,32 @@ typedef enum {
 	PRECISE_SIGNALS, /* budget signals are triggered with hrtimers */
 } budget_signal_policy_t;
 
+typedef enum
+{
+	/* simple - all observations counted */
+	SIMPLE_AVG = 1,
+	SIMPLE_EWMA,
+
+	/* control chart, brute - std deviation fully calculated */
+	CC_BRUTE_AVG,
+	CC_BRUTE_EWMA,
+
+	/* control chart, moving range - std deviation est. from moving range */
+	CC_MR_AVG,
+	CC_MR_EWMA,
+} predictor_type;
+
+typedef struct predictor
+{
+	predictor_type type;
+	uint16_t window_size;
+
+	uint8_t sigmas;
+	uint16_t alpha_num;
+	uint16_t alpha_denom;
+} predictor_t;
+
+
 /* We use the common priority interpretation "lower index == higher priority",
  * which is commonly used in fixed-priority schedulability analysis papers.
  * So, a numerically lower priority value implies higher scheduling priority,
@@ -180,23 +206,48 @@ typedef enum gpu_migration_dist
 	MIG_LAST = MIG_NONE
 } gpu_migration_dist_t;
 
-typedef struct feedback_est{
-	fp_t est;
-	fp_t accum_err;
-} feedback_est_t;
-
-
 #define AVG_EST_WINDOW_SIZE 20
 
-typedef struct avg_est{
-	lt_t history[AVG_EST_WINDOW_SIZE];
-	uint16_t count;
-	uint16_t idx;
-	lt_t sum;
-	lt_t std;
-	lt_t avg;
-} avg_est_t;
-
+typedef struct obs_history
+{
+	uint16_t window_size;
+	uint16_t count;
+	uint16_t idx;
+	uint16_t ridx;
+
+	lt_t last_observed;
+	lt_t *observations;
+	lt_t *range;
+} obs_history_t;
+
+typedef struct avg_est {
+	uint8_t sigmas;
+
+	obs_history_t history;
+
+	lt_t sum;
+	lt_t rsum;
+
+	lt_t std;
+	lt_t center_line;
+} avg_est_t;
+
+
+typedef struct ewma_est {
+	uint8_t sigmas;
+	uint16_t alpha_num;
+	uint16_t alpha_denom;
+
+	obs_history_t history;
+
+	lt_t rsum;
+
+	lt_t std;
+	lt_t center_line;
+} ewma_est_t;
+
+
+
 /* RT task parameters for scheduling extensions
  * These parameters are inherited during clone and therefore must
  * be explicitly set up before the task set is launched.
@@ -243,7 +294,13 @@ struct rt_param {
 	long unsigned int held_gpus; // bitmap of held GPUs.
 
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
-	avg_est_t gpu_migration_est[MIG_LAST+1];
+//	avg_est_t gpu_migration_est[MIG_LAST+1];
+	predictor_type prediction_mode;
+	union
+	{
+		avg_est_t gpu_avg_est[MIG_LAST+1];
+		ewma_est_t gpu_ewma_est[MIG_LAST+1];
+	};
 
 	gpu_migration_dist_t gpu_migration;
 	int last_gpu;
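rt_param.h now carries both the per-task prediction_mode and the predictor_t request structure that the new sys_config_gpu_affinity_predictor syscall (added later in this patch) copies in from userspace. A hedged sketch of how a task might configure a moving-range EWMA predictor for itself; the header paths and the raw syscall() invocation are assumptions here, and liblitmus would normally wrap this:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <litmus/rt_param.h>    /* predictor_t, CC_MR_EWMA, ... (assumed install path) */
#include <litmus/unistd_64.h>   /* __NR_config_gpu_affinity_predictor (assumed install path) */

int main(void)
{
	predictor_t params = {
		.type        = CC_MR_EWMA,  /* EWMA center line, sigma from moving range */
		.window_size = 20,          /* must be > 10 for the CC_* predictors */
		.sigmas      = 3,           /* reject samples outside center_line +/- 3 sigma */
		.alpha_num   = 7,           /* previous center line keeps weight 7/8 ... */
		.alpha_denom = 8,           /* ... each new observation contributes 1/8 */
	};

	if (syscall(__NR_config_gpu_affinity_predictor, &params) != 0) {
		perror("config_gpu_affinity_predictor");
		return 1;
	}
	return 0;
}

The values above are chosen to pass the validation added in litmus.c further down: window_size must exceed 10 for the control-chart modes, alpha_num must be non-zero, and alpha_num may not exceed alpha_denom.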
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
index c86b743408ed..964f02082275 100644
--- a/include/litmus/unistd_32.h
+++ b/include/litmus/unistd_32.h
@@ -21,6 +21,7 @@
 #define __NR_litmus_dgl_unlock	__LSC(13)
 #define __NR_register_nv_device	__LSC(14)
 
-#define __NR_set_aux_tasks	_LSC(15)
+#define __NR_config_gpu_affinity_predictor	__LSC(15)
+#define __NR_set_aux_tasks	_LSC(16)
 
-#define NR_litmus_syscalls 16
+#define NR_litmus_syscalls 17
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
index 3825bc129dbd..70be98d4f1e7 100644
--- a/include/litmus/unistd_64.h
+++ b/include/litmus/unistd_64.h
@@ -35,8 +35,9 @@ __SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock)
 __SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock)
 #define __NR_register_nv_device	__LSC(14)
 __SYSCALL(__NR_register_nv_device, sys_register_nv_device)
-
-#define __NR_set_aux_tasks	__LSC(15)
+#define __NR_config_gpu_affinity_predictor	__LSC(15)
+__SYSCALL(__NR_config_gpu_affinity_predictor, sys_config_gpu_affinity_predictor)
+#define __NR_set_aux_tasks	__LSC(16)
 __SYSCALL(__NR_set_aux_tasks, sys_set_aux_tasks)
 
-#define NR_litmus_syscalls 16
+#define NR_litmus_syscalls 17
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
index 7d73105b4181..7a2b5005be45 100644
--- a/litmus/gpu_affinity.c
+++ b/litmus/gpu_affinity.c
@@ -9,33 +9,10 @@
 
 #define OBSERVATION_CAP ((lt_t)(2e9))
 
-// reason for skew: high outliers are less
-// frequent and way out of bounds
-//#define HI_THRESHOLD 2
-//#define LO_THRESHOLD 4
-
-#define NUM_STDEV_NUM	1
-#define NUM_STDEV_DENOM	2
+#define MIN_OBSERVATIONS 10
 
 #define MIN(a, b) ((a < b) ? a : b)
 
-static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
-{
-	fp_t relative_err;
-	fp_t err, new;
-	fp_t actual = _integer_to_fp(observed);
-
-	err = _sub(actual, fb->est);
-	new = _add(_mul(a, err), _mul(b, fb->accum_err));
-
-	relative_err = _div(err, actual);
-
-	fb->est = new;
-	fb->accum_err = _add(fb->accum_err, err);
-
-	return relative_err;
-}
-
 lt_t varience(lt_t nums[], const lt_t avg, const uint16_t count)
 {
 	/* brute force: takes about as much time as incremental running methods when
@@ -74,127 +51,154 @@ lt_t isqrt(lt_t n)
 	return res;
 }
 
-void update_gpu_estimate(struct task_struct *t, lt_t observed)
-{
-	//feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
-	avg_est_t *est;
-	struct migration_info mig_info;
-
-	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
-
-	est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
-
-	if (unlikely(observed > OBSERVATION_CAP)) {
-		TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n",
-			observed,
-			OBSERVATION_CAP);
-		return;
-	}
-
-#if 0
-	// filter out values that are HI_THRESHOLDx or (1/LO_THRESHOLD)x out
-	// of range of the average, but only filter if enough samples
-	// have been taken.
-	if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
-		if (unlikely(observed < est->avg/LO_THRESHOLD)) {
-			TRACE_TASK(t, "Observation is too small: %llu\n",
-				observed);
-			return;
-		}
-		else if (unlikely(observed > est->avg*HI_THRESHOLD)) {
-			TRACE_TASK(t, "Observation is too large: %llu\n",
-				observed);
-			return;
-		}
-	}
-#endif
-	// filter values outside NUM_STDEVx the standard deviation,
-	// but only filter if enough samples have been taken.
-	if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
-		lt_t lower, upper;
-
-		lt_t range = (est->std*NUM_STDEV_NUM)/NUM_STDEV_DENOM;
-		lower = est->avg - MIN(range, est->avg); // no underflow.
-
-		if (unlikely(observed < lower)) {
-			TRACE_TASK(t, "Observation is too small: %llu\n", observed);
-			return;
-		}
-
-		upper = est->avg + range;
-		if (unlikely(observed > upper)) {
-			TRACE_TASK(t, "Observation is too large: %llu\n", observed);
-			return;
-		}
-	}
-
-
-
-	if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) {
-		++est->count;
-	}
-	else {
-		est->sum -= est->history[est->idx];
-	}
-
-	mig_info.observed = observed;
-	mig_info.estimated = est->avg;
-	mig_info.distance = tsk_rt(t)->gpu_migration;
-	sched_trace_migration(t, &mig_info);
-
-
-	est->history[est->idx] = observed;
-	est->sum += observed;
-	est->avg = est->sum/est->count;
-	est->std = isqrt(varience(est->history, est->avg, est->count));
-	est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;
-
-
-#if 0
-	if(unlikely(fb->est.val == 0)) {
-		// kludge-- cap observed values to prevent whacky estimations.
-		// whacky stuff happens during the first few jobs.
-		if(unlikely(observed > OBSERVATION_CAP)) {
-			TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
-				observed, OBSERVATION_CAP);
-			observed = OBSERVATION_CAP;
-		}
-
-		// take the first observation as our estimate
-		// (initial value of 0 was bogus anyhow)
-		fb->est = _integer_to_fp(observed);
-		fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
-	}
-	else {
-		fp_t rel_err = update_estimate(fb,
-			tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
-			tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
-			observed);
-
-		if(unlikely(_fp_to_integer(fb->est) <= 0)) {
-			TRACE_TASK(t, "Invalid estimate. Patching.\n");
-			fb->est = _integer_to_fp(observed);
-			fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
-		}
-		else {
-			struct migration_info mig_info;
-
-			sched_trace_prediction_err(t,
-				&(tsk_rt(t)->gpu_migration),
-				&rel_err);
-
-			mig_info.observed = observed;
-			mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
-			mig_info.distance = tsk_rt(t)->gpu_migration;
-
-			sched_trace_migration(t, &mig_info);
-		}
-	}
-#endif
+static lt_t avg_method(struct task_struct *t, lt_t observed)
+{
+	avg_est_t *est = &(tsk_rt(t)->gpu_avg_est[tsk_rt(t)->gpu_migration]);
+
+	/* cut-off */
+	if (tsk_rt(t)->prediction_mode != SIMPLE_AVG &&
+		est->history.count >= MIN_OBSERVATIONS) {
+		lt_t upper = est->center_line + est->std * est->sigmas;
+		lt_t lower = est->center_line - est->std * est->sigmas;
+		if (unlikely(observed < lower || observed > upper)) {
+			TRACE_TASK(t, "%llu out of range [%llu, %llu]\n",
+				observed, lower, upper);
+			return 0;
+		}
+	}
+
+	if (unlikely(est->history.count < est->history.window_size)) {
+		++est->history.count;
+	}
+	else {
+		est->sum -= est->history.observations[est->history.idx];
+
+		if (tsk_rt(t)->prediction_mode == CC_MR_AVG) {
+			est->rsum -= est->history.range[est->history.ridx];
+		}
+	}
+
+	est->history.observations[est->history.idx] = observed;
+	est->sum += observed;
+	est->center_line = est->sum/est->history.count;
+
+	if (tsk_rt(t)->prediction_mode == CC_BRUTE_AVG) {
+		if (likely(est->history.count > 1)) {
+			est->std = isqrt(varience(est->history.observations, est->center_line, est->history.count));
+		}
+	}
+	else if (tsk_rt(t)->prediction_mode == CC_MR_AVG) {
+		if (likely(est->history.count > 1)) {
+			lt_t mr;
+			est->history.range[est->history.ridx] = (observed > est->history.last_observed) ?
+				observed - est->history.last_observed :
+				est->history.last_observed - observed;
+			est->rsum += est->history.range[est->history.ridx];
+			mr = est->rsum / (est->history.count - 1);
+			est->std = est->rsum + (128*est->rsum)/1000; // std = mr/d_2, w/ d_2 = 1.128
+			est->history.ridx = (est->history.ridx + 1) % (est->history.window_size - 1);
+		}
+		est->history.last_observed = observed;
+	}
+
+	est->history.idx = (est->history.idx + 1) % est->history.window_size;
+
+	return est->center_line;
+}
+
+static lt_t ewma_method(struct task_struct *t, lt_t observed)
+{
+	ewma_est_t *est = &(tsk_rt(t)->gpu_ewma_est[tsk_rt(t)->gpu_migration]);
+
+	/* cut-off */
+	if (tsk_rt(t)->prediction_mode != SIMPLE_EWMA &&
+		est->history.count >= MIN_OBSERVATIONS) {
+		lt_t upper = est->center_line + est->std * est->sigmas;
+		lt_t lower = est->center_line - est->std * est->sigmas;
+		if (unlikely(observed < lower || observed > upper)) {
+			TRACE_TASK(t, "%llu out of range [%llu, %llu]\n",
+				observed, lower, upper);
+			return 0;
+		}
+	}
+
+	if (unlikely(est->history.count < est->history.window_size)) {
+		++est->history.count;
+	}
+	else if (tsk_rt(t)->prediction_mode == CC_MR_EWMA) {
+		est->rsum -= est->history.range[est->history.ridx];
+	}
+
+	if (unlikely(est->history.count == 1)) {
+		est->center_line = observed;
+	}
+	else {
+		est->center_line =
+			((est->alpha_num * est->center_line)/est->alpha_denom) +
+			(((est->alpha_denom - est->alpha_num) * observed)/est->alpha_denom);
+	}
+
+	if (tsk_rt(t)->prediction_mode == CC_BRUTE_EWMA) {
+		est->history.observations[est->history.idx] = observed;
+		est->std = isqrt(varience(est->history.observations, est->center_line, est->history.count));
+		est->history.idx = (est->history.idx + 1) % est->history.window_size;
+	}
+	else if (tsk_rt(t)->prediction_mode == CC_MR_EWMA) {
+		if (likely(est->history.count > 1)) {
+			lt_t mr;
+			est->history.range[est->history.ridx] = (observed > est->history.last_observed) ?
+				observed - est->history.last_observed :
+				est->history.last_observed - observed;
+			est->rsum += est->history.range[est->history.ridx];
+			mr = est->rsum / (est->history.count - 1);
+			est->std = est->rsum + (128*est->rsum)/1000; // std = mr/d_2, w/ d_2 = 1.128
+			est->history.ridx = (est->history.ridx + 1) % (est->history.window_size - 1);
+		}
+		est->history.last_observed = observed;
+	}
+
+	return est->center_line;
+}
+
+void update_gpu_estimate(struct task_struct *t, lt_t observed)
+{
+	lt_t new_est;
+	struct migration_info mig_info;
+
+	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
+
+	if (unlikely(observed > OBSERVATION_CAP)) {
+		TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n",
+			observed,
+			OBSERVATION_CAP);
+		return;
+	}
+
+	switch (tsk_rt(t)->prediction_mode) {
+	case SIMPLE_AVG:
+	case CC_BRUTE_AVG:
+	case CC_MR_AVG:
+		new_est = avg_method(t, observed);
+		break;
+	case SIMPLE_EWMA:
+	case CC_BRUTE_EWMA:
+	case CC_MR_EWMA:
+		new_est = ewma_method(t, observed);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	mig_info.observed = observed;
+	mig_info.estimated = new_est;
+	mig_info.distance = tsk_rt(t)->gpu_migration;
+	sched_trace_migration(t, &mig_info);
 
 	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n",
 		tsk_rt(t)->gpu_migration,
 		observed,
-		est->avg);
+		new_est);
 }
 
 gpu_migration_dist_t gpu_migration_distance(int a, int b)
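In summary, avg_method and ewma_method each maintain, per migration distance, a center line and a standard-deviation estimate, and for the CC_* modes they discard an observation once at least MIN_OBSERVATIONS samples exist and it falls outside the control limits. The intended math, written out with a = alpha_num, d = alpha_denom, k = sigmas, and the d_2 = 1.128 constant from the code comments (note that the committed line assigns est->rsum + (128*est->rsum)/1000 to est->std, whereas the comment's formula calls for the mean moving range divided by d_2; the commit is explicitly tagged untested):

% Control-chart forms implemented per migration distance (x_i = accepted observations, n = window fill).
\begin{align*}
\text{cut-off:} &\quad \mathrm{CL} - k\hat{\sigma} \;\le\; x \;\le\; \mathrm{CL} + k\hat{\sigma}
  \qquad \text{(applied once } n \ge 10\text{)} \\
\text{windowed average:} &\quad \mathrm{CL} = \frac{1}{n}\sum_{i=1}^{n} x_i \\
\text{EWMA:} &\quad \mathrm{CL}_t = \frac{a}{d}\,\mathrm{CL}_{t-1} + \frac{d-a}{d}\,x_t \\
\text{moving range:} &\quad \mathrm{MR}_i = \lvert x_i - x_{i-1}\rvert, \qquad
  \overline{\mathrm{MR}} = \frac{1}{n-1}\sum_{i=2}^{n}\mathrm{MR}_i \\
\text{sigma estimate (I-MR chart):} &\quad \hat{\sigma} = \overline{\mathrm{MR}}/d_2, \qquad d_2 = 1.128 \\
\text{brute-force sigma:} &\quad \hat{\sigma} = \sqrt{\mathrm{Var}(x_1,\dots,x_n)}
\end{align*}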
diff --git a/litmus/litmus.c b/litmus/litmus.c
index d368202ab8c3..8380c0f6a393 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -332,28 +332,183 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
 }
 
 #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
-void init_gpu_affinity_state(struct task_struct* p)
-{
-	// under-damped
-	//p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
-	//p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
-
-#if 0
-	// emperical;
-	p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
-	p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
-
-	p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
-	p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
-
-	p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
-	p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
-
-	p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
-	p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);
-#endif
+static void create_gpu_cc_mr_avg(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = window_size;
+	obs->observations = kmalloc(sizeof(lt_t)*window_size, GFP_KERNEL);
+	obs->range = kmalloc(sizeof(lt_t)*(window_size-1), GFP_KERNEL);
+}
+
+static void create_gpu_cc_brute_avg(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = window_size;
+	obs->observations = kmalloc(sizeof(lt_t)*window_size, GFP_KERNEL);
+	obs->range = NULL;
+}
+
+static void create_gpu_simple_avg(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = 0;
+	obs->observations = NULL;
+	obs->range = NULL;
+}
+
+static void destroy_obs_history(obs_history_t* obs)
+{
+	if (obs->observations)
+		kfree(obs->observations);
+	if (obs->range)
+		kfree(obs->range);
+}
+
+static void create_gpu_cc_mr_ewma(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = window_size;
+	obs->observations = NULL;
+	obs->range = kmalloc(sizeof(lt_t)*(window_size-1), GFP_KERNEL);
+}
+
+static void create_gpu_cc_brute_ewma(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = window_size;
+	obs->observations = kmalloc(sizeof(lt_t)*window_size, GFP_KERNEL);
+	obs->range = NULL;
+}
+
+static void create_gpu_simple_ewma(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = 0;
+	obs->observations = NULL;
+	obs->range = NULL;
+}
+
+
+static void create_gpu_affinity_state(struct task_struct* p, uint8_t sigmas, uint16_t window_size)
+{
+	int i;
+
 	p->rt_param.gpu_migration = MIG_NONE;
 	p->rt_param.last_gpu = -1;
+
+	switch (p->rt_param.prediction_mode) {
+	case SIMPLE_AVG:
+	case CC_BRUTE_AVG:
+	case CC_MR_AVG:
+		memset(&p->rt_param.gpu_avg_est, 0, sizeof(p->rt_param.gpu_avg_est));
+		break;
+	case SIMPLE_EWMA:
+	case CC_BRUTE_EWMA:
+	case CC_MR_EWMA:
+		memset(&p->rt_param.gpu_ewma_est, 0, sizeof(p->rt_param.gpu_ewma_est));
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	for (i = 0; i <= MIG_LAST; ++i) {
+		switch (p->rt_param.prediction_mode) {
+		case SIMPLE_AVG:
+			create_gpu_simple_avg(&p->rt_param.gpu_avg_est[i].history, window_size);
+			break;
+		case CC_BRUTE_AVG:
+			p->rt_param.gpu_avg_est[i].sigmas = sigmas;
+			create_gpu_cc_brute_avg(&p->rt_param.gpu_avg_est[i].history, window_size);
+			break;
+		case CC_MR_AVG:
+			p->rt_param.gpu_avg_est[i].sigmas = sigmas;
+			create_gpu_cc_mr_avg(&p->rt_param.gpu_avg_est[i].history, window_size);
+			break;
+		case SIMPLE_EWMA:
+			create_gpu_simple_ewma(&p->rt_param.gpu_avg_est[i].history, window_size);
+			break;
+		case CC_BRUTE_EWMA:
+			p->rt_param.gpu_ewma_est[i].sigmas = sigmas;
+			create_gpu_cc_brute_ewma(&p->rt_param.gpu_ewma_est[i].history, window_size);
+			break;
+		case CC_MR_EWMA:
+			p->rt_param.gpu_ewma_est[i].sigmas = sigmas;
+			create_gpu_cc_mr_ewma(&p->rt_param.gpu_ewma_est[i].history, window_size);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+}
+
+static void destroy_gpu_affinity_state(struct task_struct* p)
+{
+	int i;
+
+	if (p->rt_param.prediction_mode == CC_BRUTE_AVG ||
+		p->rt_param.prediction_mode == CC_MR_AVG) {
+		for (i = 0; i <= MIG_LAST; ++i) {
+			destroy_obs_history(&p->rt_param.gpu_avg_est[i].history);
+		}
+	}
+	else if (p->rt_param.prediction_mode == CC_BRUTE_EWMA ||
+		p->rt_param.prediction_mode == CC_MR_EWMA) {
+		for (i = 0; i <= MIG_LAST; ++i) {
+			destroy_obs_history(&p->rt_param.gpu_ewma_est[i].history);
+		}
+	}
+}
+
+asmlinkage long sys_config_gpu_affinity_predictor(void* __user arg)
+{
+	struct task_struct *t = current;
+	predictor_t params;
+
+	if (!access_ok(VERIFY_READ, arg, sizeof(params))) {
+		return -EINVAL;
+	}
+	if (__copy_from_user(&params, arg, sizeof(params))) {
+		return -EINVAL;
+	}
+
+	if (params.type != SIMPLE_AVG &&
+		params.type != SIMPLE_EWMA &&
+		params.type != CC_BRUTE_AVG &&
+		params.type != CC_BRUTE_EWMA &&
+		params.type != CC_MR_AVG &&
+		params.type != CC_MR_EWMA) {
+		return -EINVAL;
+	}
+
+	if (params.type != SIMPLE_AVG && params.type != SIMPLE_EWMA) {
+		if (params.window_size <= 10) {
+			return -EINVAL;
+		}
+	}
+
+	if (params.type == SIMPLE_EWMA ||
+		params.type == CC_BRUTE_EWMA ||
+		params.type == CC_MR_EWMA) {
+		if (params.alpha_num > params.alpha_denom) {
+			return -EINVAL;
+		}
+		if (params.alpha_num == 0) {
+			return -EINVAL;
+		}
+		if (params.alpha_denom == 0) {
+			params.alpha_denom = 1;
+		}
+	}
+
+	if (t->rt_param.prediction_mode != 0) {
+		destroy_gpu_affinity_state(t);
+	}
+
+	t->rt_param.prediction_mode = params.type;
+	create_gpu_affinity_state(t, params.sigmas, params.window_size);
+
+	return 0;
+}
+#else
+asmlinkage long sys_config_gpu_affinity_predictor(void* __user arg)
+{
+	return -EINVAL;
 }
 #endif
 
@@ -426,10 +581,6 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
 		p->rt_param.ctrl_page = ctrl_page;
 	}
 
-#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
-	init_gpu_affinity_state(p);
-#endif
-
 #ifdef CONFIG_LITMUS_NESTED_LOCKING
 	INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order);
 	raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock);
@@ -467,9 +618,6 @@ long __litmus_admit_task(struct task_struct* tsk)
 #ifdef CONFIG_LITMUS_NVIDIA
 	atomic_set(&tsk_rt(tsk)->nv_int_count, 0);
 #endif
-#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
-	init_gpu_affinity_state(tsk);
-#endif
 #ifdef CONFIG_LITMUS_NESTED_LOCKING
 	tsk_rt(tsk)->blocked_lock = NULL;
 	raw_spin_lock_init(&tsk_rt(tsk)->hp_blocked_tasks_lock);
@@ -539,6 +687,8 @@ void litmus_exit_task(struct task_struct* tsk)
 		bheap_node_free(tsk_rt(tsk)->heap_node);
 		release_heap_free(tsk_rt(tsk)->rel_heap);
 
+		destroy_gpu_affinity_state(tsk);
+
 		atomic_dec(&rt_task_count);
 		reinit_litmus_state(tsk, 1);
 	}
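For reference, here is a standalone userspace model of the CC_MR_AVG bookkeeping that the pieces above implement together: buffers sized like the kmalloc() calls in create_gpu_cc_mr_avg() (window_size observations, window_size-1 ranges), a windowed center line, a sigma estimate from the mean moving range, and the k-sigma cut-off. This is a hedged test sketch, not code from the commit; it follows the comment's sigma = mean-MR / 1.128 formula and adds an underflow guard on the lower control limit.

#include <stdio.h>
#include <stdlib.h>

typedef unsigned long long lt_t;

struct mr_model {
	unsigned window_size, count, idx, ridx, sigmas;
	lt_t sum, rsum, last, center_line, std;
	lt_t *observations, *range;
};

/* window_size must be > 1 (the kernel requires > 10 anyway). */
static void mr_init(struct mr_model *m, unsigned window_size, unsigned sigmas)
{
	m->window_size = window_size;
	m->sigmas = sigmas;
	m->count = m->idx = m->ridx = 0;
	m->sum = m->rsum = m->last = m->center_line = m->std = 0;
	m->observations = calloc(window_size, sizeof(lt_t));
	m->range = calloc(window_size - 1, sizeof(lt_t));
}

/* Returns 1 if the observation was accepted, 0 if the cut-off rejected it. */
static int mr_update(struct mr_model *m, lt_t x)
{
	const unsigned MIN_OBSERVATIONS = 10;

	if (m->count >= MIN_OBSERVATIONS) {
		lt_t band = m->std * m->sigmas;
		lt_t lower = (m->center_line > band) ? m->center_line - band : 0;
		lt_t upper = m->center_line + band;
		if (x < lower || x > upper)
			return 0;
	}

	if (m->count < m->window_size) {
		++m->count;
	} else {
		/* window full: evict the oldest observation and moving range */
		m->sum -= m->observations[m->idx];
		m->rsum -= m->range[m->ridx];
	}

	m->observations[m->idx] = x;
	m->sum += x;
	m->center_line = m->sum / m->count;

	if (m->count > 1) {
		lt_t mr_mean;
		m->range[m->ridx] = (x > m->last) ? x - m->last : m->last - x;
		m->rsum += m->range[m->ridx];
		mr_mean = m->rsum / (m->count - 1);
		m->std = (mr_mean * 1000) / 1128;  /* sigma ~= mean MR / d_2, d_2 = 1.128 */
		m->ridx = (m->ridx + 1) % (m->window_size - 1);
	}
	m->last = x;

	m->idx = (m->idx + 1) % m->window_size;
	return 1;
}

int main(void)
{
	struct mr_model m;
	lt_t samples[] = { 100, 104, 98, 101, 103, 99, 102, 100, 97, 105,
	                   101, 400 /* outlier */, 100, 102 };
	mr_init(&m, 20, 3);
	for (unsigned i = 0; i < sizeof(samples)/sizeof(samples[0]); ++i) {
		int ok = mr_update(&m, samples[i]);
		printf("x=%llu %s  CL=%llu sigma=%llu\n", samples[i],
		       ok ? "accepted" : "REJECTED", m.center_line, m.std);
	}
	free(m.observations);
	free(m.range);
	return 0;
}

Feeding it a burst of roughly-100 samples and one 400 outlier shows the outlier being rejected once ten samples have been accepted, which is the behavior the cut-off in avg_method() is meant to provide.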