Diffstat (limited to 'litmus/kfmlp_lock.c')
-rw-r--r--  litmus/kfmlp_lock.c | 1003
1 file changed, 1003 insertions, 0 deletions
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
new file mode 100644
index 000000000000..785a095275e6
--- /dev/null
+++ b/litmus/kfmlp_lock.c
@@ -0,0 +1,1003 @@
#include <linux/slab.h>
#include <linux/uaccess.h>

#include <litmus/trace.h>
#include <litmus/sched_plugin.h>
#include <litmus/fdso.h>

#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
#include <litmus/gpu_affinity.h>
#include <litmus/nvidia_info.h>
#endif

#include <litmus/kfmlp_lock.h>

static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
                                struct kfmlp_queue* queue)
{
    return (queue - &sem->queues[0]);
}

static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
                                                  struct task_struct* holder)
{
    int i;
    for(i = 0; i < sem->num_resources; ++i)
        if(sem->queues[i].owner == holder)
            return(&sem->queues[i]);
    return(NULL);
}

/* caller is responsible for locking */
static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
                                                struct task_struct *skip)
{
    struct list_head *pos;
    struct task_struct *queued, *found = NULL;

    list_for_each(pos, &kqueue->wait.task_list) {
        queued = (struct task_struct*) list_entry(pos, wait_queue_t,
                                                  task_list)->private;

        /* Compare task prios, find high prio task. */
        //if (queued != skip && edf_higher_prio(queued, found))
        if (queued != skip && litmus->compare(queued, found))
            found = queued;
    }
    return found;
}

static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* sem,
                                                      struct kfmlp_queue* search_start)
{
    // we start our search at search_start instead of at the beginning of the
    // queue list to load-balance across all resources.
    struct kfmlp_queue* step = search_start;
    struct kfmlp_queue* shortest = sem->shortest_queue;

    do
    {
        step = (step+1 != &sem->queues[sem->num_resources]) ?
            step+1 : &sem->queues[0];

        if(step->count < shortest->count)
        {
            shortest = step;
            if(step->count == 0)
                break; /* can't get any shorter */
        }

    } while(step != search_start);

    return(shortest);
}

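/* Scan all queues and pick the highest-priority waiter that can be stolen
 * (only queues with at least one waiter besides the owner are considered).
 * The selected task, its wait-queue node, and its source queue are returned
 * so the caller can migrate the entry to a free replica. */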
static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
                                                 wait_queue_t** to_steal,
                                                 struct kfmlp_queue** to_steal_from)
{
    /* must hold sem->lock */

    int i;

    *to_steal = NULL;
    *to_steal_from = NULL;

    for(i = 0; i < sem->num_resources; ++i)
    {
        if( (sem->queues[i].count > 1) &&
            ((*to_steal_from == NULL) ||
            //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
            (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) )
        {
            *to_steal_from = &sem->queues[i];
        }
    }

    if(*to_steal_from)
    {
        struct list_head *pos;
        struct task_struct *target = (*to_steal_from)->hp_waiter;

        TRACE_CUR("want to steal hp_waiter (%s/%d) from queue %d\n",
                  target->comm,
                  target->pid,
                  kfmlp_get_idx(sem, *to_steal_from));

        list_for_each(pos, &(*to_steal_from)->wait.task_list)
        {
            wait_queue_t *node = list_entry(pos, wait_queue_t, task_list);
            struct task_struct *queued = (struct task_struct*) node->private;
            /* Compare task prios, find high prio task. */
            if (queued == target)
            {
                *to_steal = node;

                TRACE_CUR("steal: selected %s/%d from queue %d\n",
                          queued->comm, queued->pid,
                          kfmlp_get_idx(sem, *to_steal_from));

                return queued;
            }
        }

        TRACE_CUR("Could not find %s/%d in queue %d!!! THIS IS A BUG!\n",
                  target->comm,
                  target->pid,
                  kfmlp_get_idx(sem, *to_steal_from));
    }

    return NULL;
}

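/* Move the wait-queue entry 'wait' from queue 'src' to queue 'dst',
 * updating both queues' counts, hp_waiter fields, the priority inheritance
 * of the respective owners, the semaphore's shortest-queue hint, and the
 * affinity observer (if one is attached). Caller must hold sem->lock. */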
static void kfmlp_steal_node(struct kfmlp_semaphore *sem,
                             struct kfmlp_queue *dst,
                             wait_queue_t *wait,
                             struct kfmlp_queue *src)
{
    struct task_struct* t = (struct task_struct*) wait->private;

    __remove_wait_queue(&src->wait, wait);
    --(src->count);

    if(t == src->hp_waiter) {
        src->hp_waiter = kfmlp_find_hp_waiter(src, NULL);

        TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
                  kfmlp_get_idx(sem, src),
                  (src->hp_waiter) ? src->hp_waiter->comm : "nil",
                  (src->hp_waiter) ? src->hp_waiter->pid : -1);

        if(src->owner && tsk_rt(src->owner)->inh_task == t) {
            litmus->decrease_prio(src->owner, src->hp_waiter);
        }
    }

    if(sem->shortest_queue->count > src->count) {
        sem->shortest_queue = src;
        TRACE_CUR("queue %d is the shortest\n", kfmlp_get_idx(sem, sem->shortest_queue));
    }

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
    if(sem->aff_obs) {
        sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t);
    }
#endif

    init_waitqueue_entry(wait, t);
    __add_wait_queue_tail_exclusive(&dst->wait, wait);
    ++(dst->count);

    if(litmus->compare(t, dst->hp_waiter)) {
        dst->hp_waiter = t;

        TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
                  kfmlp_get_idx(sem, dst),
                  t->comm, t->pid);

        if(dst->owner && litmus->compare(t, dst->owner))
        {
            litmus->increase_prio(dst->owner, t);
        }
    }

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
    if(sem->aff_obs) {
        sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t);
    }
#endif
}

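/* Acquire one of the k replicas: pick a queue (affinity-advised when
 * available, otherwise the shortest), take ownership immediately if the
 * replica is free, or enqueue, propagate priority inheritance, and suspend
 * until ownership is passed to us (possibly on a different queue if our
 * wait entry was stolen). Returns the index of the acquired resource. */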
int kfmlp_lock(struct litmus_lock* l)
{
    struct task_struct* t = current;
    struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
    struct kfmlp_queue* my_queue = NULL;
    wait_queue_t wait;
    unsigned long flags;

    if (!is_realtime(t))
        return -EPERM;

    spin_lock_irqsave(&sem->lock, flags);

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
    if(sem->aff_obs) {
        my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t);
    }
    if(!my_queue) {
        my_queue = sem->shortest_queue;
    }
#else
    my_queue = sem->shortest_queue;
#endif

    if (my_queue->owner) {
        /* resource is not free => must suspend and wait */
        TRACE_CUR("queue %d: Resource is not free => must suspend and wait. (queue size = %d)\n",
                  kfmlp_get_idx(sem, my_queue),
                  my_queue->count);

        init_waitqueue_entry(&wait, t);

        /* FIXME: interruptible would be nice some day */
        set_task_state(t, TASK_UNINTERRUPTIBLE);

        __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);

        TRACE_CUR("queue %d: hp_waiter is currently %s/%d\n",
                  kfmlp_get_idx(sem, my_queue),
                  (my_queue->hp_waiter) ? my_queue->hp_waiter->comm : "nil",
                  (my_queue->hp_waiter) ? my_queue->hp_waiter->pid : -1);

        /* check if we need to activate priority inheritance */
        //if (edf_higher_prio(t, my_queue->hp_waiter))
        if (litmus->compare(t, my_queue->hp_waiter)) {
            my_queue->hp_waiter = t;
            TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
                      kfmlp_get_idx(sem, my_queue),
                      t->comm, t->pid);

            //if (edf_higher_prio(t, my_queue->owner))
            if (litmus->compare(t, my_queue->owner)) {
                litmus->increase_prio(my_queue->owner, my_queue->hp_waiter);
            }
        }

        ++(my_queue->count);

        if(my_queue == sem->shortest_queue) {
            sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
            TRACE_CUR("queue %d is the shortest\n",
                      kfmlp_get_idx(sem, sem->shortest_queue));
        }

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
        if(sem->aff_obs) {
            sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
        }
#endif

        /* release lock before sleeping */
        spin_unlock_irqrestore(&sem->lock, flags);

        /* We depend on the FIFO order. Thus, we don't need to recheck
         * when we wake up; we are guaranteed to have the lock since
         * there is only one wake up per release (or steal).
         */
        suspend_for_lock();

        if(my_queue->owner == t) {
            TRACE_CUR("queue %d: acquired through waiting\n",
                      kfmlp_get_idx(sem, my_queue));
        }
        else {
            /* this case may happen if our wait entry was stolen
               between queues. record where we went. */
            my_queue = kfmlp_get_queue(sem, t);

            BUG_ON(!my_queue);
            TRACE_CUR("queue %d: acquired through stealing\n",
                      kfmlp_get_idx(sem, my_queue));
        }
    }
    else {
        TRACE_CUR("queue %d: acquired immediately\n",
                  kfmlp_get_idx(sem, my_queue));

        my_queue->owner = t;

        ++(my_queue->count);

        if(my_queue == sem->shortest_queue) {
            sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
            TRACE_CUR("queue %d is the shortest\n",
                      kfmlp_get_idx(sem, sem->shortest_queue));
        }

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
        if(sem->aff_obs) {
            sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
            sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t);
        }
#endif

        spin_unlock_irqrestore(&sem->lock, flags);
    }

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
    if(sem->aff_obs) {
        return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue);
    }
#endif
    return kfmlp_get_idx(sem, my_queue);
}

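/* Release the replica: clear ownership, drop any inherited priority, and
 * hand the resource to the next FIFO waiter (which inherits the queue's
 * hp_waiter priority if needed). If this queue has no waiters, try to
 * steal the highest-priority waiter from another queue so that the freed
 * replica does not sit idle. */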
int kfmlp_unlock(struct litmus_lock* l)
{
    struct task_struct *t = current, *next;
    struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
    struct kfmlp_queue *my_queue, *to_steal_from;
    unsigned long flags;
    int err = 0;

    my_queue = kfmlp_get_queue(sem, t);

    if (!my_queue) {
        err = -EINVAL;
        goto out;
    }

    spin_lock_irqsave(&sem->lock, flags);

    TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue));

    my_queue->owner = NULL; // clear ownership
    --(my_queue->count);

    if(my_queue->count < sem->shortest_queue->count)
    {
        sem->shortest_queue = my_queue;
        TRACE_CUR("queue %d is the shortest\n",
                  kfmlp_get_idx(sem, sem->shortest_queue));
    }

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
    if(sem->aff_obs) {
        sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t);
        sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t);
    }
#endif

    /* we lose the benefit of priority inheritance (if any) */
    if (tsk_rt(t)->inh_task)
        litmus->decrease_prio(t, NULL);

    /* check if there are jobs waiting for this resource */
RETRY:
    next = __waitqueue_remove_first(&my_queue->wait);
    if (next) {
        /* next becomes the resource holder */
        my_queue->owner = next;

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
        if(sem->aff_obs) {
            sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next);
        }
#endif

        TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
                  kfmlp_get_idx(sem, my_queue), next->comm, next->pid);

        /* determine new hp_waiter if necessary */
        if (next == my_queue->hp_waiter) {
            TRACE_TASK(next, "was highest-prio waiter\n");
            my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
            if (my_queue->hp_waiter)
                TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
            else
                TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
        } else {
            /* Well, if next is not the highest-priority waiter,
             * then it ought to inherit the highest-priority
             * waiter's priority. */
            litmus->increase_prio(next, my_queue->hp_waiter);
        }

        /* wake up next */
        wake_up_process(next);
    }
    else {
        // TODO: put this stealing logic before we attempt to release
        // our resource. (This simplifies the code and gets rid of the
        // ugly goto RETRY.)
        wait_queue_t *wait;

        TRACE_CUR("queue %d: looking to steal someone...\n",
                  kfmlp_get_idx(sem, my_queue));

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
        next = (sem->aff_obs) ?
            sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) :
            kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
#else
        next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
#endif

        if(next) {
            TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
                      kfmlp_get_idx(sem, my_queue),
                      next->comm, next->pid,
                      kfmlp_get_idx(sem, to_steal_from));

            kfmlp_steal_node(sem, my_queue, wait, to_steal_from);

            goto RETRY; // will succeed this time.
        }
        else {
            TRACE_CUR("queue %d: no one to steal.\n",
                      kfmlp_get_idx(sem, my_queue));
        }
    }

    spin_unlock_irqrestore(&sem->lock, flags);

out:
    return err;
}

int kfmlp_close(struct litmus_lock* l)
{
    struct task_struct *t = current;
    struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
    struct kfmlp_queue *my_queue;
    unsigned long flags;

    int owner;

    spin_lock_irqsave(&sem->lock, flags);

    my_queue = kfmlp_get_queue(sem, t);
    owner = (my_queue) ? (my_queue->owner == t) : 0;

    spin_unlock_irqrestore(&sem->lock, flags);

    if (owner)
        kfmlp_unlock(l);

    return 0;
}

void kfmlp_free(struct litmus_lock* l)
{
    struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
    kfree(sem->queues);
    kfree(sem);
}

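/* Allocate and initialize a k-FMLP semaphore with the user-requested
 * number of replicas (one FIFO queue per replica). Returns NULL if the
 * argument cannot be read, is less than one, or if allocation fails. */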
struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
{
    struct kfmlp_semaphore* sem;
    int num_resources = 0;
    int i;

    if(!access_ok(VERIFY_READ, args, sizeof(num_resources)))
    {
        return(NULL);
    }
    if(__copy_from_user(&num_resources, args, sizeof(num_resources)))
    {
        return(NULL);
    }
    if(num_resources < 1)
    {
        return(NULL);
    }

    sem = kmalloc(sizeof(*sem), GFP_KERNEL);
    if(!sem)
    {
        return(NULL);
    }

    sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
    if(!sem->queues)
    {
        kfree(sem);
        return(NULL);
    }

    sem->litmus_lock.ops = ops;
    spin_lock_init(&sem->lock);
    sem->num_resources = num_resources;

    for(i = 0; i < num_resources; ++i)
    {
        sem->queues[i].owner = NULL;
        sem->queues[i].hp_waiter = NULL;
        init_waitqueue_head(&sem->queues[i].wait);
        sem->queues[i].count = 0;
    }

    sem->shortest_queue = &sem->queues[0];

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
    sem->aff_obs = NULL;
#endif

    return &sem->litmus_lock;
}



#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)

static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica)
{
    int gpu = replica % aff->nr_rsrc;
    return gpu;
}

static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica)
{
    int gpu = __replica_to_gpu(aff, replica) + aff->offset;
    return gpu;
}

static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu)
{
    int replica = gpu - aff->offset;
    return replica;
}


int kfmlp_aff_obs_close(struct affinity_observer* obs)
{
    return 0;
}

void kfmlp_aff_obs_free(struct affinity_observer* obs)
{
    struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs);
    kfree(kfmlp_aff->nr_cur_users_on_rsrc);
    kfree(kfmlp_aff->q_info);
    kfree(kfmlp_aff);
}

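/* Allocate a GPU affinity observer for a KFMLP semaphore. Validates that
 * the number of replicas is evenly divisible by the number of simultaneous
 * users per GPU, sets up the per-queue bookkeeping, and attaches the
 * observer to the lock. */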
static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops,
                                                   struct kfmlp_affinity_ops* kfmlp_ops,
                                                   void* __user args)
{
    struct kfmlp_affinity* kfmlp_aff;
    struct gpu_affinity_observer_args aff_args;
    struct kfmlp_semaphore* sem;
    int i;
    unsigned long flags;

    if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
        return(NULL);
    }
    if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
        return(NULL);
    }

    sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);

    if(sem->litmus_lock.type != KFMLP_SEM) {
        TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
        return(NULL);
    }

    if((aff_args.nr_simult_users <= 0) ||
       (sem->num_resources%aff_args.nr_simult_users != 0)) {
        TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
                  "(%d) per replica. #replicas should be evenly divisible "
                  "by #simult_users.\n",
                  sem->litmus_lock.ident,
                  sem->num_resources,
                  aff_args.nr_simult_users);
        return(NULL);
    }

//    if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
//        TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
//                  NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
////        return(NULL);
//    }

    kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
    if(!kfmlp_aff) {
        return(NULL);
    }

    kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL);
    if(!kfmlp_aff->q_info) {
        kfree(kfmlp_aff);
        return(NULL);
    }

    kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL);
    if(!kfmlp_aff->nr_cur_users_on_rsrc) {
        kfree(kfmlp_aff->q_info);
        kfree(kfmlp_aff);
        return(NULL);
    }

    affinity_observer_new(&kfmlp_aff->obs, ops, &aff_args.obs);

    kfmlp_aff->ops = kfmlp_ops;
    kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
    kfmlp_aff->nr_simult = aff_args.nr_simult_users;
    kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;

    // one counter per GPU; zero the same number of entries that were
    // allocated above (nr_rsrc of them)
    memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(kfmlp_aff->nr_rsrc));

    for(i = 0; i < sem->num_resources; ++i) {
        kfmlp_aff->q_info[i].q = &sem->queues[i];
        kfmlp_aff->q_info[i].estimated_len = 0;

        // multiple q_info's will point to the same resource (aka GPU) if
        // aff_args.nr_simult_users > 1
        kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)];
    }

    // attach observer to the lock
    spin_lock_irqsave(&sem->lock, flags);
    sem->aff_obs = kfmlp_aff;
    spin_unlock_irqrestore(&sem->lock, flags);

    return &kfmlp_aff->obs;
}



static int gpu_replica_to_resource(struct kfmlp_affinity* aff,
                                   struct kfmlp_queue* fq) {
    struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
    return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq)));
}


// Smart KFMLP Affinity

//static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff)
//{
//    struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
//    struct kfmlp_queue_info *shortest = &aff->q_info[0];
//    int i;
//
//    for(i = 1; i < sem->num_resources; ++i) {
//        if(aff->q_info[i].estimated_len < shortest->estimated_len) {
//            shortest = &aff->q_info[i];
//        }
//    }
//
//    return(shortest);
//}

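/* Pick the replica queue with the smallest estimated completion time for
 * task t: each queue's estimate is its current estimated length plus t's
 * expected critical-section length for the corresponding migration
 * distance. Ties are broken in favor of the GPU with fewer current users. */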
struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
{
    struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
    lt_t min_len;
    int min_nr_users;
    struct kfmlp_queue_info *shortest;
    struct kfmlp_queue *to_enqueue;
    int i;
    int affinity_gpu;

    // simply pick the shortest queue if we have no affinity, or if we have
    // affinity with the shortest
    if(unlikely(tsk_rt(t)->last_gpu < 0)) {
        affinity_gpu = aff->offset; // first gpu
        TRACE_CUR("no affinity\n");
    }
    else {
        affinity_gpu = tsk_rt(t)->last_gpu;
    }

    // all things being equal, let's start with the queue with which we have
    // affinity. this helps us maintain affinity even when we don't have
    // an estimate for local-affinity execution time (i.e., 2nd time on GPU)
    shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];

//    if(shortest == aff->shortest_queue) {
//        TRACE_CUR("special case: have affinity with shortest queue\n");
//        goto out;
//    }

    min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
    min_nr_users = *(shortest->nr_cur_users);

    TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
              get_gpu_estimate(t, MIG_LOCAL),
              kfmlp_get_idx(sem, shortest->q),
              min_len);

    for(i = 0; i < sem->num_resources; ++i) {
        if(&aff->q_info[i] != shortest) {

            lt_t est_len =
                aff->q_info[i].estimated_len +
                get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i)));

            // queue is smaller, or they're equal and the other has a smaller number
            // of total users.
            //
            // tie-break on the smallest number of simult users. this only kicks in
            // when there is more than one empty queue.
            if((est_len < min_len) ||
               ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
                shortest = &aff->q_info[i];
                min_len = est_len;
                min_nr_users = *(aff->q_info[i].nr_cur_users);
            }

            TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
                      get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))),
                      kfmlp_get_idx(sem, aff->q_info[i].q),
                      est_len);
        }
    }

    to_enqueue = shortest->q;
    TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
              kfmlp_get_idx(sem, to_enqueue),
              kfmlp_get_idx(sem, sem->shortest_queue));

    return to_enqueue;
}

struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
{
    struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);

    // For now, just steal the highest-priority waiter.
    // TODO: Implement affinity-aware stealing.

    return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
}


void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
    struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
    int replica = kfmlp_get_idx(sem, fq);
    int gpu = replica_to_gpu(aff, replica);
    struct kfmlp_queue_info *info = &aff->q_info[replica];
    lt_t est_time;
    lt_t est_len_before;

    if(current == t) {
        tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
    }

    est_len_before = info->estimated_len;
    est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
    info->estimated_len += est_time;

    TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
              kfmlp_get_idx(sem, info->q),
              est_len_before, est_time,
              info->estimated_len);

//    if(aff->shortest_queue == info) {
//        // we may no longer be the shortest
//        aff->shortest_queue = kfmlp_aff_find_shortest(aff);
//
//        TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
//                  kfmlp_get_idx(sem, aff->shortest_queue->q),
//                  aff->shortest_queue->q->count,
//                  aff->shortest_queue->estimated_len);
//    }
}

void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
    struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
    int replica = kfmlp_get_idx(sem, fq);
    int gpu = replica_to_gpu(aff, replica);
    struct kfmlp_queue_info *info = &aff->q_info[replica];
    lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));

    if(est_time > info->estimated_len) {
        WARN_ON(1);
        info->estimated_len = 0;
    }
    else {
        info->estimated_len -= est_time;
    }

    TRACE_CUR("fq %d est len is now %llu\n",
              kfmlp_get_idx(sem, info->q),
              info->estimated_len);

    // check to see if we're the shortest queue now.
//    if((aff->shortest_queue != info) &&
//       (aff->shortest_queue->estimated_len > info->estimated_len)) {
//
//        aff->shortest_queue = info;
//
//        TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
//                  kfmlp_get_idx(sem, info->q),
//                  info->q->count,
//                  info->estimated_len);
//    }
}

void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
    struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
    int replica = kfmlp_get_idx(sem, fq);
    int gpu = replica_to_gpu(aff, replica);

    tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration

    TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
              t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);

    // count the number of resource holders
    ++(*(aff->q_info[replica].nr_cur_users));

    reg_nv_device(gpu, 1, t); // register

    tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
    reset_gpu_tracker(t);
    start_gpu_tracker(t);
}

void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
    struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
    int replica = kfmlp_get_idx(sem, fq);
    int gpu = replica_to_gpu(aff, replica);
    lt_t est_time;

    stop_gpu_tracker(t); // stop the tracker before we do anything else.

    est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));

    tsk_rt(t)->last_gpu = gpu;

    // count the number of resource holders
    --(*(aff->q_info[replica].nr_cur_users));

    reg_nv_device(gpu, 0, t); // unregister

    // update estimates
    update_gpu_estimate(t, get_gpu_time(t));

    TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %lld\n",
              t->comm, t->pid, gpu,
              get_gpu_time(t),
              est_time,
              (long long)get_gpu_time(t) - (long long)est_time);
}

struct kfmlp_affinity_ops gpu_kfmlp_affinity =
{
    .advise_enqueue = gpu_kfmlp_advise_enqueue,
    .advise_steal = gpu_kfmlp_advise_steal,
    .notify_enqueue = gpu_kfmlp_notify_enqueue,
    .notify_dequeue = gpu_kfmlp_notify_dequeue,
    .notify_acquired = gpu_kfmlp_notify_acquired,
    .notify_freed = gpu_kfmlp_notify_freed,
    .replica_to_resource = gpu_replica_to_resource,
};

struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
                                                void* __user args)
{
    return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args);
}


// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration)

struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
{
    struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
    int min_count;
    int min_nr_users;
    struct kfmlp_queue_info *shortest;
    struct kfmlp_queue *to_enqueue;
    int i;

//    TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");

    shortest = &aff->q_info[0];
    min_count = shortest->q->count;
    min_nr_users = *(shortest->nr_cur_users);

    TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
              kfmlp_get_idx(sem, shortest->q),
              shortest->q->count,
              min_nr_users);

    for(i = 1; i < sem->num_resources; ++i) {
        int len = aff->q_info[i].q->count;

        // queue is smaller, or they're equal and the other has a smaller number
        // of total users.
        //
        // tie-break on the smallest number of simult users. this only kicks in
        // when there is more than one empty queue.
        if((len < min_count) ||
           ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
            shortest = &aff->q_info[i];
            min_count = shortest->q->count;
            min_nr_users = *(aff->q_info[i].nr_cur_users);
        }

        TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
                  kfmlp_get_idx(sem, aff->q_info[i].q),
                  aff->q_info[i].q->count,
                  *(aff->q_info[i].nr_cur_users));
    }

    to_enqueue = shortest->q;
    TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
              kfmlp_get_idx(sem, to_enqueue),
              kfmlp_get_idx(sem, sem->shortest_queue));

    return to_enqueue;
}

struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
{
    struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
//    TRACE_CUR("Simple GPU KFMLP advise_steal invoked\n");
    return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
}

void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
//    TRACE_CUR("Simple GPU KFMLP notify_enqueue invoked\n");
}

void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
//    TRACE_CUR("Simple GPU KFMLP notify_dequeue invoked\n");
}

void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
    struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
    int replica = kfmlp_get_idx(sem, fq);
    int gpu = replica_to_gpu(aff, replica);

//    TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n");

    // count the number of resource holders
    ++(*(aff->q_info[replica].nr_cur_users));

    reg_nv_device(gpu, 1, t); // register
}

void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
    struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
    int replica = kfmlp_get_idx(sem, fq);
    int gpu = replica_to_gpu(aff, replica);

//    TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n");
    // count the number of resource holders
    --(*(aff->q_info[replica].nr_cur_users));

    reg_nv_device(gpu, 0, t); // unregister
}

struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity =
{
    .advise_enqueue = simple_gpu_kfmlp_advise_enqueue,
    .advise_steal = simple_gpu_kfmlp_advise_steal,
    .notify_enqueue = simple_gpu_kfmlp_notify_enqueue,
    .notify_dequeue = simple_gpu_kfmlp_notify_dequeue,
    .notify_acquired = simple_gpu_kfmlp_notify_acquired,
    .notify_freed = simple_gpu_kfmlp_notify_freed,
    .replica_to_resource = gpu_replica_to_resource,
};

struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
                                                       void* __user args)
{
    return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args);
}

#endif