path: root/litmus/kfmlp_lock.c
Diffstat (limited to 'litmus/kfmlp_lock.c')
-rw-r--r--  litmus/kfmlp_lock.c  1003
1 file changed, 1003 insertions, 0 deletions
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
new file mode 100644
index 000000000000..785a095275e6
--- /dev/null
+++ b/litmus/kfmlp_lock.c
@@ -0,0 +1,1003 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/fdso.h>
7
8#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
9#include <litmus/gpu_affinity.h>
10#include <litmus/nvidia_info.h>
11#endif
12
13#include <litmus/kfmlp_lock.h>
14
15static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
16 struct kfmlp_queue* queue)
17{
18 return (queue - &sem->queues[0]);
19}
20
21static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
22 struct task_struct* holder)
23{
24 int i;
25 for(i = 0; i < sem->num_resources; ++i)
26 if(sem->queues[i].owner == holder)
27 return(&sem->queues[i]);
28 return(NULL);
29}
30
31/* caller is responsible for locking */
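/* Scan kqueue's wait list and return the highest-priority waiter
 * (per litmus->compare), ignoring 'skip' if given; NULL if there is none. */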
32static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
33 struct task_struct *skip)
34{
35 struct list_head *pos;
36 struct task_struct *queued, *found = NULL;
37
38 list_for_each(pos, &kqueue->wait.task_list) {
39 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
40 task_list)->private;
41
42 /* Compare task prios, find high prio task. */
43 //if (queued != skip && edf_higher_prio(queued, found))
44 if (queued != skip && litmus->compare(queued, found))
45 found = queued;
46 }
47 return found;
48}
49
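/* Return the queue with the fewest tasks (owner + waiters), using the cached
 * sem->shortest_queue as the baseline and scanning circularly starting just
 * past search_start so that ties are spread across replicas; stops early
 * when an empty queue is found. Caller must hold sem->lock. */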
50static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* sem,
51 struct kfmlp_queue* search_start)
52{
53 // we start our search at search_start instead of at the beginning of the
54 // queue list to load-balance across all resources.
55 struct kfmlp_queue* step = search_start;
56 struct kfmlp_queue* shortest = sem->shortest_queue;
57
58 do
59 {
60 step = (step+1 != &sem->queues[sem->num_resources]) ?
61 step+1 : &sem->queues[0];
62
63 if(step->count < shortest->count)
64 {
65 shortest = step;
66 if(step->count == 0)
67 break; /* can't get any shorter */
68 }
69
70 }while(step != search_start);
71
72 return(shortest);
73}
74
75
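/* Pick a victim for work stealing: among all queues with at least one waiter
 * behind the owner (count > 1), choose the one whose hp_waiter has the
 * highest priority and return that waiter, along with its wait-queue node
 * and queue via the out-parameters. Returns NULL if nothing can be stolen.
 * Caller must hold sem->lock. */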
76static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
77 wait_queue_t** to_steal,
78 struct kfmlp_queue** to_steal_from)
79{
80 /* must hold sem->lock */
81
82 int i;
83
84 *to_steal = NULL;
85 *to_steal_from = NULL;
86
87 for(i = 0; i < sem->num_resources; ++i)
88 {
89 if( (sem->queues[i].count > 1) &&
90 ((*to_steal_from == NULL) ||
91 //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
92 (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) )
93 {
94 *to_steal_from = &sem->queues[i];
95 }
96 }
97
98 if(*to_steal_from)
99 {
100 struct list_head *pos;
101 struct task_struct *target = (*to_steal_from)->hp_waiter;
102
103 TRACE_CUR("want to steal hp_waiter (%s/%d) from queue %d\n",
104 target->comm,
105 target->pid,
106 kfmlp_get_idx(sem, *to_steal_from));
107
108 list_for_each(pos, &(*to_steal_from)->wait.task_list)
109 {
110 wait_queue_t *node = list_entry(pos, wait_queue_t, task_list);
111 struct task_struct *queued = (struct task_struct*) node->private;
112 /* Compare task prios, find high prio task. */
113 if (queued == target)
114 {
115 *to_steal = node;
116
117 TRACE_CUR("steal: selected %s/%d from queue %d\n",
118 queued->comm, queued->pid,
119 kfmlp_get_idx(sem, *to_steal_from));
120
121 return queued;
122 }
123 }
124
125 TRACE_CUR("Could not find %s/%d in queue %d!!! THIS IS A BUG!\n",
126 target->comm,
127 target->pid,
128 kfmlp_get_idx(sem, *to_steal_from));
129 }
130
131 return NULL;
132}
133
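/* Move the waiter referenced by 'wait' from queue 'src' to queue 'dst':
 * remove it from src (recomputing src's hp_waiter and undoing any priority
 * inheritance it provided), update the cached shortest queue, append the
 * entry to the tail of dst, and propagate the waiter's priority to dst's
 * owner if needed. Affinity observers are notified of the dequeue and
 * enqueue. Caller must hold sem->lock. */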
134static void kfmlp_steal_node(struct kfmlp_semaphore *sem,
135 struct kfmlp_queue *dst,
136 wait_queue_t *wait,
137 struct kfmlp_queue *src)
138{
139 struct task_struct* t = (struct task_struct*) wait->private;
140
141 __remove_wait_queue(&src->wait, wait);
142 --(src->count);
143
144 if(t == src->hp_waiter) {
145 src->hp_waiter = kfmlp_find_hp_waiter(src, NULL);
146
147 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
148 kfmlp_get_idx(sem, src),
149 (src->hp_waiter) ? src->hp_waiter->comm : "nil",
150 (src->hp_waiter) ? src->hp_waiter->pid : -1);
151
152 if(src->owner && tsk_rt(src->owner)->inh_task == t) {
153 litmus->decrease_prio(src->owner, src->hp_waiter);
154 }
155 }
156
157 if(sem->shortest_queue->count > src->count) {
158 sem->shortest_queue = src;
159 TRACE_CUR("queue %d is the shortest\n", kfmlp_get_idx(sem, sem->shortest_queue));
160 }
161
162#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
163 if(sem->aff_obs) {
164 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t);
165 }
166#endif
167
168 init_waitqueue_entry(wait, t);
169 __add_wait_queue_tail_exclusive(&dst->wait, wait);
170 ++(dst->count);
171
172 if(litmus->compare(t, dst->hp_waiter)) {
173 dst->hp_waiter = t;
174
175 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
176 kfmlp_get_idx(sem, dst),
177 t->comm, t->pid);
178
179 if(dst->owner && litmus->compare(t, dst->owner))
180 {
181 litmus->increase_prio(dst->owner, t);
182 }
183 }
184
185#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
186 if(sem->aff_obs) {
187 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t);
188 }
189#endif
190}
191
192
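/* Acquire one of the k replicas: pick a queue (affinity advice if an
 * observer is attached, otherwise the globally shortest queue). If that
 * replica is held, enqueue FIFO, set up priority inheritance for the owner
 * if we become the new hp_waiter, and suspend; otherwise take ownership
 * immediately. Returns the replica index (or, with an affinity observer,
 * the resource id the replica maps to, e.g. a GPU number). */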
193int kfmlp_lock(struct litmus_lock* l)
194{
195 struct task_struct* t = current;
196 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
197 struct kfmlp_queue* my_queue = NULL;
198 wait_queue_t wait;
199 unsigned long flags;
200
201 if (!is_realtime(t))
202 return -EPERM;
203
204 spin_lock_irqsave(&sem->lock, flags);
205
206#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
207 if(sem->aff_obs) {
208 my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t);
209 }
210 if(!my_queue) {
211 my_queue = sem->shortest_queue;
212 }
213#else
214 my_queue = sem->shortest_queue;
215#endif
216
217 if (my_queue->owner) {
218 /* resource is not free => must suspend and wait */
219 TRACE_CUR("queue %d: Resource is not free => must suspend and wait. (queue size = %d)\n",
220 kfmlp_get_idx(sem, my_queue),
221 my_queue->count);
222
223 init_waitqueue_entry(&wait, t);
224
225 /* FIXME: interruptible would be nice some day */
226 set_task_state(t, TASK_UNINTERRUPTIBLE);
227
228 __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
229
230 TRACE_CUR("queue %d: hp_waiter is currently %s/%d\n",
231 kfmlp_get_idx(sem, my_queue),
232 (my_queue->hp_waiter) ? my_queue->hp_waiter->comm : "nil",
233 (my_queue->hp_waiter) ? my_queue->hp_waiter->pid : -1);
234
235 /* check if we need to activate priority inheritance */
236 //if (edf_higher_prio(t, my_queue->hp_waiter))
237 if (litmus->compare(t, my_queue->hp_waiter)) {
238 my_queue->hp_waiter = t;
239 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
240 kfmlp_get_idx(sem, my_queue),
241 t->comm, t->pid);
242
243 //if (edf_higher_prio(t, my_queue->owner))
244 if (litmus->compare(t, my_queue->owner)) {
245 litmus->increase_prio(my_queue->owner, my_queue->hp_waiter);
246 }
247 }
248
249 ++(my_queue->count);
250
251 if(my_queue == sem->shortest_queue) {
252 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
253 TRACE_CUR("queue %d is the shortest\n",
254 kfmlp_get_idx(sem, sem->shortest_queue));
255 }
256
257#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
258 if(sem->aff_obs) {
259 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
260 }
261#endif
262
263 /* release lock before sleeping */
264 spin_unlock_irqrestore(&sem->lock, flags);
265
266 /* We depend on the FIFO order. Thus, we don't need to recheck
267 * when we wake up; we are guaranteed to have the lock since
268 * there is only one wake up per release (or steal).
269 */
270 suspend_for_lock();
271
272
273 if(my_queue->owner == t) {
274 TRACE_CUR("queue %d: acquired through waiting\n",
275 kfmlp_get_idx(sem, my_queue));
276 }
277 else {
278		/* this case may happen if our wait entry was stolen
279		   between queues; record where we ended up. */
280 my_queue = kfmlp_get_queue(sem, t);
281
282 BUG_ON(!my_queue);
283 TRACE_CUR("queue %d: acquired through stealing\n",
284 kfmlp_get_idx(sem, my_queue));
285 }
286 }
287 else {
288 TRACE_CUR("queue %d: acquired immediately\n",
289 kfmlp_get_idx(sem, my_queue));
290
291 my_queue->owner = t;
292
293 ++(my_queue->count);
294
295 if(my_queue == sem->shortest_queue) {
296 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
297 TRACE_CUR("queue %d is the shortest\n",
298 kfmlp_get_idx(sem, sem->shortest_queue));
299 }
300
301#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
302 if(sem->aff_obs) {
303 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
304 sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t);
305 }
306#endif
307
308 spin_unlock_irqrestore(&sem->lock, flags);
309 }
310
311
312#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
313 if(sem->aff_obs) {
314 return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue);
315 }
316#endif
317 return kfmlp_get_idx(sem, my_queue);
318}
319
320
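/* Release the replica we hold: clear ownership, drop any inherited priority,
 * and hand the replica to the next FIFO waiter in our queue (which inherits
 * the queue's hp_waiter priority if it is not the hp_waiter itself). If our
 * queue has no waiters, try to steal the highest-priority waiter from
 * another queue instead. */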
321int kfmlp_unlock(struct litmus_lock* l)
322{
323 struct task_struct *t = current, *next;
324 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
325 struct kfmlp_queue *my_queue, *to_steal_from;
326 unsigned long flags;
327 int err = 0;
328
329 my_queue = kfmlp_get_queue(sem, t);
330
331 if (!my_queue) {
332 err = -EINVAL;
333 goto out;
334 }
335
336 spin_lock_irqsave(&sem->lock, flags);
337
338 TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue));
339
340 my_queue->owner = NULL; // clear ownership
341 --(my_queue->count);
342
343 if(my_queue->count < sem->shortest_queue->count)
344 {
345 sem->shortest_queue = my_queue;
346 TRACE_CUR("queue %d is the shortest\n",
347 kfmlp_get_idx(sem, sem->shortest_queue));
348 }
349
350#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
351 if(sem->aff_obs) {
352 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t);
353 sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t);
354 }
355#endif
356
357 /* we lose the benefit of priority inheritance (if any) */
358 if (tsk_rt(t)->inh_task)
359 litmus->decrease_prio(t, NULL);
360
361
362 /* check if there are jobs waiting for this resource */
363RETRY:
364 next = __waitqueue_remove_first(&my_queue->wait);
365 if (next) {
366		/* next becomes the resource holder */
367 my_queue->owner = next;
368
369#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
370 if(sem->aff_obs) {
371 sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next);
372 }
373#endif
374
375 TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
376 kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
377
378 /* determine new hp_waiter if necessary */
379 if (next == my_queue->hp_waiter) {
380 TRACE_TASK(next, "was highest-prio waiter\n");
381 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
382 if (my_queue->hp_waiter)
383 TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
384 else
385 TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
386 } else {
387 /* Well, if next is not the highest-priority waiter,
388 * then it ought to inherit the highest-priority
389 * waiter's priority. */
390 litmus->increase_prio(next, my_queue->hp_waiter);
391 }
392
393 /* wake up next */
394 wake_up_process(next);
395 }
396 else {
397		// TODO: move this stealing logic to before we attempt to release
398		// our resource (simplifies the code and removes the ugly goto RETRY).
399 wait_queue_t *wait;
400
401 TRACE_CUR("queue %d: looking to steal someone...\n",
402 kfmlp_get_idx(sem, my_queue));
403
404#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
405 next = (sem->aff_obs) ?
406 sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) :
407 kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
408#else
409 next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
410#endif
411
412 if(next) {
413 TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
414 kfmlp_get_idx(sem, my_queue),
415 next->comm, next->pid,
416 kfmlp_get_idx(sem, to_steal_from));
417
418 kfmlp_steal_node(sem, my_queue, wait, to_steal_from);
419
420 goto RETRY; // will succeed this time.
421 }
422 else {
423 TRACE_CUR("queue %d: no one to steal.\n",
424 kfmlp_get_idx(sem, my_queue));
425 }
426 }
427
428 spin_unlock_irqrestore(&sem->lock, flags);
429
430out:
431 return err;
432}
433
434int kfmlp_close(struct litmus_lock* l)
435{
436 struct task_struct *t = current;
437 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
438 struct kfmlp_queue *my_queue;
439 unsigned long flags;
440
441 int owner;
442
443 spin_lock_irqsave(&sem->lock, flags);
444
445 my_queue = kfmlp_get_queue(sem, t);
446 owner = (my_queue) ? (my_queue->owner == t) : 0;
447
448 spin_unlock_irqrestore(&sem->lock, flags);
449
450 if (owner)
451 kfmlp_unlock(l);
452
453 return 0;
454}
455
456void kfmlp_free(struct litmus_lock* l)
457{
458 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
459 kfree(sem->queues);
460 kfree(sem);
461}
462
463
464
465struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
466{
467 struct kfmlp_semaphore* sem;
468 int num_resources = 0;
469 int i;
470
471 if(!access_ok(VERIFY_READ, args, sizeof(num_resources)))
472 {
473 return(NULL);
474 }
475 if(__copy_from_user(&num_resources, args, sizeof(num_resources)))
476 {
477 return(NULL);
478 }
479 if(num_resources < 1)
480 {
481 return(NULL);
482 }
483
484 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
485 if(!sem)
486 {
487 return(NULL);
488 }
489
490 sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
491 if(!sem->queues)
492 {
493 kfree(sem);
494 return(NULL);
495 }
496
497 sem->litmus_lock.ops = ops;
498 spin_lock_init(&sem->lock);
499 sem->num_resources = num_resources;
500
501 for(i = 0; i < num_resources; ++i)
502 {
503 sem->queues[i].owner = NULL;
504 sem->queues[i].hp_waiter = NULL;
505 init_waitqueue_head(&sem->queues[i].wait);
506 sem->queues[i].count = 0;
507 }
508
509 sem->shortest_queue = &sem->queues[0];
510
511#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
512 sem->aff_obs = NULL;
513#endif
514
515 return &sem->litmus_lock;
516}
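/* Illustrative sketch (not part of the original file): a scheduler plugin
 * would typically create this lock from its allocate_lock() callback,
 * wiring kfmlp_lock/kfmlp_unlock/kfmlp_close/kfmlp_free into a
 * litmus_lock_ops table. The ops-table name below is hypothetical and the
 * exact field names depend on the litmus_lock_ops definition in this tree:
 *
 *   static struct litmus_lock_ops my_kfmlp_lock_ops = {
 *       .lock       = kfmlp_lock,
 *       .unlock     = kfmlp_unlock,
 *       .close      = kfmlp_close,
 *       .deallocate = kfmlp_free,
 *   };
 *
 *   case KFMLP_SEM:
 *       *lock = kfmlp_new(&my_kfmlp_lock_ops, args);
 *       err = *lock ? 0 : -ENOMEM;
 *       break;
 */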
517
518
519
520
521#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
522
523static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica)
524{
525 int gpu = replica % aff->nr_rsrc;
526 return gpu;
527}
528
529static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica)
530{
531 int gpu = __replica_to_gpu(aff, replica) + aff->offset;
532 return gpu;
533}
534
535static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu)
536{
537 int replica = gpu - aff->offset;
538 return replica;
539}
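/* Example of the replica<->GPU mapping: with nr_rsrc = 2 GPUs,
 * nr_simult = 2 and offset = 4, replicas 0..3 map to GPUs 4, 5, 4, 5
 * (i.e., each GPU is served by nr_simult replicas). */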
540
541
542int kfmlp_aff_obs_close(struct affinity_observer* obs)
543{
544 return 0;
545}
546
547void kfmlp_aff_obs_free(struct affinity_observer* obs)
548{
549 struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs);
550 kfree(kfmlp_aff->nr_cur_users_on_rsrc);
551 kfree(kfmlp_aff->q_info);
552 kfree(kfmlp_aff);
553}
554
555static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops,
556 struct kfmlp_affinity_ops* kfmlp_ops,
557 void* __user args)
558{
559 struct kfmlp_affinity* kfmlp_aff;
560 struct gpu_affinity_observer_args aff_args;
561 struct kfmlp_semaphore* sem;
562 int i;
563 unsigned long flags;
564
565 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
566 return(NULL);
567 }
568 if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
569 return(NULL);
570 }
571
572 sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
573
574 if(sem->litmus_lock.type != KFMLP_SEM) {
575 TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
576 return(NULL);
577 }
578
579 if((aff_args.nr_simult_users <= 0) ||
580 (sem->num_resources%aff_args.nr_simult_users != 0)) {
581 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
582 "(%d) per replica. #replicas should be evenly divisible "
583 "by #simult_users.\n",
584 sem->litmus_lock.ident,
585 sem->num_resources,
586 aff_args.nr_simult_users);
587 return(NULL);
588 }
589
590// if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
591// TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
592// NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
593//// return(NULL);
594// }
595
596 kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
597 if(!kfmlp_aff) {
598 return(NULL);
599 }
600
601 kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL);
602 if(!kfmlp_aff->q_info) {
603 kfree(kfmlp_aff);
604 return(NULL);
605 }
606
607 kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL);
608 if(!kfmlp_aff->nr_cur_users_on_rsrc) {
609 kfree(kfmlp_aff->q_info);
610 kfree(kfmlp_aff);
611 return(NULL);
612 }
613
614 affinity_observer_new(&kfmlp_aff->obs, ops, &aff_args.obs);
615
616 kfmlp_aff->ops = kfmlp_ops;
617 kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
618 kfmlp_aff->nr_simult = aff_args.nr_simult_users;
619 kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;
620
621	memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(kfmlp_aff->nr_rsrc)); // zero exactly the nr_rsrc counters allocated above
622
623 for(i = 0; i < sem->num_resources; ++i) {
624 kfmlp_aff->q_info[i].q = &sem->queues[i];
625 kfmlp_aff->q_info[i].estimated_len = 0;
626
627 // multiple q_info's will point to the same resource (aka GPU) if
628 // aff_args.nr_simult_users > 1
629 kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)];
630 }
631
632 // attach observer to the lock
633 spin_lock_irqsave(&sem->lock, flags);
634 sem->aff_obs = kfmlp_aff;
635 spin_unlock_irqrestore(&sem->lock, flags);
636
637 return &kfmlp_aff->obs;
638}
639
640
641
642
643static int gpu_replica_to_resource(struct kfmlp_affinity* aff,
644 struct kfmlp_queue* fq) {
645 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
646 return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq)));
647}
648
649
650// Smart KFMLP Affinity
651
652//static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff)
653//{
654// struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
655// struct kfmlp_queue_info *shortest = &aff->q_info[0];
656// int i;
657//
658// for(i = 1; i < sem->num_resources; ++i) {
659// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
660// shortest = &aff->q_info[i];
661// }
662// }
663//
664// return(shortest);
665//}
666
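/* Affinity-aware queue selection: start from the replica of the task's
 * last-used GPU (or the first GPU if it has none), then pick the queue that
 * minimizes (estimated queue length + the task's migration-distance-scaled
 * execution-time estimate), tie-breaking on the GPU with fewer current
 * holders. */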
667struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
668{
669 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
670 lt_t min_len;
671 int min_nr_users;
672 struct kfmlp_queue_info *shortest;
673 struct kfmlp_queue *to_enqueue;
674 int i;
675 int affinity_gpu;
676
677	// simply pick the shortest queue if we have no affinity, or if we
678	// have affinity with the shortest queue
679 if(unlikely(tsk_rt(t)->last_gpu < 0)) {
680 affinity_gpu = aff->offset; // first gpu
681 TRACE_CUR("no affinity\n");
682 }
683 else {
684 affinity_gpu = tsk_rt(t)->last_gpu;
685 }
686
687 // all things being equal, let's start with the queue with which we have
688 // affinity. this helps us maintain affinity even when we don't have
689	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
690 shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
691
692// if(shortest == aff->shortest_queue) {
693// TRACE_CUR("special case: have affinity with shortest queue\n");
694// goto out;
695// }
696
697 min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
698 min_nr_users = *(shortest->nr_cur_users);
699
700 TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
701 get_gpu_estimate(t, MIG_LOCAL),
702 kfmlp_get_idx(sem, shortest->q),
703 min_len);
704
705 for(i = 0; i < sem->num_resources; ++i) {
706 if(&aff->q_info[i] != shortest) {
707
708 lt_t est_len =
709 aff->q_info[i].estimated_len +
710 get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i)));
711
712 // queue is smaller, or they're equal and the other has a smaller number
713 // of total users.
714 //
715			// tie-break on the smallest number of simultaneous users. this only
716			// kicks in when there is more than one empty queue.
717 if((est_len < min_len) ||
718 ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
719 shortest = &aff->q_info[i];
720 min_len = est_len;
721 min_nr_users = *(aff->q_info[i].nr_cur_users);
722 }
723
724 TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
725 get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))),
726 kfmlp_get_idx(sem, aff->q_info[i].q),
727 est_len);
728 }
729 }
730
731 to_enqueue = shortest->q;
732 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
733 kfmlp_get_idx(sem, to_enqueue),
734 kfmlp_get_idx(sem, sem->shortest_queue));
735
736 return to_enqueue;
737}
738
739struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
740{
741 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
742
743	// For now, just steal the highest-priority waiter.
744 // TODO: Implement affinity-aware stealing.
745
746 return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
747}
748
749
750void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
751{
752 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
753 int replica = kfmlp_get_idx(sem, fq);
754 int gpu = replica_to_gpu(aff, replica);
755 struct kfmlp_queue_info *info = &aff->q_info[replica];
756 lt_t est_time;
757 lt_t est_len_before;
758
759 if(current == t) {
760 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
761 }
762
763 est_len_before = info->estimated_len;
764 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
765 info->estimated_len += est_time;
766
767 TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
768 kfmlp_get_idx(sem, info->q),
769 est_len_before, est_time,
770 info->estimated_len);
771
772// if(aff->shortest_queue == info) {
773// // we may no longer be the shortest
774// aff->shortest_queue = kfmlp_aff_find_shortest(aff);
775//
776// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
777// kfmlp_get_idx(sem, aff->shortest_queue->q),
778// aff->shortest_queue->q->count,
779// aff->shortest_queue->estimated_len);
780// }
781}
782
783void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
784{
785 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
786 int replica = kfmlp_get_idx(sem, fq);
787 int gpu = replica_to_gpu(aff, replica);
788 struct kfmlp_queue_info *info = &aff->q_info[replica];
789 lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
790
791 if(est_time > info->estimated_len) {
792 WARN_ON(1);
793 info->estimated_len = 0;
794 }
795 else {
796 info->estimated_len -= est_time;
797 }
798
799 TRACE_CUR("fq %d est len is now %llu\n",
800 kfmlp_get_idx(sem, info->q),
801 info->estimated_len);
802
803 // check to see if we're the shortest queue now.
804// if((aff->shortest_queue != info) &&
805// (aff->shortest_queue->estimated_len > info->estimated_len)) {
806//
807// aff->shortest_queue = info;
808//
809// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
810// kfmlp_get_idx(sem, info->q),
811// info->q->count,
812// info->estimated_len);
813// }
814}
815
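/* On acquisition: record the migration distance relative to the task's last
 * GPU, bump the per-GPU holder count, register the task with the NVIDIA
 * device, and reset/start the GPU usage tracker that feeds the
 * execution-time estimator. */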
816void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
817{
818 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
819 int replica = kfmlp_get_idx(sem, fq);
820 int gpu = replica_to_gpu(aff, replica);
821
822 tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
823
824 TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
825 t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
826
827	// count the number of resource holders
828 ++(*(aff->q_info[replica].nr_cur_users));
829
830 reg_nv_device(gpu, 1, t); // register
831
832
833 tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
834 reset_gpu_tracker(t);
835 start_gpu_tracker(t);
836}
837
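/* On release: stop the usage tracker, remember this GPU as last_gpu, drop
 * the per-GPU holder count, unregister from the device, and feed the
 * observed GPU time back into the migration-aware estimator. */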
838void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
839{
840 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
841 int replica = kfmlp_get_idx(sem, fq);
842 int gpu = replica_to_gpu(aff, replica);
843 lt_t est_time;
844
845 stop_gpu_tracker(t); // stop the tracker before we do anything else.
846
847 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
848
849 tsk_rt(t)->last_gpu = gpu;
850
851	// count the number of resource holders
852 --(*(aff->q_info[replica].nr_cur_users));
853
854 reg_nv_device(gpu, 0, t); // unregister
855
856 // update estimates
857 update_gpu_estimate(t, get_gpu_time(t));
858
859 TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n",
860 t->comm, t->pid, gpu,
861 get_gpu_time(t),
862 est_time,
863 (long long)get_gpu_time(t) - (long long)est_time);
864}
865
866struct kfmlp_affinity_ops gpu_kfmlp_affinity =
867{
868 .advise_enqueue = gpu_kfmlp_advise_enqueue,
869 .advise_steal = gpu_kfmlp_advise_steal,
870 .notify_enqueue = gpu_kfmlp_notify_enqueue,
871 .notify_dequeue = gpu_kfmlp_notify_dequeue,
872 .notify_acquired = gpu_kfmlp_notify_acquired,
873 .notify_freed = gpu_kfmlp_notify_freed,
874 .replica_to_resource = gpu_replica_to_resource,
875};
876
877struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
878 void* __user args)
879{
880 return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args);
881}
882
883
884
885
886
887
888
889
890// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration)
891
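/* The simple variant ignores execution-time estimates: it just picks the
 * queue with the fewest enqueued tasks, tie-breaking on the GPU with fewer
 * current holders. */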
892struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
893{
894 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
895 int min_count;
896 int min_nr_users;
897 struct kfmlp_queue_info *shortest;
898 struct kfmlp_queue *to_enqueue;
899 int i;
900
901// TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");
902
903 shortest = &aff->q_info[0];
904 min_count = shortest->q->count;
905 min_nr_users = *(shortest->nr_cur_users);
906
907 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
908 kfmlp_get_idx(sem, shortest->q),
909 shortest->q->count,
910 min_nr_users);
911
912 for(i = 1; i < sem->num_resources; ++i) {
913 int len = aff->q_info[i].q->count;
914
915 // queue is smaller, or they're equal and the other has a smaller number
916 // of total users.
917 //
918		// tie-break on the smallest number of simultaneous users. this only
919		// kicks in when there is more than one empty queue.
920 if((len < min_count) ||
921 ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
922 shortest = &aff->q_info[i];
923 min_count = shortest->q->count;
924 min_nr_users = *(aff->q_info[i].nr_cur_users);
925 }
926
927 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
928 kfmlp_get_idx(sem, aff->q_info[i].q),
929 aff->q_info[i].q->count,
930 *(aff->q_info[i].nr_cur_users));
931 }
932
933 to_enqueue = shortest->q;
934 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
935 kfmlp_get_idx(sem, to_enqueue),
936 kfmlp_get_idx(sem, sem->shortest_queue));
937
938 return to_enqueue;
939}
940
941struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
942{
943 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
944// TRACE_CUR("Simple GPU KFMLP advise_steal invoked\n");
945 return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
946}
947
948void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
949{
950// TRACE_CUR("Simple GPU KFMLP notify_enqueue invoked\n");
951}
952
953void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
954{
955// TRACE_CUR("Simple GPU KFMLP notify_dequeue invoked\n");
956}
957
958void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
959{
960 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
961 int replica = kfmlp_get_idx(sem, fq);
962 int gpu = replica_to_gpu(aff, replica);
963
964// TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n");
965
966	// count the number of resource holders
967 ++(*(aff->q_info[replica].nr_cur_users));
968
969 reg_nv_device(gpu, 1, t); // register
970}
971
972void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
973{
974 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
975 int replica = kfmlp_get_idx(sem, fq);
976 int gpu = replica_to_gpu(aff, replica);
977
978// TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n");
979	// count the number of resource holders
980 --(*(aff->q_info[replica].nr_cur_users));
981
982 reg_nv_device(gpu, 0, t); // unregister
983}
984
985struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity =
986{
987 .advise_enqueue = simple_gpu_kfmlp_advise_enqueue,
988 .advise_steal = simple_gpu_kfmlp_advise_steal,
989 .notify_enqueue = simple_gpu_kfmlp_notify_enqueue,
990 .notify_dequeue = simple_gpu_kfmlp_notify_dequeue,
991 .notify_acquired = simple_gpu_kfmlp_notify_acquired,
992 .notify_freed = simple_gpu_kfmlp_notify_freed,
993 .replica_to_resource = gpu_replica_to_resource,
994};
995
996struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
997 void* __user args)
998{
999 return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args);
1000}
1001
1002#endif
1003