path: root/litmus/ikglp_lock.c
Diffstat (limited to 'litmus/ikglp_lock.c')
-rw-r--r--  litmus/ikglp_lock.c  2976
1 files changed, 2976 insertions, 0 deletions
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
new file mode 100644
index 000000000000..a4ae74331782
--- /dev/null
+++ b/litmus/ikglp_lock.c
@@ -0,0 +1,2976 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/fdso.h>
7
8#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
9#include <litmus/gpu_affinity.h>
10#include <litmus/nvidia_info.h>
11#endif
12
13#include <litmus/ikglp_lock.h>
14
15// A large signed value, used as an 'invalid/unknown distance' sentinel.
16#define IKGLP_INVAL_DISTANCE 0x7FFFFFFF
17
18int ikglp_max_heap_base_priority_order(struct binheap_node *a,
19 struct binheap_node *b)
20{
21 ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
22 ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
23
24 BUG_ON(!d_a);
25 BUG_ON(!d_b);
26
27 return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
28}
29
30int ikglp_min_heap_base_priority_order(struct binheap_node *a,
31 struct binheap_node *b)
32{
33 ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
34 ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
35
36 return litmus->__compare(d_b->task, BASE, d_a->task, BASE);
37}
38
39int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a,
40 struct binheap_node *b)
41{
42 ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node);
43 ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node);
44
45 return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
46}
47
48
49int ikglp_min_heap_donee_order(struct binheap_node *a,
50 struct binheap_node *b)
51{
52 struct task_struct *prio_a, *prio_b;
53
54 ikglp_donee_heap_node_t *d_a =
55 binheap_entry(a, ikglp_donee_heap_node_t, node);
56 ikglp_donee_heap_node_t *d_b =
57 binheap_entry(b, ikglp_donee_heap_node_t, node);
58
59 if(!d_a->donor_info) {
60 prio_a = d_a->task;
61 }
62 else {
63 prio_a = d_a->donor_info->task;
64 BUG_ON(d_a->task != d_a->donor_info->donee_info->task);
65 }
66
67 if(!d_b->donor_info) {
68 prio_b = d_b->task;
69 }
70 else {
71 prio_b = d_b->donor_info->task;
72 BUG_ON(d_b->task != d_b->donor_info->donee_info->task);
73 }
74
75 // note reversed order
76 return litmus->__compare(prio_b, BASE, prio_a, BASE);
77}
78
79
80
81static inline int ikglp_get_idx(struct ikglp_semaphore *sem,
82 struct fifo_queue *queue)
83{
84 return (queue - &sem->fifo_queues[0]);
85}
86
87static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem,
88 struct task_struct *holder)
89{
90 int i;
91 for(i = 0; i < sem->nr_replicas; ++i)
92 if(sem->fifo_queues[i].owner == holder)
93 return(&sem->fifo_queues[i]);
94 return(NULL);
95}
96
97
98
99static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue,
100 struct task_struct *skip)
101{
102 struct list_head *pos;
103 struct task_struct *queued, *found = NULL;
104
105 list_for_each(pos, &kqueue->wait.task_list) {
106 queued = (struct task_struct*) list_entry(pos,
107 wait_queue_t, task_list)->private;
108
109 /* Compare task prios, find high prio task. */
110 if(queued != skip && litmus->compare(queued, found))
111 found = queued;
112 }
113 return found;
114}
115
116static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem,
117 struct fifo_queue *search_start)
118{
119 // we start our search at search_start instead of at the beginning of the
120 // queue list to load-balance across all resources.
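	// For example, with 4 replicas and search_start == &fifo_queues[2], the
	// scan visits queues 3, 0, 1, and finally 2, stopping early if it finds
	// an empty queue.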
121 struct fifo_queue* step = search_start;
122 struct fifo_queue* shortest = sem->shortest_fifo_queue;
123
124 do {
125 step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ?
126 step+1 : &sem->fifo_queues[0];
127
128 if(step->count < shortest->count) {
129 shortest = step;
130 if(step->count == 0)
131 break; /* can't get any shorter */
132 }
133
134 }while(step != search_start);
135
136 return(shortest);
137}
138
139static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem)
140{
141 return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task;
142}
143
144
145
146#if 0
147static void print_global_list(struct binheap_node* n, int depth)
148{
149 ikglp_heap_node_t *global_heap_node;
150 char padding[81] = " ";
151
152 if(n == NULL) {
153 TRACE_CUR("+-> %p\n", NULL);
154 return;
155 }
156
157 global_heap_node = binheap_entry(n, ikglp_heap_node_t, node);
158
159 if(depth*2 <= 80)
160 padding[depth*2] = '\0';
161
162 TRACE_CUR("%s+-> %s/%d\n",
163 padding,
164 global_heap_node->task->comm,
165 global_heap_node->task->pid);
166
167 if(n->left) print_global_list(n->left, depth+1);
168 if(n->right) print_global_list(n->right, depth+1);
169}
170
171static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth)
172{
173 ikglp_donee_heap_node_t *donee_node;
174 char padding[81] = " ";
175 struct task_struct* donor = NULL;
176
177 if(n == NULL) {
178 TRACE_CUR("+-> %p\n", NULL);
179 return;
180 }
181
182 donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node);
183
184 if(depth*2 <= 80)
185 padding[depth*2] = '\0';
186
187 if(donee_node->donor_info) {
188 donor = donee_node->donor_info->task;
189 }
190
191 TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n",
192 padding,
193 donee_node->task->comm,
194 donee_node->task->pid,
195 (donor) ? donor->comm : "nil",
196 (donor) ? donor->pid : -1,
197 ikglp_get_idx(sem, donee_node->fq));
198
199 if(n->left) print_donees(sem, n->left, depth+1);
200 if(n->right) print_donees(sem, n->right, depth+1);
201}
202
203static void print_donors(struct binheap_node *n, int depth)
204{
205 ikglp_wait_state_t *donor_node;
206 char padding[81] = " ";
207
208 if(n == NULL) {
209 TRACE_CUR("+-> %p\n", NULL);
210 return;
211 }
212
213 donor_node = binheap_entry(n, ikglp_wait_state_t, node);
214
215 if(depth*2 <= 80)
216 padding[depth*2] = '\0';
217
218
219 TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n",
220 padding,
221 donor_node->task->comm,
222 donor_node->task->pid,
223 donor_node->donee_info->task->comm,
224 donor_node->donee_info->task->pid);
225
226 if(n->left) print_donors(n->left, depth+1);
227 if(n->right) print_donors(n->right, depth+1);
228}
229#endif
230
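/* Global request list invariant: sem->top_m is a min-heap (ordered by base
 * priority) holding the (at most) m highest-priority requests known to this
 * lock, so its root is the m-th highest request; sem->not_top_m is a max-heap
 * holding all remaining requests. ikglp_add_global_list() and
 * ikglp_del_global_list() below keep the two heaps consistent as requests
 * arrive and depart.
 */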
231static void ikglp_add_global_list(struct ikglp_semaphore *sem,
232 struct task_struct *t,
233 ikglp_heap_node_t *node)
234{
235
236
237 node->task = t;
238 INIT_BINHEAP_NODE(&node->node);
239
240 if(sem->top_m_size < sem->m) {
241 TRACE_CUR("Trivially adding %s/%d to top-m global list.\n",
242 t->comm, t->pid);
243// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
244// print_global_list(sem->top_m.root, 1);
245
246 binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
247 ++(sem->top_m_size);
248
249// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
250// print_global_list(sem->top_m.root, 1);
251 }
252 else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) {
253 ikglp_heap_node_t *evicted =
254 binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node);
255
256 TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n",
257 t->comm, t->pid,
258 evicted->task->comm, evicted->task->pid);
259
260// TRACE_CUR("Not-Top-M Before:\n");
261// print_global_list(sem->not_top_m.root, 1);
262// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
263// print_global_list(sem->top_m.root, 1);
264
265
266 binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node);
267 INIT_BINHEAP_NODE(&evicted->node);
268 binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node);
269
270 binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
271
272// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
273// print_global_list(sem->top_m.root, 1);
274// TRACE_CUR("Not-Top-M After:\n");
275// print_global_list(sem->not_top_m.root, 1);
276 }
277 else {
278 TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n",
279 t->comm, t->pid);
280// TRACE_CUR("Not-Top-M Before:\n");
281// print_global_list(sem->not_top_m.root, 1);
282
283 binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node);
284
285// TRACE_CUR("Not-Top-M After:\n");
286// print_global_list(sem->not_top_m.root, 1);
287 }
288}
289
290
291static void ikglp_del_global_list(struct ikglp_semaphore *sem,
292 struct task_struct *t,
293 ikglp_heap_node_t *node)
294{
295 BUG_ON(!binheap_is_in_heap(&node->node));
296
297 TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid);
298
299 if(binheap_is_in_this_heap(&node->node, &sem->top_m)) {
300 TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid);
301
302// TRACE_CUR("Not-Top-M Before:\n");
303// print_global_list(sem->not_top_m.root, 1);
304// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
305// print_global_list(sem->top_m.root, 1);
306
307
308 binheap_delete(&node->node, &sem->top_m);
309
310 if(!binheap_empty(&sem->not_top_m)) {
311 ikglp_heap_node_t *promoted =
312 binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node);
313
314 TRACE_CUR("Promoting %s/%d to top-m\n",
315 promoted->task->comm, promoted->task->pid);
316
317 binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node);
318 INIT_BINHEAP_NODE(&promoted->node);
319
320 binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node);
321 }
322 else {
323 TRACE_CUR("No one to promote to top-m.\n");
324 --(sem->top_m_size);
325 }
326
327// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
328// print_global_list(sem->top_m.root, 1);
329// TRACE_CUR("Not-Top-M After:\n");
330// print_global_list(sem->not_top_m.root, 1);
331 }
332 else {
333 TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid);
334// TRACE_CUR("Not-Top-M Before:\n");
335// print_global_list(sem->not_top_m.root, 1);
336
337 binheap_delete(&node->node, &sem->not_top_m);
338
339// TRACE_CUR("Not-Top-M After:\n");
340// print_global_list(sem->not_top_m.root, 1);
341 }
342}
343
344
345static void ikglp_add_donees(struct ikglp_semaphore *sem,
346 struct fifo_queue *fq,
347 struct task_struct *t,
348 ikglp_donee_heap_node_t* node)
349{
350// TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid);
351// TRACE_CUR("donees Before:\n");
352// print_donees(sem, sem->donees.root, 1);
353
354 node->task = t;
355 node->donor_info = NULL;
356 node->fq = fq;
357 INIT_BINHEAP_NODE(&node->node);
358
359 binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node);
360
361// TRACE_CUR("donees After:\n");
362// print_donees(sem, sem->donees.root, 1);
363}
364
365
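/* The priority of waiter 't' has increased (or 't' has just been enqueued).
 * If 't' is now fq's highest-priority waiter, update fq->hp_waiter and
 * propagate the increase through the owner's inheritance heap. All paths
 * release sem->lock before returning.
 */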
366static void ikglp_refresh_owners_prio_increase(struct task_struct *t,
367 struct fifo_queue *fq,
368 struct ikglp_semaphore *sem,
369 unsigned long flags)
370{
371 // priority of 't' has increased (note: 't' might already be hp_waiter).
372 if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) {
373 struct task_struct *old_max_eff_prio;
374 struct task_struct *new_max_eff_prio;
375 struct task_struct *new_prio = NULL;
376 struct task_struct *owner = fq->owner;
377
378 if(fq->hp_waiter)
379 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
380 fq->hp_waiter->comm, fq->hp_waiter->pid);
381 else
382 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
383
384 if(owner)
385 {
386 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
387
388// TRACE_TASK(owner, "Heap Before:\n");
389// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
390
391 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
392
393 fq->hp_waiter = t;
394 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
395
396 binheap_decrease(&fq->nest.hp_binheap_node,
397 &tsk_rt(owner)->hp_blocked_tasks);
398
399// TRACE_TASK(owner, "Heap After:\n");
400// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
401
402 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
403
404 if(new_max_eff_prio != old_max_eff_prio) {
405 TRACE_TASK(t, "is new hp_waiter.\n");
406
407 if ((effective_priority(owner) == old_max_eff_prio) ||
408 (litmus->__compare(new_max_eff_prio, BASE,
409 owner, EFFECTIVE))){
410 new_prio = new_max_eff_prio;
411 }
412 }
413 else {
414 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
415 }
416
417 if(new_prio) {
418 // set new inheritance and propagate
419 TRACE_TASK(t, "Effective priority changed for owner %s/%d to %s/%d\n",
420 owner->comm, owner->pid,
421 new_prio->comm, new_prio->pid);
422 litmus->nested_increase_prio(owner, new_prio, &sem->lock,
423 flags); // unlocks lock.
424 }
425 else {
426 TRACE_TASK(t, "No change in effective priority (is %s/%d). Propagation halted.\n",
427 new_max_eff_prio->comm, new_max_eff_prio->pid);
428 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
429 unlock_fine_irqrestore(&sem->lock, flags);
430 }
431 }
432 else {
433 fq->hp_waiter = t;
434 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
435
436 TRACE_TASK(t, "no owner.\n");
437 unlock_fine_irqrestore(&sem->lock, flags);
438 }
439 }
440 else {
441 TRACE_TASK(t, "hp_waiter is unaffected.\n");
442 unlock_fine_irqrestore(&sem->lock, flags);
443 }
444}
445
446// hp_waiter has decreased
447static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq,
448 struct ikglp_semaphore *sem,
449 unsigned long flags)
450{
451 struct task_struct *owner = fq->owner;
452
453 struct task_struct *old_max_eff_prio;
454 struct task_struct *new_max_eff_prio;
455
456 if(!owner) {
457 TRACE_CUR("No owner. Returning.\n");
458 unlock_fine_irqrestore(&sem->lock, flags);
459 return;
460 }
461
462 TRACE_CUR("ikglp_refresh_owners_prio_decrease\n");
463
464 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
465
466 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
467
468 binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
469 fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ? effective_priority(fq->hp_waiter) : NULL; // record the waiter's effective priority, as at the other call sites
470 binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks,
471 struct nested_info, hp_binheap_node);
472
473 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
474
475 if((old_max_eff_prio != new_max_eff_prio) &&
476 (effective_priority(owner) == old_max_eff_prio))
477 {
478 // Need to set new effective_priority for owner
479 struct task_struct *decreased_prio;
480
481 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
482 ikglp_get_idx(sem, fq));
483
484 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
485 TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n",
486 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
487 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
488 owner->comm,
489 owner->pid,
490 ikglp_get_idx(sem, fq));
491
492 decreased_prio = new_max_eff_prio;
493 }
494 else {
495 TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of fq %d.\n",
496 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
497 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
498 owner->comm,
499 owner->pid,
500 ikglp_get_idx(sem, fq));
501
502 decreased_prio = NULL;
503 }
504
505 // beware: recursion
506 litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock
507 }
508 else {
509 TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
510 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
511 unlock_fine_irqrestore(&sem->lock, flags);
512 }
513}
514
515
516static void ikglp_remove_donation_from_owner(struct binheap_node *n,
517 struct fifo_queue *fq,
518 struct ikglp_semaphore *sem,
519 unsigned long flags)
520{
521 struct task_struct *owner = fq->owner;
522
523 struct task_struct *old_max_eff_prio;
524 struct task_struct *new_max_eff_prio;
525
526 BUG_ON(!owner);
527
528 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
529
530 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
531
532 binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks);
533
534 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
535
536 if((old_max_eff_prio != new_max_eff_prio) &&
537 (effective_priority(owner) == old_max_eff_prio))
538 {
539 // Need to set new effective_priority for owner
540 struct task_struct *decreased_prio;
541
542 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
543 ikglp_get_idx(sem, fq));
544
545 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
546 TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n",
547 ikglp_get_idx(sem, fq));
548 decreased_prio = new_max_eff_prio;
549 }
550 else {
551 TRACE_CUR("has lesser base priority than base priority of owner of fq %d.\n",
552 ikglp_get_idx(sem, fq));
553 decreased_prio = NULL;
554 }
555
556 // beware: recursion
557 litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock
558 }
559 else {
560 TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
561 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
562 unlock_fine_irqrestore(&sem->lock, flags);
563 }
564}
565
566static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t,
567 struct binheap_node *n)
568{
569 struct task_struct *old_max_eff_prio;
570 struct task_struct *new_max_eff_prio;
571
572 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
573
574 old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
575
576 binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks);
577
578 new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
579
580 if((old_max_eff_prio != new_max_eff_prio) &&
581 (effective_priority(t) == old_max_eff_prio))
582 {
583 // Need to set new effective_priority for owner
584 struct task_struct *decreased_prio;
585
586 if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) {
587 decreased_prio = new_max_eff_prio;
588 }
589 else {
590 decreased_prio = NULL;
591 }
592
593 tsk_rt(t)->inh_task = decreased_prio;
594 }
595
596 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
597}
598
599static void ikglp_get_immediate(struct task_struct* t,
600 struct fifo_queue *fq,
601 struct ikglp_semaphore *sem,
602 unsigned long flags)
603{
604 // resource available now
605 TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq));
606
607 fq->owner = t;
608
609 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
610 binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
611 struct nested_info, hp_binheap_node);
612 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
613
614 ++(fq->count);
615
616 ikglp_add_global_list(sem, t, &fq->global_heap_node);
617 ikglp_add_donees(sem, fq, t, &fq->donee_heap_node);
618
619 sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue);
620
621#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
622 if(sem->aff_obs) {
623 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
624 sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t);
625 }
626#endif
627
628 unlock_fine_irqrestore(&sem->lock, flags);
629}
630
631
632
633
634
635static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem,
636 struct fifo_queue* fq,
637 struct task_struct* t,
638 wait_queue_t *wait,
639 ikglp_heap_node_t *global_heap_node,
640 ikglp_donee_heap_node_t *donee_heap_node)
641{
642 /* resource is not free => must suspend and wait */
643 TRACE_TASK(t, "Enqueuing on fq %d.\n",
644 ikglp_get_idx(sem, fq));
645
646 init_waitqueue_entry(wait, t);
647
648 __add_wait_queue_tail_exclusive(&fq->wait, wait);
649
650 ++(fq->count);
651 ++(sem->nr_in_fifos);
652
653 // update global list.
654 if(likely(global_heap_node)) {
655 if(binheap_is_in_heap(&global_heap_node->node)) {
656 WARN_ON(1);
657 ikglp_del_global_list(sem, t, global_heap_node);
658 }
659 ikglp_add_global_list(sem, t, global_heap_node);
660 }
661 // update donor eligibility list.
662 if(likely(donee_heap_node)) {
663// if(binheap_is_in_heap(&donee_heap_node->node)) {
664// WARN_ON(1);
665// }
666 ikglp_add_donees(sem, fq, t, donee_heap_node);
667 }
668
669 if(sem->shortest_fifo_queue == fq) {
670 sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq);
671 }
672
673#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
674 if(sem->aff_obs) {
675 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
676 }
677#endif
678
679 TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq));
680}
681
682
683static void ikglp_enqueue_on_fq(
684 struct ikglp_semaphore *sem,
685 struct fifo_queue *fq,
686 ikglp_wait_state_t *wait,
687 unsigned long flags)
688{
689 /* resource is not free => must suspend and wait */
690 TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend and wait.\n",
691 ikglp_get_idx(sem, fq));
692
693 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
694 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
695
696 __ikglp_enqueue_on_fq(sem, fq, wait->task, &wait->fq_node,
697 &wait->global_heap_node, &wait->donee_heap_node);
698
699 ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags); // unlocks sem->lock
700}
701
702
703static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
704 ikglp_wait_state_t *wait)
705{
706 TRACE_TASK(wait->task, "goes to PQ.\n");
707
708 wait->pq_node.task = wait->task; // copy over task (little redundant...)
709
710 binheap_add(&wait->pq_node.node, &sem->priority_queue,
711 ikglp_heap_node_t, node);
712}
713
714static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
715 ikglp_wait_state_t *wait)
716{
717 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
718 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
719 INIT_BINHEAP_NODE(&wait->pq_node.node);
720
721 __ikglp_enqueue_on_pq(sem, wait);
722}
723
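/* Enqueue 't' as a priority donor: select a donee (the top of sem->donees, or
 * an affinity-advised choice), displace the donee's existing donor (if any)
 * to the PQ, record the donation in the donee's inheritance heap, and
 * propagate any resulting priority increase to the donee or to the owner of
 * the donee's FIFO queue. Releases sem->lock before returning.
 */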
724static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem,
725 ikglp_wait_state_t* wait,
726 unsigned long flags)
727{
728 struct task_struct *t = wait->task;
729 ikglp_donee_heap_node_t *donee_node = NULL;
730 struct task_struct *donee;
731
732 struct task_struct *old_max_eff_prio;
733 struct task_struct *new_max_eff_prio;
734 struct task_struct *new_prio = NULL;
735
736 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
737 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
738 INIT_BINHEAP_NODE(&wait->pq_node.node);
739 INIT_BINHEAP_NODE(&wait->node);
740
741// TRACE_CUR("Adding %s/%d as donor.\n", t->comm, t->pid);
742// TRACE_CUR("donors Before:\n");
743// print_donors(sem->donors.root, 1);
744
745 // Add donor to the global list.
746 ikglp_add_global_list(sem, t, &wait->global_heap_node);
747
748 // Select a donee
749#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
750 donee_node = (sem->aff_obs) ?
751 sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) :
752 binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
753#else
754 donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
755#endif
756
757 donee = donee_node->task;
758
759 TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid);
760
761 TRACE_CUR("Temporarily removing %s/%d to donee list.\n",
762 donee->comm, donee->pid);
763// TRACE_CUR("donees Before:\n");
764// print_donees(sem, sem->donees.root, 1);
765
766 //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly
767 binheap_delete(&donee_node->node, &sem->donees);
768
769// TRACE_CUR("donees After:\n");
770// print_donees(sem, sem->donees.root, 1);
771
772
773 wait->donee_info = donee_node;
774
775 // Add t to donor heap.
776 binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node);
777
778 // Now adjust the donee's priority.
779
780 // Lock the donee's inheritance heap.
781 raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock);
782
783 old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
784
785 if(donee_node->donor_info) {
786 // Steal donation relation. Evict old donor to PQ.
787
788 // Remove old donor from donor heap
789 ikglp_wait_state_t *old_wait = donee_node->donor_info;
790 struct task_struct *old_donor = old_wait->task;
791
792 TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d. Moving old donor to PQ.\n",
793 donee->comm, donee->pid, old_donor->comm, old_donor->pid);
794
795 binheap_delete(&old_wait->node, &sem->donors);
796
797 // Remove donation from donee's inheritance heap.
798 binheap_delete(&old_wait->prio_donation.hp_binheap_node,
799 &tsk_rt(donee)->hp_blocked_tasks);
800 // WARNING: have not updated inh_prio!
801
802 // Add old donor to PQ.
803 __ikglp_enqueue_on_pq(sem, old_wait);
804
805 // Remove old donor from the global heap.
806 ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node);
807 }
808
809 // Add back donee's node to the donees heap with increased prio
810 donee_node->donor_info = wait;
811 INIT_BINHEAP_NODE(&donee_node->node);
812
813
814 TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid);
815// TRACE_CUR("donees Before:\n");
816// print_donees(sem, sem->donees.root, 1);
817
818 binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node);
819
820// TRACE_CUR("donees After:\n");
821// print_donees(sem, sem->donees.root, 1);
822
823 // Add an inheritance/donation to the donee's inheritance heap.
824 wait->prio_donation.lock = (struct litmus_lock*)sem;
825 wait->prio_donation.hp_waiter_eff_prio = t;
826 wait->prio_donation.hp_waiter_ptr = NULL;
827 INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node);
828
829 binheap_add(&wait->prio_donation.hp_binheap_node,
830 &tsk_rt(donee)->hp_blocked_tasks,
831 struct nested_info, hp_binheap_node);
832
833 new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
834
835 if(new_max_eff_prio != old_max_eff_prio) {
836 if ((effective_priority(donee) == old_max_eff_prio) ||
837 (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){
838 TRACE_TASK(t, "Donation increases %s/%d's effective priority\n",
839 donee->comm, donee->pid);
840 new_prio = new_max_eff_prio;
841 }
842// else {
843// // should be bug. donor would not be in top-m.
844// TRACE_TASK(t, "Donation is not greater than base prio of %s/%d?\n", donee->comm, donee->pid);
845// WARN_ON(1);
846// }
847// }
848// else {
849// // should be bug. donor would not be in top-m.
850// TRACE_TASK(t, "No change in %s/%d's inheritance heap?\n", donee->comm, donee->pid);
851// WARN_ON(1);
852 }
853
854 if(new_prio) {
855 struct fifo_queue *donee_fq = donee_node->fq;
856
857 if(donee != donee_fq->owner) {
858 TRACE_TASK(t, "%s/%d is not the owner. Propagating priority to owner %s/%d.\n",
859 donee->comm, donee->pid,
860 donee_fq->owner->comm, donee_fq->owner->pid);
861
862 raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
863 ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags); // unlocks sem->lock
864 }
865 else {
866 TRACE_TASK(t, "%s/%d is the owner. Propagating priority immediately.\n",
867 donee->comm, donee->pid);
868 litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags); // unlocks sem->lock and donee's heap lock
869 }
870 }
871 else {
872 TRACE_TASK(t, "No change in effective priority (it is %s/%d). BUG?\n",
873 new_max_eff_prio->comm, new_max_eff_prio->pid);
874 raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
875 unlock_fine_irqrestore(&sem->lock, flags);
876 }
877
878
879// TRACE_CUR("donors After:\n");
880// print_donors(sem->donors.root, 1);
881}
882
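/* Acquire one replica. If fewer than m requests occupy the FIFO queues, the
 * request is enqueued on a FIFO queue (acquiring immediately if that queue is
 * empty). Otherwise the request either waits in the priority queue (if its
 * base priority is below that of the m-th highest pending request) or becomes
 * a priority donor. Blocked callers suspend until they are granted a replica;
 * the return value identifies the replica (or the affinity-mapped resource).
 */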
883int ikglp_lock(struct litmus_lock* l)
884{
885 struct task_struct* t = current;
886 struct ikglp_semaphore *sem = ikglp_from_lock(l);
887 unsigned long flags = 0, real_flags;
888 struct fifo_queue *fq = NULL;
889 int replica = -EINVAL;
890
891#ifdef CONFIG_LITMUS_DGL_SUPPORT
892 raw_spinlock_t *dgl_lock;
893#endif
894
895 ikglp_wait_state_t wait;
896
897 if (!is_realtime(t))
898 return -EPERM;
899
900#ifdef CONFIG_LITMUS_DGL_SUPPORT
901 dgl_lock = litmus->get_dgl_spinlock(t);
902#endif
903
904 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
905
906 lock_global_irqsave(dgl_lock, flags);
907 lock_fine_irqsave(&sem->lock, flags);
908
909 if(sem->nr_in_fifos < sem->m) {
910 // enqueue somewhere
911#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
912 fq = (sem->aff_obs) ?
913 sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
914 sem->shortest_fifo_queue;
915#else
916 fq = sem->shortest_fifo_queue;
917#endif
918 if(fq->count == 0) {
919 // take available resource
920 replica = ikglp_get_idx(sem, fq);
921
922 ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
923
924 unlock_global_irqrestore(dgl_lock, flags);
925 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
926 goto acquired;
927 }
928 else {
929 wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
930
931 tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
932 mb();
933
934 /* FIXME: interruptible would be nice some day */
935 set_task_state(t, TASK_UNINTERRUPTIBLE);
936
937 ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
938 }
939 }
940 else {
941 // donor!
942 wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
943
944 tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
945 mb();
946
947 /* FIXME: interruptible would be nice some day */
948 set_task_state(t, TASK_UNINTERRUPTIBLE);
949
950 if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
951 // enqueue on PQ
952 ikglp_enqueue_on_pq(sem, &wait);
953 unlock_fine_irqrestore(&sem->lock, flags);
954 }
955 else {
956 // enqueue as donor
957 ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock
958 }
959 }
960
961 unlock_global_irqrestore(dgl_lock, flags);
962 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
963
964 TS_LOCK_SUSPEND;
965
966 suspend_for_lock();
967
968 TS_LOCK_RESUME;
969
970 fq = ikglp_get_queue(sem, t);
971 BUG_ON(!fq);
972
973 replica = ikglp_get_idx(sem, fq);
974
975acquired:
976 TRACE_CUR("Acquired lock %d, queue %d\n",
977 l->ident, replica);
978
979#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
980 if(sem->aff_obs) {
981 return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
982 }
983#endif
984
985 return replica;
986}
987
988//int ikglp_lock(struct litmus_lock* l)
989//{
990// struct task_struct* t = current;
991// struct ikglp_semaphore *sem = ikglp_from_lock(l);
992// unsigned long flags = 0, real_flags;
993// struct fifo_queue *fq = NULL;
994// int replica = -EINVAL;
995//
996//#ifdef CONFIG_LITMUS_DGL_SUPPORT
997// raw_spinlock_t *dgl_lock;
998//#endif
999//
1000// ikglp_wait_state_t wait;
1001//
1002// if (!is_realtime(t))
1003// return -EPERM;
1004//
1005//#ifdef CONFIG_LITMUS_DGL_SUPPORT
1006// dgl_lock = litmus->get_dgl_spinlock(t);
1007//#endif
1008//
1009// raw_spin_lock_irqsave(&sem->real_lock, real_flags);
1010//
1011// lock_global_irqsave(dgl_lock, flags);
1012// lock_fine_irqsave(&sem->lock, flags);
1013//
1014//
1015//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1016// fq = (sem->aff_obs) ?
1017// sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
1018// sem->shortest_fifo_queue;
1019//#else
1020// fq = sem->shortest_fifo_queue;
1021//#endif
1022//
1023// if(fq->count == 0) {
1024// // take available resource
1025// replica = ikglp_get_idx(sem, fq);
1026//
1027// ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
1028//
1029// unlock_global_irqrestore(dgl_lock, flags);
1030// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1031// }
1032// else
1033// {
1034// // we have to suspend.
1035//
1036// wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
1037//
1038// tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
1039// mb();
1040//
1041// /* FIXME: interruptible would be nice some day */
1042// set_task_state(t, TASK_UNINTERRUPTIBLE);
1043//
1044// if(fq->count < sem->max_fifo_len) {
1045// // enqueue on fq
1046// ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
1047// }
1048// else {
1049//
1050// TRACE_CUR("IKGLP fifo queues are full (at least they better be).\n");
1051//
1052// // no room in fifos. Go to PQ or donors.
1053//
1054// if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
1055// // enqueue on PQ
1056// ikglp_enqueue_on_pq(sem, &wait);
1057// unlock_fine_irqrestore(&sem->lock, flags);
1058// }
1059// else {
1060// // enqueue as donor
1061// ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock
1062// }
1063// }
1064//
1065// unlock_global_irqrestore(dgl_lock, flags);
1066// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1067//
1068// TS_LOCK_SUSPEND;
1069//
1070// schedule();
1071//
1072// TS_LOCK_RESUME;
1073//
1074// fq = ikglp_get_queue(sem, t);
1075// BUG_ON(!fq);
1076//
1077// replica = ikglp_get_idx(sem, fq);
1078// }
1079//
1080// TRACE_CUR("Acquired lock %d, queue %d\n",
1081// l->ident, replica);
1082//
1083//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1084// if(sem->aff_obs) {
1085// return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
1086// }
1087//#endif
1088//
1089// return replica;
1090//}
1091
1092static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem,
1093 struct fifo_queue *fq,
1094 ikglp_wait_state_t *donor_info)
1095{
1096 struct task_struct *t = donor_info->task;
1097
1098 TRACE_CUR("Donor %s/%d being moved to fq %d\n",
1099 t->comm,
1100 t->pid,
1101 ikglp_get_idx(sem, fq));
1102
1103 binheap_delete(&donor_info->node, &sem->donors);
1104
1105 __ikglp_enqueue_on_fq(sem, fq, t,
1106 &donor_info->fq_node,
1107 NULL, // already in global_list, so pass null to prevent adding 2nd time.
1108 &donor_info->donee_heap_node);
1109
1110 // warning:
1111 // ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
1112}
1113
1114static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem,
1115 struct fifo_queue *fq,
1116 ikglp_wait_state_t *wait)
1117{
1118 struct task_struct *t = wait->task;
1119
1120 TRACE_CUR("PQ request %s/%d being moved to fq %d\n",
1121 t->comm,
1122 t->pid,
1123 ikglp_get_idx(sem, fq));
1124
1125 binheap_delete(&wait->pq_node.node, &sem->priority_queue);
1126
1127 __ikglp_enqueue_on_fq(sem, fq, t,
1128 &wait->fq_node,
1129 &wait->global_heap_node,
1130 &wait->donee_heap_node);
1131 // warning:
1132 // ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
1133}
1134
1135static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal(
1136 struct ikglp_semaphore* sem)
1137{
1138 /* must hold sem->lock */
1139
1140 struct fifo_queue *fq = NULL;
1141 struct list_head *pos;
1142 struct task_struct *queued;
1143 int i;
1144
1145 for(i = 0; i < sem->nr_replicas; ++i) {
1146 if( (sem->fifo_queues[i].count > 1) &&
1147 (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) {
1148
1149 TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n",
1150 ikglp_get_idx(sem, &sem->fifo_queues[i]),
1151 sem->fifo_queues[i].hp_waiter->comm,
1152 sem->fifo_queues[i].hp_waiter->pid,
1153 (fq) ? ikglp_get_idx(sem, fq) : -1,
1154 (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "nil") : "nilXX",
1155 (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : -1) : -2);
1156
1157 fq = &sem->fifo_queues[i];
1158
1159 WARN_ON(!(fq->hp_waiter));
1160 }
1161 }
1162
1163 if(fq) {
1164 struct task_struct *max_hp = fq->hp_waiter;
1165 ikglp_wait_state_t* ret = NULL;
1166
1167 TRACE_CUR("Searching for %s/%d on fq %d\n",
1168 max_hp->comm,
1169 max_hp->pid,
1170 ikglp_get_idx(sem, fq));
1171
1172 BUG_ON(!max_hp);
1173
1174 list_for_each(pos, &fq->wait.task_list) {
1175 wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list);
1176
1177 queued = (struct task_struct*) wait->private;
1178
1179 TRACE_CUR("fq %d entry: %s/%d\n",
1180 ikglp_get_idx(sem, fq),
1181 queued->comm,
1182 queued->pid);
1183
1184 /* Compare task prios, find high prio task. */
1185 if (queued == max_hp) {
1186 TRACE_CUR("Found it!\n");
1187 ret = container_of(wait, ikglp_wait_state_t, fq_node);
1188 }
1189 }
1190
1191 WARN_ON(!ret);
1192 return ret;
1193 }
1194
1195 return(NULL);
1196}
1197
1198static void ikglp_steal_to_fq(struct ikglp_semaphore *sem,
1199 struct fifo_queue *fq,
1200 ikglp_wait_state_t *fq_wait)
1201{
1202 struct task_struct *t = fq_wait->task;
1203 struct fifo_queue *fq_steal = fq_wait->donee_heap_node.fq;
1204
1205 TRACE_CUR("FQ request %s/%d being moved to fq %d\n",
1206 t->comm,
1207 t->pid,
1208 ikglp_get_idx(sem, fq));
1209
1210 fq_wait->donee_heap_node.fq = fq; // just to be safe
1211
1212
1213 __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node);
1214 --(fq_steal->count);
1215
1216#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1217 if(sem->aff_obs) {
1218 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t);
1219 }
1220#endif
1221
1222 if(t == fq_steal->hp_waiter) {
1223 fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL);
1224 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1225 ikglp_get_idx(sem, fq_steal),
1226 (fq_steal->hp_waiter) ? fq_steal->hp_waiter->comm : "nil",
1227 (fq_steal->hp_waiter) ? fq_steal->hp_waiter->pid : -1);
1228 }
1229
1230
1231 // Update shortest.
1232 if(fq_steal->count < sem->shortest_fifo_queue->count) {
1233 sem->shortest_fifo_queue = fq_steal;
1234 }
1235
1236 __ikglp_enqueue_on_fq(sem, fq, t,
1237 &fq_wait->fq_node,
1238 NULL,
1239 NULL);
1240
1241 // warning: We have not checked the priority inheritance of fq's owner yet.
1242}
1243
1244
1245static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem,
1246 struct fifo_queue *fq,
1247 ikglp_wait_state_t *old_wait)
1248{
1249 struct task_struct *t = old_wait->task;
1250
1251 BUG_ON(old_wait->donee_heap_node.fq != fq);
1252
1253 TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n",
1254 ikglp_get_idx(sem, fq));
1255
1256 // need to migrate global_heap_node and donee_heap_node off of the stack
1257 // to the nodes allocated for the owner of this fq.
1258
1259 // TODO: Enhance binheap() to perform this operation in place.
1260
1261 ikglp_del_global_list(sem, t, &old_wait->global_heap_node); // remove
1262 fq->global_heap_node = old_wait->global_heap_node; // copy
1263 ikglp_add_global_list(sem, t, &fq->global_heap_node); // re-add
1264
1265 binheap_delete(&old_wait->donee_heap_node.node, &sem->donees); // remove
1266 fq->donee_heap_node = old_wait->donee_heap_node; // copy
1267
1268 if(fq->donee_heap_node.donor_info) {
1269 // let donor know that our location has changed
1270 BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t); // validate cross-link
1271 fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node;
1272 }
1273 INIT_BINHEAP_NODE(&fq->donee_heap_node.node);
1274 binheap_add(&fq->donee_heap_node.node, &sem->donees,
1275 ikglp_donee_heap_node_t, node); // re-add
1276}
1277
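/* Release the replica held by the caller. A replacement request is pulled
 * into the freed FIFO slot in this order of preference: the caller's own
 * donor, any other donor, the head of the priority queue, or (if this queue
 * is now empty) a waiter stolen from another FIFO queue. All of the caller's
 * inheritance is dropped, affected priorities are re-evaluated, and the next
 * waiter on the queue (if any) is woken as the new owner.
 */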
1278int ikglp_unlock(struct litmus_lock* l)
1279{
1280 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1281 struct task_struct *t = current;
1282 struct task_struct *donee = NULL;
1283 struct task_struct *next = NULL;
1284 struct task_struct *new_on_fq = NULL;
1285 struct fifo_queue *fq_of_new_on_fq = NULL;
1286
1287 ikglp_wait_state_t *other_donor_info = NULL;
1288 struct fifo_queue *to_steal = NULL;
1289 int need_steal_prio_reeval = 0;
1290 struct fifo_queue *fq;
1291
1292#ifdef CONFIG_LITMUS_DGL_SUPPORT
1293 raw_spinlock_t *dgl_lock;
1294#endif
1295
1296 unsigned long flags = 0, real_flags;
1297
1298 int err = 0;
1299
1300 fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner.
1301
1302 if (!fq) {
1303 err = -EINVAL;
1304 goto out;
1305 }
1306
1307#ifdef CONFIG_LITMUS_DGL_SUPPORT
1308 dgl_lock = litmus->get_dgl_spinlock(t);
1309#endif
1310 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
1311
1312 lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper
1313 lock_fine_irqsave(&sem->lock, flags);
1314
1315 TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq));
1316
1317
1318 // Remove 't' from the heaps, but data in nodes will still be good.
1319 ikglp_del_global_list(sem, t, &fq->global_heap_node);
1320 binheap_delete(&fq->donee_heap_node.node, &sem->donees);
1321
1322 fq->owner = NULL; // no longer owned!!
1323 --(fq->count);
1324 if(fq->count < sem->shortest_fifo_queue->count) {
1325 sem->shortest_fifo_queue = fq;
1326 }
1327 --(sem->nr_in_fifos);
1328
1329#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1330 if(sem->aff_obs) {
1331 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t);
1332 sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t);
1333 }
1334#endif
1335
1336 // Move the next request into the FQ and update heaps as needed.
1337 // We defer re-evaluation of priorities to later in the function.
1338 if(fq->donee_heap_node.donor_info) { // move my donor to FQ
1339 ikglp_wait_state_t *donor_info = fq->donee_heap_node.donor_info;
1340
1341 new_on_fq = donor_info->task;
1342
1343 // donor moved to FQ
1344 donee = t;
1345
1346#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1347 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1348 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1349 if(fq_of_new_on_fq->count == 0) {
1350 // ignore it?
1351// fq_of_new_on_fq = fq;
1352 }
1353 }
1354 else {
1355 fq_of_new_on_fq = fq;
1356 }
1357#else
1358 fq_of_new_on_fq = fq;
1359#endif
1360
1361 TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1362 new_on_fq->comm, new_on_fq->pid,
1363 ikglp_get_idx(sem, fq_of_new_on_fq),
1364 ikglp_get_idx(sem, fq));
1365
1366
1367 ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, donor_info);
1368 }
1369 else if(!binheap_empty(&sem->donors)) { // No donor, so move any donor to FQ
1370 // move other donor to FQ
1371 // Select a donor
1372#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1373 other_donor_info = (sem->aff_obs) ?
1374 sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) :
1375 binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1376#else
1377 other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1378#endif
1379
1380 new_on_fq = other_donor_info->task;
1381 donee = other_donor_info->donee_info->task;
1382
1383 // update the donee's heap position.
1384 other_donor_info->donee_info->donor_info = NULL; // clear the cross-link
1385 binheap_decrease(&other_donor_info->donee_info->node, &sem->donees);
1386
1387#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1388 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1389 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1390 if(fq_of_new_on_fq->count == 0) {
1391 // ignore it?
1392// fq_of_new_on_fq = fq;
1393 }
1394 }
1395 else {
1396 fq_of_new_on_fq = fq;
1397 }
1398#else
1399 fq_of_new_on_fq = fq;
1400#endif
1401
1402 TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1403 new_on_fq->comm, new_on_fq->pid,
1404 ikglp_get_idx(sem, fq_of_new_on_fq),
1405 ikglp_get_idx(sem, fq));
1406
1407 ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, other_donor_info);
1408 }
1409 else if(!binheap_empty(&sem->priority_queue)) { // No donors, so move PQ
1410 ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue,
1411 ikglp_heap_node_t, node);
1412 ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t,
1413 pq_node);
1414
1415 new_on_fq = pq_wait->task;
1416
1417#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1418 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1419 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1420 if(fq_of_new_on_fq->count == 0) {
1421 // ignore it?
1422// fq_of_new_on_fq = fq;
1423 }
1424 }
1425 else {
1426 fq_of_new_on_fq = fq;
1427 }
1428#else
1429 fq_of_new_on_fq = fq;
1430#endif
1431
1432 TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1433 new_on_fq->comm, new_on_fq->pid,
1434 ikglp_get_idx(sem, fq_of_new_on_fq),
1435 ikglp_get_idx(sem, fq));
1436
1437 ikglp_move_pq_to_fq(sem, fq_of_new_on_fq, pq_wait);
1438 }
1439 else if(fq->count == 0) { // No PQ and this queue is empty, so steal.
1440 ikglp_wait_state_t *fq_wait;
1441
1442 TRACE_TASK(t, "Looking to steal a request for fq %d...\n",
1443 ikglp_get_idx(sem, fq));
1444
1445#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1446 fq_wait = (sem->aff_obs) ?
1447 sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) :
1448 ikglp_find_hp_waiter_to_steal(sem);
1449#else
1450 fq_wait = ikglp_find_hp_waiter_to_steal(sem);
1451#endif
1452
1453 if(fq_wait) {
1454 to_steal = fq_wait->donee_heap_node.fq;
1455
1456 new_on_fq = fq_wait->task;
1457 fq_of_new_on_fq = fq;
1458 need_steal_prio_reeval = (new_on_fq == to_steal->hp_waiter);
1459
1460 TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n",
1461 new_on_fq->comm, new_on_fq->pid,
1462 ikglp_get_idx(sem, to_steal),
1463 ikglp_get_idx(sem, fq));
1464
1465 ikglp_steal_to_fq(sem, fq, fq_wait);
1466 }
1467 else {
1468 TRACE_TASK(t, "Found nothing to steal for fq %d.\n",
1469 ikglp_get_idx(sem, fq));
1470 }
1471 }
1472 else { // move no one
1473 }
1474
1475 // 't' must drop all priority and clean up data structures before hand-off.
1476
1477 // DROP ALL INHERITANCE. IKGLP MUST BE OUTER-MOST
1478 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
1479 {
1480 int count = 0;
1481 while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) {
1482 binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks,
1483 struct nested_info, hp_binheap_node);
1484 ++count;
1485 }
1486 litmus->decrease_prio(t, NULL);
1487 WARN_ON(count > 2); // should never exceed 2: only the local fq inheritance and one donation are possible.
1488 }
1489 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
1490
1491
1492
1493 // Now patch up other priorities.
1494 //
1495 // At most one of the following:
1496 // if(donee && donee != t), decrease prio, propagate to owner, or onward
1497 // if(to_steal), update owner's prio (hp_waiter has already been set)
1498 //
1499
1500 BUG_ON((other_donor_info != NULL) && (to_steal != NULL));
1501
1502 if(other_donor_info) {
1503 struct fifo_queue *other_fq = other_donor_info->donee_info->fq;
1504
1505 BUG_ON(!donee);
1506 BUG_ON(donee == t);
1507
1508 TRACE_TASK(t, "Terminating donation relation of donor %s/%d to donee %s/%d!\n",
1509 other_donor_info->task->comm, other_donor_info->task->pid,
1510 donee->comm, donee->pid);
1511
1512 // need to terminate donation relation.
1513 if(donee == other_fq->owner) {
1514 TRACE_TASK(t, "Donee %s/%d is the owner of fq %d.\n",
1515 donee->comm, donee->pid,
1516 ikglp_get_idx(sem, other_fq));
1517
1518 ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, other_fq, sem, flags);
1519 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1520 }
1521 else {
1522 TRACE_TASK(t, "Donee %s/%d is blocked in fq %d.\n",
1523 donee->comm, donee->pid,
1524 ikglp_get_idx(sem, other_fq));
1525
1526 ikglp_remove_donation_from_fq_waiter(donee, &other_donor_info->prio_donation.hp_binheap_node);
1527 if(donee == other_fq->hp_waiter) {
1528 TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. Rechecking hp_waiter.\n",
1529 donee->comm, donee->pid,
1530 ikglp_get_idx(sem, other_fq));
1531
1532 other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL);
1533 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1534 ikglp_get_idx(sem, other_fq),
1535 (other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "nil",
1536 (other_fq->hp_waiter) ? other_fq->hp_waiter->pid : -1);
1537
1538 ikglp_refresh_owners_prio_decrease(other_fq, sem, flags); // unlocks sem->lock. reacquire it.
1539 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1540 }
1541 }
1542 }
1543 else if(to_steal) {
1544 TRACE_TASK(t, "Rechecking priority inheritance of fq %d, triggered by stealing.\n",
1545 ikglp_get_idx(sem, to_steal));
1546
1547 if(need_steal_prio_reeval) {
1548 ikglp_refresh_owners_prio_decrease(to_steal, sem, flags); // unlocks sem->lock. reacquire it.
1549 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1550 }
1551 }
1552
1553 // check for new HP waiter.
1554 if(new_on_fq) {
1555 if(fq == fq_of_new_on_fq) {
1556 // fq->owner is null, so just update the hp_waiter without locking.
1557 if(new_on_fq == fq->hp_waiter) {
1558 TRACE_TASK(t, "new_on_fq (%s/%d) is already hp_waiter.\n",
1559 fq->hp_waiter->comm, fq->hp_waiter->pid);
1560 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure...
1561 }
1562 else if(litmus->compare(new_on_fq, fq->hp_waiter)) {
1563 if(fq->hp_waiter)
1564 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
1565 fq->hp_waiter->comm, fq->hp_waiter->pid);
1566 else
1567 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
1568
1569 fq->hp_waiter = new_on_fq;
1570 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
1571
1572 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1573 ikglp_get_idx(sem, fq),
1574 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1575 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1576 }
1577 }
1578 else {
1579 ikglp_refresh_owners_prio_increase(new_on_fq, fq_of_new_on_fq, sem, flags); // unlocks sem->lock. reacquire it.
1580 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1581 }
1582 }
1583
1584wake_kludge:
1585 if(waitqueue_active(&fq->wait))
1586 {
1587 wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list);
1588 ikglp_wait_state_t *fq_wait = container_of(wait, ikglp_wait_state_t, fq_node);
1589 next = (struct task_struct*) wait->private;
1590
1591 __remove_wait_queue(&fq->wait, wait);
1592
1593 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
1594 ikglp_get_idx(sem, fq),
1595 next->comm, next->pid);
1596
1597 // migrate wait-state to fifo-memory.
1598 ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait);
1599
1600 /* next becomes the resource holder */
1601 fq->owner = next;
1602 tsk_rt(next)->blocked_lock = NULL;
1603
1604#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1605 if(sem->aff_obs) {
1606 sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next);
1607 }
1608#endif
1609
1610 /* determine new hp_waiter if necessary */
1611 if (next == fq->hp_waiter) {
1612
1613 TRACE_TASK(next, "was highest-prio waiter\n");
1614 /* next has the highest priority --- it doesn't need to
1615 * inherit. However, we need to make sure that the
1616 * next-highest priority in the queue is reflected in
1617 * hp_waiter. */
1618 fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL);
1619 TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n",
1620 ikglp_get_idx(sem, fq),
1621 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1622 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1623
1624 fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ?
1625 effective_priority(fq->hp_waiter) : NULL;
1626
1627 if (fq->hp_waiter)
1628 TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n");
1629 else
1630 TRACE("no further waiters\n");
1631
1632 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
1633
1634// TRACE_TASK(next, "Heap Before:\n");
1635// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
1636
1637 binheap_add(&fq->nest.hp_binheap_node,
1638 &tsk_rt(next)->hp_blocked_tasks,
1639 struct nested_info,
1640 hp_binheap_node);
1641
1642// TRACE_TASK(next, "Heap After:\n");
1643// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
1644
1645 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
1646 }
1647 else {
1648 /* Well, if 'next' is not the highest-priority waiter,
1649 * then it (probably) ought to inherit the highest-priority
1650 * waiter's priority. */
1651 TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n",
1652 ikglp_get_idx(sem, fq),
1653 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1654 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1655
1656 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
1657
1658 binheap_add(&fq->nest.hp_binheap_node,
1659 &tsk_rt(next)->hp_blocked_tasks,
1660 struct nested_info,
1661 hp_binheap_node);
1662
1663 /* It is possible that 'next' *should* be the hp_waiter, but isn't
1664 * because that update hasn't yet executed (update operation is
1665 * probably blocked on mutex->lock). So only inherit if the top of
1666 * 'next's inheritance heap is indeed the effective priority of hp_waiter.
1667 * (We use fq->nest.hp_waiter_eff_prio instead of effective_priority(hp_waiter)
1668 * since the effective priority of hp_waiter can change (and the
1669 * update has not made it to this lock).)
1670 */
1671 if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
1672 fq->nest.hp_waiter_eff_prio))
1673 {
1674 if(fq->nest.hp_waiter_eff_prio)
1675 litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio);
1676 else
1677 WARN_ON(1);
1678 }
1679
1680 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
1681 }
1682
1683
1684 // wake up the new resource holder!
1685 wake_up_process(next);
1686 }
1687 if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) {
1688 // The task we promoted went to an empty FQ. (Why didn't stealing pick this up?)
1689 // Wake up the new guy too.
1690
1691 BUG_ON(fq_of_new_on_fq->owner != NULL);
1692
1693 fq = fq_of_new_on_fq;
1694 fq_of_new_on_fq = NULL;
1695 goto wake_kludge;
1696 }
1697
1698 unlock_fine_irqrestore(&sem->lock, flags);
1699 unlock_global_irqrestore(dgl_lock, flags);
1700
1701 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1702
1703out:
1704 return err;
1705}
1706
1707
1708
1709int ikglp_close(struct litmus_lock* l)
1710{
1711 struct task_struct *t = current;
1712 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1713 unsigned long flags;
1714
1715 int owner = 0;
1716 int i;
1717
1718 raw_spin_lock_irqsave(&sem->real_lock, flags);
1719
1720 for(i = 0; i < sem->nr_replicas; ++i) {
1721 if(sem->fifo_queues[i].owner == t) {
1722 owner = 1;
1723 break;
1724 }
1725 }
1726
1727 raw_spin_unlock_irqrestore(&sem->real_lock, flags);
1728
1729 if (owner)
1730 ikglp_unlock(l);
1731
1732 return 0;
1733}
1734
1735void ikglp_free(struct litmus_lock* l)
1736{
1737 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1738
1739 kfree(sem->fifo_queues);
1740 kfree(sem);
1741}
1742
1743
1744
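/* Allocate and initialize an IKGLP instance. 'm' bounds the number of
 * requests allowed in the FIFO queues (and the size of the top-m list); the
 * replica count is copied from the user-space argument. Returns NULL on any
 * allocation or argument error.
 */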
1745struct litmus_lock* ikglp_new(int m,
1746 struct litmus_lock_ops* ops,
1747 void* __user arg)
1748{
1749 struct ikglp_semaphore* sem;
1750 int nr_replicas = 0;
1751 int i;
1752
1753 if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas)))
1754 {
1755 return(NULL);
1756 }
1757 if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas)))
1758 {
1759 return(NULL);
1760 }
1761 if(nr_replicas < 1)
1762 {
1763 return(NULL);
1764 }
1765
1766 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1767 if(!sem)
1768 {
1769 return NULL;
1770 }
1771
1772 sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL);
1773 if(!sem->fifo_queues)
1774 {
1775 kfree(sem);
1776 return NULL;
1777 }
1778
1779 sem->litmus_lock.ops = ops;
1780
1781#ifdef CONFIG_DEBUG_SPINLOCK
1782 {
1783 __raw_spin_lock_init(&sem->lock, ((struct litmus_lock*)sem)->cheat_lockdep, &((struct litmus_lock*)sem)->key);
1784 }
1785#else
1786 raw_spin_lock_init(&sem->lock);
1787#endif
1788
1789 raw_spin_lock_init(&sem->real_lock);
1790
1791 sem->nr_replicas = nr_replicas;
1792 sem->m = m;
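	/* max_fifo_len = ceil(m / nr_replicas); e.g., m = 5, nr_replicas = 2
	 * gives a per-queue bound of 3. */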
1793 sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0);
1794 sem->nr_in_fifos = 0;
1795
1796 TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n",
1797 sem->m,
1798 sem->nr_replicas,
1799 sem->max_fifo_len);
1800
1801 for(i = 0; i < nr_replicas; ++i)
1802 {
1803 struct fifo_queue* q = &(sem->fifo_queues[i]);
1804
1805 q->owner = NULL;
1806 q->hp_waiter = NULL;
1807 init_waitqueue_head(&q->wait);
1808 q->count = 0;
1809
1810 q->global_heap_node.task = NULL;
1811 INIT_BINHEAP_NODE(&q->global_heap_node.node);
1812
1813 q->donee_heap_node.task = NULL;
1814 q->donee_heap_node.donor_info = NULL;
1815 q->donee_heap_node.fq = NULL;
1816 INIT_BINHEAP_NODE(&q->donee_heap_node.node);
1817
1818 q->nest.lock = (struct litmus_lock*)sem;
1819 q->nest.hp_waiter_eff_prio = NULL;
1820 q->nest.hp_waiter_ptr = &q->hp_waiter;
1821 INIT_BINHEAP_NODE(&q->nest.hp_binheap_node);
1822 }
1823
1824 sem->shortest_fifo_queue = &sem->fifo_queues[0];
1825
1826 sem->top_m_size = 0;
1827
1828 // init heaps
1829 INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order);
1830 INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order);
1831 INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order);
1832 INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order);
1833 INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order);
1834
1835#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1836 sem->aff_obs = NULL;
1837#endif
1838
1839 return &sem->litmus_lock;
1840}
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
1871
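/* Replica-to-GPU mapping: replicas are spread round-robin across the
 * aff->nr_rsrc GPUs managed by this observer, and aff->offset shifts the
 * result into the global GPU numbering. For example, with nr_rsrc = 2 and
 * offset = 2, replicas 0, 1, 2, 3 map to GPUs 2, 3, 2, 3.
 */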
1872static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica)
1873{
1874 int gpu = replica % aff->nr_rsrc;
1875 return gpu;
1876}
1877
1878static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica)
1879{
1880 int gpu = __replica_to_gpu(aff, replica) + aff->offset;
1881 return gpu;
1882}
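/* Illustrative mapping (values assumed): with nr_rsrc = 2 GPUs, nr_simult = 2
 * (4 replicas total), and offset = 4, replicas 0 and 2 map to GPU 4 while
 * replicas 1 and 3 map to GPU 5. */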
1883
1884static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
1885{
1886 int replica = gpu - aff->offset;
1887 return replica;
1888}
1889
1890static inline int same_gpu(struct ikglp_affinity* aff, int replica_a, int replica_b)
1891{
1892 return(replica_to_gpu(aff, replica_a) == replica_to_gpu(aff, replica_b));
1893}
1894
1895static inline int has_affinity(struct ikglp_affinity* aff, struct task_struct* t, int replica)
1896{
1897 if(tsk_rt(t)->last_gpu >= 0)
1898 {
1899 return (tsk_rt(t)->last_gpu == replica_to_gpu(aff, replica));
1900 }
1901 return 0;
1902}
1903
1904int ikglp_aff_obs_close(struct affinity_observer* obs)
1905{
1906 return 0;
1907}
1908
1909void ikglp_aff_obs_free(struct affinity_observer* obs)
1910{
1911 struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs);
1912
1913 // make sure the thread destroying this semaphore will not
1914 // call the exit callback on a destroyed lock.
1915 struct task_struct *t = current;
1916 if (is_realtime(t) && tsk_rt(t)->rsrc_exit_cb_args == ikglp_aff)
1917 {
1918 tsk_rt(t)->rsrc_exit_cb = NULL;
1919 tsk_rt(t)->rsrc_exit_cb_args = NULL;
1920 }
1921
1922 kfree(ikglp_aff->nr_cur_users_on_rsrc);
1923 kfree(ikglp_aff->nr_aff_on_rsrc);
1924 kfree(ikglp_aff->q_info);
1925 kfree(ikglp_aff);
1926}
1927
1928static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops,
1929 struct ikglp_affinity_ops* ikglp_ops,
1930 void* __user args)
1931{
1932 struct ikglp_affinity* ikglp_aff;
1933 struct gpu_affinity_observer_args aff_args;
1934 struct ikglp_semaphore* sem;
1935 int i;
1936 unsigned long flags;
1937
1938 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
1939 return(NULL);
1940 }
1941 if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
1942 return(NULL);
1943 }
1944
1945 sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
1946
1947 if(sem->litmus_lock.type != IKGLP_SEM) {
1948 TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
1949 return(NULL);
1950 }
1951
1952 if((aff_args.nr_simult_users <= 0) ||
1953 (sem->nr_replicas%aff_args.nr_simult_users != 0)) {
1954 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
1955 "(%d) per replica. #replicas should be evenly divisible "
1956 "by #simult_users.\n",
1957 sem->litmus_lock.ident,
1958 sem->nr_replicas,
1959 aff_args.nr_simult_users);
1960 return(NULL);
1961 }
1962
1963// if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
1964// TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
1965// NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
1966//// return(NULL);
1967// }
1968
1969 ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
1970 if(!ikglp_aff) {
1971 return(NULL);
1972 }
1973
1974 ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL);
1975 if(!ikglp_aff->q_info) {
1976 kfree(ikglp_aff);
1977 return(NULL);
1978 }
1979
1980 ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
1981 if(!ikglp_aff->nr_cur_users_on_rsrc) {
1982 kfree(ikglp_aff->q_info);
1983 kfree(ikglp_aff);
1984 return(NULL);
1985 }
1986
1987 ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
1988 if(!ikglp_aff->nr_aff_on_rsrc) {
1989 kfree(ikglp_aff->nr_cur_users_on_rsrc);
1990 kfree(ikglp_aff->q_info);
1991 kfree(ikglp_aff);
1992 return(NULL);
1993 }
1994
1995 affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs);
1996
1997 ikglp_aff->ops = ikglp_ops;
1998 ikglp_aff->offset = aff_args.replica_to_gpu_offset;
1999 ikglp_aff->nr_simult = aff_args.nr_simult_users;
2000 ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult;
2001 ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0;
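	/* nr_rsrc is the number of physical GPUs behind this lock (replicas
	 * divided by simultaneous users per GPU). With relaxed rules,
	 * gpu_ikglp_advise_enqueue considers any queue shorter than m instead
	 * of capping candidates at ceil(m/k). */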
2002
2003 TRACE_CUR("GPU affinity_observer: offset = %d, nr_simult = %d, "
2004 "nr_rsrc = %d, relaxed_fifo_len = %d\n",
2005 ikglp_aff->offset, ikglp_aff->nr_simult, ikglp_aff->nr_rsrc,
2006 ikglp_aff->relax_max_fifo_len);
2007
2008 memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
2009 memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc));
2010
2011 for(i = 0; i < sem->nr_replicas; ++i) {
2012 ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
2013 ikglp_aff->q_info[i].estimated_len = 0;
2014
2015		// multiple q_info entries will point to the same resource (i.e., GPU)
2016		// if aff_args.nr_simult_users > 1
2017 ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
2018 ikglp_aff->q_info[i].nr_aff_users = &ikglp_aff->nr_aff_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
2019 }
2020
2021 // attach observer to the lock
2022 raw_spin_lock_irqsave(&sem->real_lock, flags);
2023 sem->aff_obs = ikglp_aff;
2024 raw_spin_unlock_irqrestore(&sem->real_lock, flags);
2025
2026 return &ikglp_aff->obs;
2027}
2028
2029
2030
2031
2032static int gpu_replica_to_resource(struct ikglp_affinity* aff,
2033 struct fifo_queue* fq) {
2034 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2035 return(replica_to_gpu(aff, ikglp_get_idx(sem, fq)));
2036}
2037
2038
2039// Smart IKGLP Affinity
2040
2041//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff)
2042//{
2043// struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2044// struct ikglp_queue_info *shortest = &aff->q_info[0];
2045// int i;
2046//
2047// for(i = 1; i < sem->nr_replicas; ++i) {
2048// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
2049// shortest = &aff->q_info[i];
2050// }
2051// }
2052//
2053// return(shortest);
2054//}
2055
2056struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
2057{
2058	// advise_enqueue must be smart so as not to break IKGLP rules:
2059	//  * No queue can be greater than ceil(m/k) in length. We may return
2060	//    such a queue, but IKGLP itself will redirect such requests
2061	//    to the donor or PQ mechanisms.
2062	//  * Cannot let a queue go idle while there are waiters in the PQ or
2063	//    donor heaps -- needed to guarantee parallel progress of waiters.
2064 //
2065 // We may be able to relax some of these constraints, but this will have to
2066 // be carefully evaluated.
2067 //
2068	// Heuristic strategy: find the shortest queue that is not full.
2069
2070 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2071 lt_t min_len;
2072 int min_nr_users, min_nr_aff_users;
2073 struct ikglp_queue_info *shortest, *aff_queue;
2074 struct fifo_queue *to_enqueue;
2075 int i;
2076 int affinity_gpu;
2077
2078 int max_fifo_len = (aff->relax_max_fifo_len) ?
2079 sem->m : sem->max_fifo_len;
2080
2081 // if we have no affinity, find the GPU with the least number of users
2082 // with active affinity
2083 if(unlikely(tsk_rt(t)->last_gpu < 0)) {
2084 int temp_min = aff->nr_aff_on_rsrc[0];
2085 affinity_gpu = aff->offset;
2086
2087 for(i = 1; i < aff->nr_rsrc; ++i) {
2088			if(aff->nr_aff_on_rsrc[i] < temp_min) {
				temp_min = aff->nr_aff_on_rsrc[i]; /* track the running minimum */
2089				affinity_gpu = aff->offset + i;
2090			}
2091 }
2092
2093 TRACE_CUR("no affinity. defaulting to %d with %d aff users.\n",
2094 affinity_gpu, temp_min);
2095 }
2096 else {
2097 affinity_gpu = tsk_rt(t)->last_gpu;
2098 }
2099
2100 // all things being equal, let's start with the queue with which we have
2101 // affinity. this helps us maintain affinity even when we don't have
2102	// an estimate for local-affinity execution time (i.e., the 2nd time on a GPU)
2103 aff_queue = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
2104 shortest = aff_queue;
2105
2106 // if(shortest == aff->shortest_queue) {
2107 // TRACE_CUR("special case: have affinity with shortest queue\n");
2108 // goto out;
2109 // }
2110
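	/* Seed the search with the affinity queue: its estimated backlog plus
	 * this task's critical-section estimate for a local (no-migration)
	 * execution. Other queues are chosen only if they beat this baseline
	 * (or win one of the tie-breaks below). */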
2111 min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
2112 min_nr_users = *(shortest->nr_cur_users);
2113 min_nr_aff_users = *(shortest->nr_aff_users);
2114
2115
2116 TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
2117 get_gpu_estimate(t, MIG_LOCAL),
2118 ikglp_get_idx(sem, shortest->q),
2119 shortest->q->count,
2120 min_len);
2121
2122 for(i = 0; i < sem->nr_replicas; ++i) {
2123 if(&aff->q_info[i] != shortest) {
2124 if(aff->q_info[i].q->count < max_fifo_len) {
2125 int want = 0;
2126
2127 lt_t migration =
2128 get_gpu_estimate(t,
2129 gpu_migration_distance(tsk_rt(t)->last_gpu,
2130 replica_to_gpu(aff, i)));
2131 lt_t est_len = aff->q_info[i].estimated_len + migration;
2132
2133 // queue is smaller, or they're equal and the other has a smaller number
2134 // of total users.
2135 //
2136				// tie-break on the smallest number of simultaneous users. this only
2137				// kicks in when there is more than one empty queue.
2138
2139 // TODO: Make "est_len < min_len" a fuzzy function that allows
2140 // queues "close enough" in length to be considered equal.
2141
2142 /* NOTE: 'shortest' starts out with affinity GPU */
2143 if(unlikely(shortest->q->count >= max_fifo_len)) { /* 'shortest' is full and i-th queue is not */
2144 want = 1;
2145 }
2146 else if(est_len < min_len) {
2147 want = 1; /* i-th queue has shortest length */
2148 }
2149 else if(unlikely(est_len == min_len)) { /* equal lengths */
2150 if(!has_affinity(aff, t, ikglp_get_idx(sem, shortest->q))) { /* don't sacrifice affinity on tie */
2151 if(has_affinity(aff, t, i)) {
2152 want = 1; /* switch to maintain affinity */
2153 }
2154 else if(*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) { /* favor one with less affinity load */
2155 want = 1;
2156 }
2157 else if((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && /* equal number of affinity */
2158 (*(aff->q_info[i].nr_cur_users) < min_nr_users)) { /* favor one with current fewer users */
2159 want = 1;
2160 }
2161 }
2162 }
2163
2164 if(want) {
2165 shortest = &aff->q_info[i];
2166 min_len = est_len;
2167 min_nr_users = *(aff->q_info[i].nr_cur_users);
2168 min_nr_aff_users = *(aff->q_info[i].nr_aff_users);
2169 }
2170
2171 TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
2172 get_gpu_estimate(t,
2173 gpu_migration_distance(tsk_rt(t)->last_gpu,
2174 replica_to_gpu(aff, i))),
2175 ikglp_get_idx(sem, aff->q_info[i].q),
2176 aff->q_info[i].q->count,
2177 est_len);
2178 }
2179 else {
2180 TRACE_CUR("queue %d is too long. ineligible for enqueue.\n",
2181 ikglp_get_idx(sem, aff->q_info[i].q));
2182 }
2183 }
2184 }
2185
2186 if(shortest->q->count >= max_fifo_len) {
2187 TRACE_CUR("selected fq %d is too long, but returning it anyway.\n",
2188 ikglp_get_idx(sem, shortest->q));
2189 }
2190
2191 to_enqueue = shortest->q;
2192 TRACE_CUR("enqueue on fq %d (count = %d) (non-aff wanted fq %d)\n",
2193 ikglp_get_idx(sem, to_enqueue),
2194 to_enqueue->count,
2195 ikglp_get_idx(sem, sem->shortest_fifo_queue));
2196
2197 return to_enqueue;
2198
2199 //return(sem->shortest_fifo_queue);
2200}
2201
2202
2203
2204
2205static ikglp_wait_state_t* pick_steal(struct ikglp_affinity* aff,
2206 int dest_gpu,
2207 struct fifo_queue* fq)
2208{
2209 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2210 ikglp_wait_state_t *wait = NULL;
2211 int max_improvement = -(MIG_NONE+1);
2212 int replica = ikglp_get_idx(sem, fq);
2213
2214 if(waitqueue_active(&fq->wait)) {
2215 int this_gpu = replica_to_gpu(aff, replica);
2216 struct list_head *pos;
2217
2218 list_for_each(pos, &fq->wait.task_list) {
2219 wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
2220 ikglp_wait_state_t *tmp_wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2221
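			/* Affinity improvement of stealing this waiter: its migration
			 * distance if it stays on this_gpu minus its distance if it
			 * moves to dest_gpu. A positive value means the steal moves
			 * the waiter closer to its last-used GPU. */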
2222 int tmp_improvement =
2223 gpu_migration_distance(this_gpu, tsk_rt(tmp_wait->task)->last_gpu) -
2224 gpu_migration_distance(dest_gpu, tsk_rt(tmp_wait->task)->last_gpu);
2225
2226 if(tmp_improvement > max_improvement) {
2227 wait = tmp_wait;
2228 max_improvement = tmp_improvement;
2229
2230 if(max_improvement >= (MIG_NONE-1)) {
2231 goto out;
2232 }
2233 }
2234 }
2235
2236 BUG_ON(!wait);
2237 }
2238 else {
2239 TRACE_CUR("fq %d is empty!\n", replica);
2240 }
2241
2242out:
2243
2244 TRACE_CUR("Candidate victim from fq %d is %s/%d. aff improvement = %d.\n",
2245 replica,
2246 (wait) ? wait->task->comm : "nil",
2247 (wait) ? wait->task->pid : -1,
2248 max_improvement);
2249
2250 return wait;
2251}
2252
2253
2254ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
2255 struct fifo_queue* dst)
2256{
2257	// Heuristic strategy: find the task with the greatest improvement in affinity.
2258 //
2259 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2260 ikglp_wait_state_t *to_steal_state = NULL;
2261// ikglp_wait_state_t *default_to_steal_state = ikglp_find_hp_waiter_to_steal(sem);
2262 int max_improvement = -(MIG_NONE+1);
2263 int replica, i;
2264 int dest_gpu;
2265
2266 replica = ikglp_get_idx(sem, dst);
2267 dest_gpu = replica_to_gpu(aff, replica);
2268
2269 for(i = 0; i < sem->nr_replicas; ++i) {
2270 ikglp_wait_state_t *tmp_to_steal_state =
2271 pick_steal(aff, dest_gpu, &sem->fifo_queues[i]);
2272
2273 if(tmp_to_steal_state) {
2274 int tmp_improvement =
2275 gpu_migration_distance(replica_to_gpu(aff, i), tsk_rt(tmp_to_steal_state->task)->last_gpu) -
2276 gpu_migration_distance(dest_gpu, tsk_rt(tmp_to_steal_state->task)->last_gpu);
2277
2278 if(tmp_improvement > max_improvement) {
2279 to_steal_state = tmp_to_steal_state;
2280 max_improvement = tmp_improvement;
2281
2282 if(max_improvement >= (MIG_NONE-1)) {
2283 goto out;
2284 }
2285 }
2286 }
2287 }
2288
2289out:
2290 if(!to_steal_state) {
2291 TRACE_CUR("Could not find anyone to steal.\n");
2292 }
2293 else {
2294 TRACE_CUR("Selected victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
2295 to_steal_state->task->comm, to_steal_state->task->pid,
2296 ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq),
2297 replica_to_gpu(aff, ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq)),
2298 ikglp_get_idx(sem, dst),
2299 dest_gpu,
2300 max_improvement);
2301
2302// TRACE_CUR("Non-aff wanted to select victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
2303// default_to_steal_state->task->comm, default_to_steal_state->task->pid,
2304// ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq),
2305// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
2306// ikglp_get_idx(sem, dst),
2307// replica_to_gpu(aff, ikglp_get_idx(sem, dst)),
2308//
2309// gpu_migration_distance(
2310// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
2311// tsk_rt(default_to_steal_state->task)->last_gpu) -
2312// gpu_migration_distance(dest_gpu, tsk_rt(default_to_steal_state->task)->last_gpu));
2313 }
2314
2315 return(to_steal_state);
2316}
2317
2318
2319static inline int has_donor(wait_queue_t* fq_wait)
2320{
2321 ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2322 return(wait->donee_heap_node.donor_info != NULL);
2323}
2324
2325static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff,
2326 struct fifo_queue* fq,
2327 int* dist_from_head)
2328{
2329 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2330 struct task_struct *donee;
2331 ikglp_donee_heap_node_t *donee_node;
2332 struct task_struct *mth_highest = ikglp_mth_highest(sem);
2333
2334// lt_t now = litmus_clock();
2335//
2336// TRACE_CUR("fq %d: mth_highest: %s/%d, deadline = %d: (donor) = ??? ",
2337// ikglp_get_idx(sem, fq),
2338// mth_highest->comm, mth_highest->pid,
2339// (int)get_deadline(mth_highest) - now);
2340
2341 if(fq->owner &&
2342 fq->donee_heap_node.donor_info == NULL &&
2343 mth_highest != fq->owner &&
2344 litmus->__compare(mth_highest, BASE, fq->owner, BASE)) {
2345 donee = fq->owner;
2346 donee_node = &(fq->donee_heap_node);
2347 *dist_from_head = 0;
2348
2349 BUG_ON(donee != donee_node->task);
2350
2351 TRACE_CUR("picked owner of fq %d as donee\n",
2352 ikglp_get_idx(sem, fq));
2353
2354 goto out;
2355 }
2356 else if(waitqueue_active(&fq->wait)) {
2357 struct list_head *pos;
2358
2359
2360// TRACE_CUR("fq %d: owner: %s/%d, deadline = %d: (donor) = %s/%d "
2361// "(mth_highest != fq->owner) = %d "
2362// "(mth_highest > fq->owner) = %d\n",
2363// ikglp_get_idx(sem, fq),
2364// (fq->owner) ? fq->owner->comm : "nil",
2365// (fq->owner) ? fq->owner->pid : -1,
2366// (fq->owner) ? (int)get_deadline(fq->owner) - now : -999,
2367// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->comm : "nil",
2368// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->pid : -1,
2369// (mth_highest != fq->owner),
2370// (litmus->__compare(mth_highest, BASE, fq->owner, BASE)));
2371
2372
2373 *dist_from_head = 1;
2374
2375 // iterating from the start of the queue is nice since this means
2376 // the donee will be closer to obtaining a resource.
2377 list_for_each(pos, &fq->wait.task_list) {
2378 wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
2379 ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2380
2381// TRACE_CUR("fq %d: waiter %d: %s/%d, deadline = %d (donor) = %s/%d "
2382// "(mth_highest != wait->task) = %d "
2383// "(mth_highest > wait->task) = %d\n",
2384// ikglp_get_idx(sem, fq),
2385// dist_from_head,
2386// wait->task->comm, wait->task->pid,
2387// (int)get_deadline(wait->task) - now,
2388// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->comm : "nil",
2389// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->pid : -1,
2390// (mth_highest != wait->task),
2391// (litmus->__compare(mth_highest, BASE, wait->task, BASE)));
2392
2393
2394 if(!has_donor(fq_wait) &&
2395 mth_highest != wait->task &&
2396 litmus->__compare(mth_highest, BASE, wait->task, BASE)) {
2397 donee = (struct task_struct*) fq_wait->private;
2398 donee_node = &wait->donee_heap_node;
2399
2400 BUG_ON(donee != donee_node->task);
2401
2402 TRACE_CUR("picked waiter in fq %d as donee\n",
2403 ikglp_get_idx(sem, fq));
2404
2405 goto out;
2406 }
2407 ++(*dist_from_head);
2408 }
2409 }
2410
2411 donee = NULL;
2412 donee_node = NULL;
2413 //*dist_from_head = sem->max_fifo_len + 1;
2414 *dist_from_head = IKGLP_INVAL_DISTANCE;
2415
2416 TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq));
2417
2418out:
2419
2420 TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n",
2421 ikglp_get_idx(sem, fq),
2422 (donee) ? (donee)->comm : "nil",
2423 (donee) ? (donee)->pid : -1,
2424 *dist_from_head);
2425
2426 return donee_node;
2427}
2428
2429ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection(
2430 struct ikglp_affinity* aff,
2431 struct task_struct* donor)
2432{
2433	// Heuristic strategy: find the highest-priority donee that is waiting on
2434 // a queue closest to our affinity. (1) The donee CANNOT already have a
2435 // donor (exception: donee is the lowest-prio task in the donee heap).
2436 // (2) Requests in 'top_m' heap are ineligible.
2437 //
2438	// Further strategy: amongst eligible donees waiting for the same GPU, pick
2439 // the one closest to the head of the FIFO queue (including owners).
2440 //
2441 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2442 ikglp_donee_heap_node_t *donee_node;
2443 gpu_migration_dist_t distance;
2444 int start, i, j;
2445
2446 ikglp_donee_heap_node_t *default_donee;
2447 ikglp_wait_state_t *default_donee_donor_info;
2448
2449 if(tsk_rt(donor)->last_gpu < 0) {
2450 // no affinity. just return the min prio, like standard IKGLP
2451 // TODO: Find something closer to the head of the queue??
2452 donee_node = binheap_top_entry(&sem->donees,
2453 ikglp_donee_heap_node_t,
2454 node);
2455 goto out;
2456 }
2457
2458
2459	// Temporarily break any donation relation of the default donee (the lowest-
2460	// prio task in the FIFO queues) to make it eligible for selection below.
2461 //
2462 // NOTE: The original donor relation *must* be restored, even if we select
2463	// the default donee through affinity-aware selection, before returning
2464 // from this function so we don't screw up our heap ordering.
2465 // The standard IKGLP algorithm will steal the donor relationship if needed.
2466 default_donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
2467 default_donee_donor_info = default_donee->donor_info; // back-up donor relation
2468 default_donee->donor_info = NULL; // temporarily break any donor relation.
2469
2470 // initialize our search
2471 donee_node = NULL;
2472 distance = MIG_NONE;
2473
2474 // TODO: The below search logic may work well for locating nodes to steal
2475 // when an FQ goes idle. Validate this code and apply it to stealing.
2476
2477 // begin search with affinity GPU.
2478 start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu);
2479 i = start;
2480 do { // "for each gpu" / "for each aff->nr_rsrc"
2481 gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i);
2482
2483 // only interested in queues that will improve our distance
2484 if(temp_distance < distance || donee_node == NULL) {
2485 int dist_from_head = IKGLP_INVAL_DISTANCE;
2486
2487			TRACE_CUR("searching for a donee on GPU %d\n", i);
2488
2489 // visit each queue and pick a donee. bail as soon as we find
2490 // one for this class.
2491
2492 for(j = 0; j < aff->nr_simult; ++j) {
2493 int temp_dist_from_head;
2494 ikglp_donee_heap_node_t *temp_donee_node;
2495 struct fifo_queue *fq;
2496
2497 fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]);
2498 temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head);
2499
2500 if(temp_dist_from_head < dist_from_head)
2501 {
2502 // we check all the FQs for this GPU to spread priorities
2503 // out across the queues. does this decrease jitter?
2504 donee_node = temp_donee_node;
2505 dist_from_head = temp_dist_from_head;
2506 }
2507 }
2508
2509 if(dist_from_head != IKGLP_INVAL_DISTANCE) {
2510				TRACE_CUR("found donee %s/%d; it is the %d-th waiter.\n",
2511 donee_node->task->comm, donee_node->task->pid,
2512 dist_from_head);
2513 }
2514 else {
2515				TRACE_CUR("found no eligible donees on GPU %d\n", i);
2516 }
2517 }
2518 else {
2519 TRACE_CUR("skipping GPU %d (distance = %d, best donor "
2520 "distance = %d)\n", i, temp_distance, distance);
2521 }
2522
2523 i = (i+1 < aff->nr_rsrc) ? i+1 : 0; // increment with wrap-around
2524 } while (i != start);
2525
2526
2527 // restore old donor info state.
2528 default_donee->donor_info = default_donee_donor_info;
2529
2530 if(!donee_node) {
2531 donee_node = default_donee;
2532
2533 TRACE_CUR("Could not find a donee. We have to steal one.\n");
2534 WARN_ON(default_donee->donor_info == NULL);
2535 }
2536
2537out:
2538
2539 TRACE_CUR("Selected donee %s/%d on fq %d (GPU %d) for %s/%d with affinity for GPU %d\n",
2540 donee_node->task->comm, donee_node->task->pid,
2541 ikglp_get_idx(sem, donee_node->fq),
2542 replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)),
2543 donor->comm, donor->pid, tsk_rt(donor)->last_gpu);
2544
2545 return(donee_node);
2546}
2547
2548
2549
2550static void __find_closest_donor(int target_gpu,
2551 struct binheap_node* donor_node,
2552 ikglp_wait_state_t** cur_closest,
2553 int* cur_dist)
2554{
2555 ikglp_wait_state_t *this_donor =
2556 binheap_entry(donor_node, ikglp_wait_state_t, node);
2557
2558 int this_dist =
2559 gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu);
2560
2561// TRACE_CUR("%s/%d: dist from target = %d\n",
2562// this_donor->task->comm,
2563// this_donor->task->pid,
2564// this_dist);
2565
2566 if(this_dist < *cur_dist) {
2567 // take this donor
2568 *cur_dist = this_dist;
2569 *cur_closest = this_donor;
2570 }
2571 else if(this_dist == *cur_dist) {
2572		// priority tie-break. Even though this is a pre-order traversal,
2573		// the structure is a heap, not a binary search tree, so we still need
2574		// to do an explicit priority comparison.
2575 if(!(*cur_closest) ||
2576 litmus->compare(this_donor->task, (*cur_closest)->task)) {
2577 *cur_dist = this_dist;
2578 *cur_closest = this_donor;
2579 }
2580 }
2581
2582 if(donor_node->left) __find_closest_donor(target_gpu, donor_node->left, cur_closest, cur_dist);
2583 if(donor_node->right) __find_closest_donor(target_gpu, donor_node->right, cur_closest, cur_dist);
2584}
2585
2586ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
2587{
2588	// Heuristic strategy: find the donor with the closest affinity to fq.
2589 // Tie-break on priority.
2590
2591	// We need to iterate over all the donors to do this. Unfortunately,
2592	// our donors are organized in a heap. We'll visit each node with a
2593	// recursive call. This is relatively safe since there are at most
2594	// sem->m donors, so the recursion never goes deep enough to threaten
2595	// the stack (even with 128 CPUs, the nesting depth is at most 7).
2596
2597 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2598 ikglp_wait_state_t *donor = NULL;
2599 int distance = MIG_NONE;
2600 int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq));
2601
2602#ifdef CONFIG_SCHED_DEBUG_TRACE
2603 ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
2604#endif
2605
2606 __find_closest_donor(gpu, sem->donors.root, &donor, &distance);
2607
2608 TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d "
2609 "(non-aff wanted %s/%d). differs = %d\n",
2610 donor->task->comm, donor->task->pid,
2611 distance,
2612 ikglp_get_idx(sem, fq),
2613 default_donor->task->comm, default_donor->task->pid,
2614 (donor->task != default_donor->task)
2615 );
2616
2617 return(donor);
2618}
2619
2620
2621
2622void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2623{
2624 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2625 int replica = ikglp_get_idx(sem, fq);
2626 int gpu = replica_to_gpu(aff, replica);
2627 struct ikglp_queue_info *info = &aff->q_info[replica];
2628 lt_t est_time;
2629 lt_t est_len_before;
2630
2631 if(current == t) {
2632 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
2633 }
2634
2635 est_len_before = info->estimated_len;
2636 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2637 info->estimated_len += est_time;
2638
2639 TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
2640 ikglp_get_idx(sem, info->q),
2641 est_len_before, est_time,
2642 info->estimated_len);
2643
2644 // if(aff->shortest_queue == info) {
2645 // // we may no longer be the shortest
2646 // aff->shortest_queue = ikglp_aff_find_shortest(aff);
2647 //
2648 // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
2649 // ikglp_get_idx(sem, aff->shortest_queue->q),
2650 // aff->shortest_queue->q->count,
2651 // aff->shortest_queue->estimated_len);
2652 // }
2653}
2654
2655void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2656{
2657 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2658 int replica = ikglp_get_idx(sem, fq);
2659 int gpu = replica_to_gpu(aff, replica);
2660 struct ikglp_queue_info *info = &aff->q_info[replica];
2661 lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2662
2663 if(est_time > info->estimated_len) {
2664 WARN_ON(1);
2665 info->estimated_len = 0;
2666 }
2667 else {
2668 info->estimated_len -= est_time;
2669 }
2670
2671 TRACE_CUR("fq %d est len is now %llu\n",
2672 ikglp_get_idx(sem, info->q),
2673 info->estimated_len);
2674
2675 // check to see if we're the shortest queue now.
2676 // if((aff->shortest_queue != info) &&
2677 // (aff->shortest_queue->estimated_len > info->estimated_len)) {
2678 //
2679 // aff->shortest_queue = info;
2680 //
2681 // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
2682 // ikglp_get_idx(sem, info->q),
2683 // info->q->count,
2684 // info->estimated_len);
2685 // }
2686}
2687
2688int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t)
2689{
2690 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2691 unsigned long flags = 0, real_flags;
2692 int aff_rsrc;
2693#ifdef CONFIG_LITMUS_DGL_SUPPORT
2694 raw_spinlock_t *dgl_lock;
2695
2696 dgl_lock = litmus->get_dgl_spinlock(t);
2697#endif
2698
2699 if (tsk_rt(t)->last_gpu < 0)
2700 return 0;
2701
2702 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
2703 lock_global_irqsave(dgl_lock, flags);
2704 lock_fine_irqsave(&sem->lock, flags);
2705
2706 // decrement affinity count on old GPU
2707 aff_rsrc = tsk_rt(t)->last_gpu - aff->offset;
2708 --(aff->nr_aff_on_rsrc[aff_rsrc]);
2709// aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t);
2710
2711 if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) {
2712 WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0);
2713 aff->nr_aff_on_rsrc[aff_rsrc] = 0;
2714 }
2715
2716 unlock_fine_irqrestore(&sem->lock, flags);
2717 unlock_global_irqrestore(dgl_lock, flags);
2718 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
2719
2720 return 0;
2721}
2722
2723int gpu_ikglp_notify_exit_trampoline(struct task_struct* t)
2724{
2725 struct ikglp_affinity* aff = (struct ikglp_affinity*)tsk_rt(t)->rsrc_exit_cb_args;
2726 if(likely(aff)) {
2727 return gpu_ikglp_notify_exit(aff, t);
2728 }
2729 else {
2730 return -1;
2731 }
2732}
2733
2734void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
2735 struct fifo_queue* fq,
2736 struct task_struct* t)
2737{
2738 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2739 int replica = ikglp_get_idx(sem, fq);
2740 int gpu = replica_to_gpu(aff, replica);
2741 int last_gpu = tsk_rt(t)->last_gpu;
2742
2743 tsk_rt(t)->gpu_migration = gpu_migration_distance(last_gpu, gpu); // record the type of migration
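	/* NOTE (assumption): the recorded migration class presumably selects
	 * which per-distance execution-time estimate update_gpu_estimate()
	 * adjusts when the GPU is later freed in gpu_ikglp_notify_freed(). */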
2744
2745 TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
2746 t->comm, t->pid, gpu, last_gpu, tsk_rt(t)->gpu_migration);
2747
2748	// count the number of resource holders
2749 ++(*(aff->q_info[replica].nr_cur_users));
2750
2751 if(gpu != last_gpu) {
2752 if(last_gpu >= 0) {
2753 int old_rsrc = last_gpu - aff->offset;
2754 --(aff->nr_aff_on_rsrc[old_rsrc]);
2755// aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t));
2756 }
2757
2758 // increment affinity count on new GPU
2759 ++(aff->nr_aff_on_rsrc[gpu - aff->offset]);
2760// aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t));
2761 tsk_rt(t)->rsrc_exit_cb_args = aff;
2762 tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline;
2763 }
2764
2765 reg_nv_device(gpu, 1, t); // register
2766
2767 tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
2768 reset_gpu_tracker(t);
2769 start_gpu_tracker(t);
2770}
2771
2772void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
2773 struct fifo_queue* fq,
2774 struct task_struct* t)
2775{
2776 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2777 int replica = ikglp_get_idx(sem, fq);
2778 int gpu = replica_to_gpu(aff, replica);
2779 lt_t est_time;
2780
2781 stop_gpu_tracker(t); // stop the tracker before we do anything else.
2782
2783 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2784
2785	// count the number of resource holders
2786 --(*(aff->q_info[replica].nr_cur_users));
2787
2788 reg_nv_device(gpu, 0, t); // unregister
2789
2790 // update estimates
2791 update_gpu_estimate(t, get_gpu_time(t));
2792
2793 TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. "
2794			"estimated was %llu. diff is %lld\n",
2795 t->comm, t->pid, gpu, tsk_rt(t)->last_gpu,
2796 tsk_rt(t)->gpu_migration,
2797 get_gpu_time(t),
2798 est_time,
2799 (long long)get_gpu_time(t) - (long long)est_time);
2800
2801 tsk_rt(t)->last_gpu = gpu;
2802}
2803
2804struct ikglp_affinity_ops gpu_ikglp_affinity =
2805{
2806 .advise_enqueue = gpu_ikglp_advise_enqueue,
2807 .advise_steal = gpu_ikglp_advise_steal,
2808 .advise_donee_selection = gpu_ikglp_advise_donee_selection,
2809 .advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq,
2810
2811 .notify_enqueue = gpu_ikglp_notify_enqueue,
2812 .notify_dequeue = gpu_ikglp_notify_dequeue,
2813 .notify_acquired = gpu_ikglp_notify_acquired,
2814 .notify_freed = gpu_ikglp_notify_freed,
2815
2816 .notify_exit = gpu_ikglp_notify_exit,
2817
2818 .replica_to_resource = gpu_replica_to_resource,
2819};
2820
2821struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
2822 void* __user args)
2823{
2824 return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args);
2825}
2826
2827
2828
2829
2830
2831
2832
2833
2834// Simple ikglp Affinity (standard ikglp with auto-gpu registration)
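// Unlike the "smart" heuristics above, the simple variant ignores
// migration-distance estimates: enqueue advice picks the FIFO with the
// fewest waiters (tie-breaking on current holders), the steal/donee/donor
// advice falls back to the standard IKGLP choices, and the notify hooks
// only maintain holder counts and NVIDIA device registration.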
2835
2836struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
2837{
2838 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2839 int min_count;
2840 int min_nr_users;
2841 struct ikglp_queue_info *shortest;
2842 struct fifo_queue *to_enqueue;
2843 int i;
2844
2845 // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n");
2846
2847 shortest = &aff->q_info[0];
2848 min_count = shortest->q->count;
2849 min_nr_users = *(shortest->nr_cur_users);
2850
2851 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
2852 ikglp_get_idx(sem, shortest->q),
2853 shortest->q->count,
2854 min_nr_users);
2855
2856 for(i = 1; i < sem->nr_replicas; ++i) {
2857 int len = aff->q_info[i].q->count;
2858
2859 // queue is smaller, or they're equal and the other has a smaller number
2860 // of total users.
2861 //
2862		// tie-break on the smallest number of simultaneous users. this only
2863		// kicks in when there is more than one empty queue.
2864 if((len < min_count) ||
2865 ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
2866 shortest = &aff->q_info[i];
2867 min_count = shortest->q->count;
2868 min_nr_users = *(aff->q_info[i].nr_cur_users);
2869 }
2870
2871 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
2872 ikglp_get_idx(sem, aff->q_info[i].q),
2873 aff->q_info[i].q->count,
2874 *(aff->q_info[i].nr_cur_users));
2875 }
2876
2877 to_enqueue = shortest->q;
2878 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
2879 ikglp_get_idx(sem, to_enqueue),
2880 ikglp_get_idx(sem, sem->shortest_fifo_queue));
2881
2882 return to_enqueue;
2883}
2884
2885ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
2886 struct fifo_queue* dst)
2887{
2888 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2889 // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n");
2890 return ikglp_find_hp_waiter_to_steal(sem);
2891}
2892
2893ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff, struct task_struct* donor)
2894{
2895 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2896 ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
2897 return(donee);
2898}
2899
2900ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
2901{
2902 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2903 ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
2904 return(donor);
2905}
2906
2907void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2908{
2909 // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n");
2910}
2911
2912void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2913{
2914 // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n");
2915}
2916
2917void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2918{
2919 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2920 int replica = ikglp_get_idx(sem, fq);
2921 int gpu = replica_to_gpu(aff, replica);
2922
2923 // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n");
2924
2925	// count the number of resource holders
2926 ++(*(aff->q_info[replica].nr_cur_users));
2927
2928 reg_nv_device(gpu, 1, t); // register
2929}
2930
2931void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2932{
2933 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2934 int replica = ikglp_get_idx(sem, fq);
2935 int gpu = replica_to_gpu(aff, replica);
2936
2937 // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n");
2938	// count the number of resource holders
2939 --(*(aff->q_info[replica].nr_cur_users));
2940
2941 reg_nv_device(gpu, 0, t); // unregister
2942}
2943
2944struct ikglp_affinity_ops simple_gpu_ikglp_affinity =
2945{
2946 .advise_enqueue = simple_gpu_ikglp_advise_enqueue,
2947 .advise_steal = simple_gpu_ikglp_advise_steal,
2948 .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection,
2949 .advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq,
2950
2951 .notify_enqueue = simple_gpu_ikglp_notify_enqueue,
2952 .notify_dequeue = simple_gpu_ikglp_notify_dequeue,
2953 .notify_acquired = simple_gpu_ikglp_notify_acquired,
2954 .notify_freed = simple_gpu_ikglp_notify_freed,
2955
2956 .notify_exit = NULL,
2957
2958 .replica_to_resource = gpu_replica_to_resource,
2959};
2960
2961struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
2962 void* __user args)
2963{
2964 return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args);
2965}
2966
2967#endif
2968
2969
2970
2971
2972
2973
2974
2975
2976