authorGlenn Elliott <gelliott@cs.unc.edu>2012-04-18 23:18:32 -0400
committerGlenn Elliott <gelliott@cs.unc.edu>2012-04-18 23:18:32 -0400
commitc6d04216a123f8e0b50eb78bbb1eaf646a1ca4e0 (patch)
treec6db90c6fd95a308db4849abebcb09a0dafaedd8
parent149ef3b424a49e6b928c5e23fea83380ed95ea38 (diff)
Added hooks for IKGLP affinity and a little logic.
Simple IKGLP affinity is already done. It does: 1) automatic GPU de/registration, 2) distribution amongst simultaneous users across queues, 3) calls to the default IKGLP routines when appropriate. Remaining work: 1) FQ advisement. 2) Donor-stealing advisement. 3) Donee-selection advisement.
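For reference, every decision point touched by this patch follows the same advisement pattern: consult the attached affinity observer if one exists, otherwise fall back to the stock IKGLP heuristic. A minimal sketch of that pattern, assuming the types from include/litmus/ikglp_lock.h (ikglp_pick_fq() is an illustrative name only; the patch inlines this logic directly in ikglp_lock()):

/* Hedged sketch of the dispatch pattern used at each decision point
 * (FIFO-queue selection, donee selection, donor-to-FQ, stealing). */
static struct fifo_queue* ikglp_pick_fq(struct ikglp_semaphore *sem,
                                        struct task_struct *t)
{
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
	/* An attached observer (e.g., gpu_ikglp_affinity) may steer 't'
	 * toward a replica for which it already holds GPU state. */
	if (sem->aff_obs)
		return sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t);
#endif
	/* Default IKGLP behavior: enqueue on the shortest FIFO queue. */
	return sem->shortest_fifo_queue;
}

The notify_* callbacks in struct ikglp_affinity_ops are the corresponding bookkeeping hooks; the GPU implementation uses them for device registration and queue-length estimates, while the "simple" implementation only registers/unregisters the device.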
-rw-r--r--include/litmus/ikglp_lock.h8
-rw-r--r--litmus/ikglp_lock.c730
-rw-r--r--litmus/kfmlp_lock.c39
3 files changed, 705 insertions, 72 deletions
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index 08e73332c3d4..3fa23251b539 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -118,9 +118,9 @@ struct ikglp_queue_info
118struct ikglp_affinity_ops 118struct ikglp_affinity_ops
119{ 119{
120 struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO 120 struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO
121 struct task_struct* (*advise_steal)(struct ikglp_affinity* aff, wait_queue_t** to_steal, struct fifo_queue** to_steal_from); // select steal from FIFO 121 ikglp_wait_state_t* (*advise_steal)(struct ikglp_affinity* aff); // select steal from FIFO
122 struct task_struct* (*advise_donee_selection)(struct ikglp_affinity* aff, wait_queue_t** donee, struct fifo_queue** donee_queue); // select a donee 122 ikglp_donee_heap_node_t* (*advise_donee_selection)(struct ikglp_affinity* aff); // select a donee
123 struct task_struct* (*advise_doner_to_fq)(struct ikglp_affinity* aff, ikglp_wait_state_t** donor); // select a donor to move to PQ 123 ikglp_wait_state_t* (*advise_doner_to_fq)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select a donor to move to PQ
124 124
125 void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue 125 void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue
126 void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue 126 void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue
@@ -133,7 +133,7 @@ struct ikglp_affinity
133{ 133{
134 struct affinity_observer obs; 134 struct affinity_observer obs;
135 struct ikglp_affinity_ops *ops; 135 struct ikglp_affinity_ops *ops;
136 struct fifo_queue *q_info; 136 struct ikglp_queue_info *q_info;
137 int *nr_cur_users_on_rsrc; 137 int *nr_cur_users_on_rsrc;
138 int offset; 138 int offset;
139 int nr_simult; 139 int nr_simult;
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 94c954464a96..0e07841b86ba 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -3,9 +3,14 @@
3 3
4#include <litmus/trace.h> 4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h> 5#include <litmus/sched_plugin.h>
6#include <litmus/ikglp_lock.h> 6#include <litmus/fdso.h>
7 7
8//#include <litmus/edf_common.h> 8#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
9#include <litmus/gpu_affinity.h>
10#include <litmus/nvidia_info.h>
11#endif
12
13#include <litmus/ikglp_lock.h>
9 14
10int ikglp_max_heap_base_priority_order(struct binheap_node *a, 15int ikglp_max_heap_base_priority_order(struct binheap_node *a,
11 struct binheap_node *b) 16 struct binheap_node *b)
@@ -16,7 +21,6 @@ int ikglp_max_heap_base_priority_order(struct binheap_node *a,
16 BUG_ON(!d_a); 21 BUG_ON(!d_a);
17 BUG_ON(!d_b); 22 BUG_ON(!d_b);
18 23
19 //return __edf_higher_prio(d_a->task, BASE, d_b->task, BASE);
20 return litmus->__compare(d_a->task, BASE, d_b->task, BASE); 24 return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
21} 25}
22 26
@@ -26,7 +30,6 @@ int ikglp_min_heap_base_priority_order(struct binheap_node *a,
26 ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); 30 ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
27 ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); 31 ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
28 32
29 //return __edf_higher_prio(d_b->task, BASE, d_a->task, BASE);
30 return litmus->__compare(d_b->task, BASE, d_a->task, BASE); 33 return litmus->__compare(d_b->task, BASE, d_a->task, BASE);
31} 34}
32 35
@@ -36,7 +39,6 @@ int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a,
36 ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node); 39 ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node);
37 ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node); 40 ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node);
38 41
39 //return __edf_higher_prio(d_a->task, BASE, d_b->task, BASE);
40 return litmus->__compare(d_a->task, BASE, d_b->task, BASE); 42 return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
41} 43}
42 44
@@ -68,7 +70,6 @@ int ikglp_min_heap_donee_order(struct binheap_node *a,
68 } 70 }
69 71
70 // note reversed order 72 // note reversed order
71 //return __edf_higher_prio(prio_b, BASE, prio_a, BASE);
72 return litmus->__compare(prio_b, BASE, prio_a, BASE); 73 return litmus->__compare(prio_b, BASE, prio_a, BASE);
73} 74}
74 75
@@ -103,7 +104,6 @@ static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue,
103 wait_queue_t, task_list)->private; 104 wait_queue_t, task_list)->private;
104 105
105 /* Compare task prios, find high prio task. */ 106 /* Compare task prios, find high prio task. */
106 //if (queued != skip && edf_higher_prio(queued, found))
107 if(queued != skip && litmus->compare(queued, found)) 107 if(queued != skip && litmus->compare(queued, found))
108 found = queued; 108 found = queued;
109 } 109 }
@@ -246,7 +246,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem,
246// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); 246// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
247// print_global_list(sem->top_m.root, 1); 247// print_global_list(sem->top_m.root, 1);
248 } 248 }
249 //else if(__edf_higher_prio(t, BASE, ikglp_mth_highest(sem), BASE)) {
250 else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { 249 else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) {
251 ikglp_heap_node_t *evicted = 250 ikglp_heap_node_t *evicted =
252 binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node); 251 binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node);
@@ -367,7 +366,6 @@ static void ikglp_refresh_owners_prio_increase(struct task_struct *t,
367 unsigned long flags) 366 unsigned long flags)
368{ 367{
369 // priority of 't' has increased (note: 't' might already be hp_waiter). 368 // priority of 't' has increased (note: 't' might already be hp_waiter).
370 // if ((t == fq->hp_waiter) || edf_higher_prio(t, fq->hp_waiter)) {
371 if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) { 369 if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) {
372 struct task_struct *old_max_eff_prio; 370 struct task_struct *old_max_eff_prio;
373 struct task_struct *new_max_eff_prio; 371 struct task_struct *new_max_eff_prio;
@@ -478,7 +476,6 @@ static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq,
478 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", 476 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
479 ikglp_get_idx(sem, fq)); 477 ikglp_get_idx(sem, fq));
480 478
481 //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) {
482 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { 479 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
483 TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n", 480 TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n",
484 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", 481 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
@@ -540,7 +537,6 @@ static void ikglp_remove_donation_from_owner(struct binheap_node *n,
540 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", 537 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
541 ikglp_get_idx(sem, fq)); 538 ikglp_get_idx(sem, fq));
542 539
543 //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) {
544 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { 540 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
545 TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n", 541 TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n",
546 ikglp_get_idx(sem, fq)); 542 ikglp_get_idx(sem, fq));
@@ -582,7 +578,6 @@ static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t,
582 // Need to set new effective_priority for owner 578 // Need to set new effective_priority for owner
583 struct task_struct *decreased_prio; 579 struct task_struct *decreased_prio;
584 580
585 //if(__edf_higher_prio(new_max_eff_prio, BASE, t, BASE)) {
586 if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) { 581 if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) {
587 decreased_prio = new_max_eff_prio; 582 decreased_prio = new_max_eff_prio;
588 } 583 }
@@ -618,6 +613,13 @@ static void ikglp_get_immediate(struct task_struct* t,
618 613
619 sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue); 614 sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue);
620 615
616#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
617 if(sem->aff_obs) {
618 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
619 sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t);
620 }
621#endif
622
621 unlock_fine_irqrestore(&sem->lock, flags); 623 unlock_fine_irqrestore(&sem->lock, flags);
622} 624}
623 625
@@ -662,6 +664,12 @@ static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem,
662 sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq); 664 sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq);
663 } 665 }
664 666
667#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
668 if(sem->aff_obs) {
669 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
670 }
671#endif
672
665 TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq)); 673 TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq));
666} 674}
667 675
@@ -732,8 +740,14 @@ static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem,
732 ikglp_add_global_list(sem, t, &wait->global_heap_node); 740 ikglp_add_global_list(sem, t, &wait->global_heap_node);
733 741
734 // Select a donee 742 // Select a donee
735 donee_node = binheap_top_entry(&sem->donees, 743#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
736 ikglp_donee_heap_node_t, node); 744 donee_node = (sem->aff_obs) ?
745 sem->aff_obs->ops->advise_donee_selection(sem->aff_obs) :
746 binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
747#else
748 donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
749#endif
750
737 donee = donee_node->task; 751 donee = donee_node->task;
738 752
739 TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid); 753 TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid);
@@ -743,7 +757,8 @@ static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem,
743// TRACE_CUR("donees Before:\n"); 757// TRACE_CUR("donees Before:\n");
744// print_donees(sem, sem->donees.root, 1); 758// print_donees(sem, sem->donees.root, 1);
745 759
746 binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly 760 //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly
761 binheap_delete(&donee_node->node, &sem->donees);
747 762
748// TRACE_CUR("donees After:\n"); 763// TRACE_CUR("donees After:\n");
749// print_donees(sem, sem->donees.root, 1); 764// print_donees(sem, sem->donees.root, 1);
@@ -813,7 +828,6 @@ static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem,
813 828
814 if(new_max_eff_prio != old_max_eff_prio) { 829 if(new_max_eff_prio != old_max_eff_prio) {
815 if ((effective_priority(donee) == old_max_eff_prio) || 830 if ((effective_priority(donee) == old_max_eff_prio) ||
816 //(__edf_higher_prio(new_max_eff_prio, BASE, donee, EFFECTIVE))){
817 (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){ 831 (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){
818 TRACE_TASK(t, "Donation increases %s/%d's effective priority\n", 832 TRACE_TASK(t, "Donation increases %s/%d's effective priority\n",
819 donee->comm, donee->pid); 833 donee->comm, donee->pid);
@@ -887,11 +901,20 @@ int ikglp_lock(struct litmus_lock* l)
887 lock_global_irqsave(dgl_lock, flags); 901 lock_global_irqsave(dgl_lock, flags);
888 lock_fine_irqsave(&sem->lock, flags); 902 lock_fine_irqsave(&sem->lock, flags);
889 903
890 if(sem->shortest_fifo_queue->count == 0) { 904
905#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
906 fq = (sem->aff_obs) ?
907 sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
908 sem->shortest_fifo_queue;
909#else
910 fq = sem->shortest_fifo_queue;
911#endif
912
913 if(fq->count == 0) {
891 // take available resource 914 // take available resource
892 replica = ikglp_get_idx(sem, sem->shortest_fifo_queue); 915 //replica = ikglp_get_idx(sem, fq);
893 916
894 ikglp_get_immediate(t, sem->shortest_fifo_queue, sem, flags); // unlocks sem->lock 917 ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
895 918
896 unlock_global_irqrestore(dgl_lock, flags); 919 unlock_global_irqrestore(dgl_lock, flags);
897 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); 920 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
@@ -908,17 +931,16 @@ int ikglp_lock(struct litmus_lock* l)
908 /* FIXME: interruptible would be nice some day */ 931 /* FIXME: interruptible would be nice some day */
909 set_task_state(t, TASK_UNINTERRUPTIBLE); 932 set_task_state(t, TASK_UNINTERRUPTIBLE);
910 933
911 if(sem->shortest_fifo_queue->count < sem->max_fifo_len) { 934 if(fq->count < sem->max_fifo_len) {
912 // enqueue on fq 935 // enqueue on fq
913 ikglp_enqueue_on_fq(sem, sem->shortest_fifo_queue, &wait, flags); // unlocks sem->lock 936 ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
914 } 937 }
915 else { 938 else {
916 939
917 TRACE_CUR("IKGLP fifo queues are full.\n"); 940 TRACE_CUR("IKGLP fifo queues are full (at least they better be).\n");
918 941
919 // no room in fifos. Go to PQ or donors. 942 // no room in fifos. Go to PQ or donors.
920 943
921 //if(__edf_higher_prio(ikglp_mth_highest(sem), BASE, t, BASE)) {
922 if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { 944 if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
923 // enqueue on PQ 945 // enqueue on PQ
924 ikglp_enqueue_on_pq(sem, &wait); 946 ikglp_enqueue_on_pq(sem, &wait);
@@ -942,13 +964,19 @@ int ikglp_lock(struct litmus_lock* l)
942 fq = ikglp_get_queue(sem, t); 964 fq = ikglp_get_queue(sem, t);
943 BUG_ON(!fq); 965 BUG_ON(!fq);
944 966
945 replica = ikglp_get_idx(sem, fq); 967 //replica = ikglp_get_idx(sem, fq);
946 } 968 }
947 969
948 TRACE_CUR("Acquired lock %d, queue %d\n", 970 TRACE_CUR("Acquired lock %d, queue %d\n",
949 l->ident, replica); 971 l->ident, replica);
950 972
951 return replica; 973#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
974 if(sem->aff_obs) {
975 return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
976 }
977#endif
978
979 return ikglp_get_idx(sem, fq);
952} 980}
953 981
954static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem, 982static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem,
@@ -1006,7 +1034,6 @@ static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal(
1006 1034
1007 for(i = 0; i < sem->nr_replicas; ++i) { 1035 for(i = 0; i < sem->nr_replicas; ++i) {
1008 if( (sem->fifo_queues[i].count > 1) && 1036 if( (sem->fifo_queues[i].count > 1) &&
1009 //(!fq || edf_higher_prio(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) {
1010 (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) { 1037 (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) {
1011 1038
1012 TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n", 1039 TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n",
@@ -1078,6 +1105,12 @@ static void ikglp_steal_to_fq(struct ikglp_semaphore *sem,
1078 __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node); 1105 __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node);
1079 --(fq_steal->count); 1106 --(fq_steal->count);
1080 1107
1108#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1109 if(sem->aff_obs) {
1110 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t);
1111 }
1112#endif
1113
1081 fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL); 1114 fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL);
1082 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", 1115 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1083 ikglp_get_idx(sem, fq_steal), 1116 ikglp_get_idx(sem, fq_steal),
@@ -1152,14 +1185,6 @@ int ikglp_unlock(struct litmus_lock* l)
1152 1185
1153 int err = 0; 1186 int err = 0;
1154 1187
1155#ifdef CONFIG_LITMUS_DGL_SUPPORT
1156 dgl_lock = litmus->get_dgl_spinlock(t);
1157#endif
1158
1159 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
1160
1161 lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper
1162 lock_fine_irqsave(&sem->lock, flags);
1163 1188
1164 fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner. 1189 fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner.
1165 1190
@@ -1168,6 +1193,14 @@ int ikglp_unlock(struct litmus_lock* l)
1168 goto out; 1193 goto out;
1169 } 1194 }
1170 1195
1196#ifdef CONFIG_LITMUS_DGL_SUPPORT
1197 dgl_lock = litmus->get_dgl_spinlock(t);
1198#endif
1199 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
1200
1201 lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper
1202 lock_fine_irqsave(&sem->lock, flags);
1203
1171 TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq)); 1204 TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq));
1172 1205
1173 1206
@@ -1175,6 +1208,19 @@ int ikglp_unlock(struct litmus_lock* l)
1175 ikglp_del_global_list(sem, t, &fq->global_heap_node); 1208 ikglp_del_global_list(sem, t, &fq->global_heap_node);
1176 binheap_delete(&fq->donee_heap_node.node, &sem->donees); 1209 binheap_delete(&fq->donee_heap_node.node, &sem->donees);
1177 1210
1211 fq->owner = NULL; // no longer owned!!
1212 --(fq->count);
1213 if(fq->count < sem->shortest_fifo_queue->count) {
1214 sem->shortest_fifo_queue = fq;
1215 }
1216
1217#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1218 if(sem->aff_obs) {
1219 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t);
1220 sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t);
1221 }
1222#endif
1223
1178 // Move the next request into the FQ and update heaps as needed. 1224 // Move the next request into the FQ and update heaps as needed.
1179 // We defer re-evaluation of priorities to later in the function. 1225 // We defer re-evaluation of priorities to later in the function.
1180 if(fq->donee_heap_node.donor_info) { // move my doner to FQ 1226 if(fq->donee_heap_node.donor_info) { // move my doner to FQ
@@ -1191,8 +1237,14 @@ int ikglp_unlock(struct litmus_lock* l)
1191 } 1237 }
1192 else if(!binheap_empty(&sem->donors)) { // No donor, so move any donor to FQ 1238 else if(!binheap_empty(&sem->donors)) { // No donor, so move any donor to FQ
1193 // move other donor to FQ 1239 // move other donor to FQ
1194 other_donor_info = binheap_top_entry(&sem->donors, 1240 // Select a donor
1195 ikglp_wait_state_t, node); 1241#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1242 other_donor_info = (sem->aff_obs) ?
1243 sem->aff_obs->ops->advise_doner_to_fq(sem->aff_obs, fq) :
1244 binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1245#else
1246 other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1247#endif
1196 1248
1197 new_on_fq = other_donor_info->task; 1249 new_on_fq = other_donor_info->task;
1198 donee = other_donor_info->donee_info->task; 1250 donee = other_donor_info->donee_info->task;
@@ -1201,7 +1253,6 @@ int ikglp_unlock(struct litmus_lock* l)
1201 other_donor_info->donee_info->donor_info = NULL; // clear the cross-link 1253 other_donor_info->donee_info->donor_info = NULL; // clear the cross-link
1202 binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); 1254 binheap_decrease(&other_donor_info->donee_info->node, &sem->donees);
1203 1255
1204
1205 TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d.\n", 1256 TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d.\n",
1206 new_on_fq->comm, new_on_fq->pid, 1257 new_on_fq->comm, new_on_fq->pid,
1207 ikglp_get_idx(sem, fq)); 1258 ikglp_get_idx(sem, fq));
@@ -1222,14 +1273,20 @@ int ikglp_unlock(struct litmus_lock* l)
1222 1273
1223 ikglp_move_pq_to_fq(sem, fq, pq_wait); 1274 ikglp_move_pq_to_fq(sem, fq, pq_wait);
1224 } 1275 }
1225 else if(fq->count == 1) { // No PQ and this queue is empty, so steal 1276 else if(fq->count == 0) { // No PQ and this queue is empty, so steal.
1226 // steal.
1227 ikglp_wait_state_t *fq_wait; 1277 ikglp_wait_state_t *fq_wait;
1228 1278
1229 TRACE_TASK(t, "Looking to steal a request for fq %d...\n", 1279 TRACE_TASK(t, "Looking to steal a request for fq %d...\n",
1230 ikglp_get_idx(sem, fq)); 1280 ikglp_get_idx(sem, fq));
1231 1281
1282#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1283 fq_wait = (sem->aff_obs) ?
1284 sem->aff_obs->ops->advise_steal(sem->aff_obs) :
1285 ikglp_find_hp_waiter_to_steal(sem);
1286#else
1232 fq_wait = ikglp_find_hp_waiter_to_steal(sem); 1287 fq_wait = ikglp_find_hp_waiter_to_steal(sem);
1288#endif
1289
1233 if(fq_wait) { 1290 if(fq_wait) {
1234 to_steal = fq_wait->donee_heap_node.fq; 1291 to_steal = fq_wait->donee_heap_node.fq;
1235 1292
@@ -1267,15 +1324,6 @@ int ikglp_unlock(struct litmus_lock* l)
1267 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); 1324 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
1268 1325
1269 1326
1270 // Updating the owner and updating sem->shortest_fifo_queue
1271 // could have been done sooner, but it is deffered, hoping
1272 // that it will reduce thrashing of sem->shortest_fifo_queue
1273 // assignment.
1274 fq->owner = NULL; // no longer owned!!
1275 --(fq->count);
1276 if(fq->count < sem->shortest_fifo_queue->count) {
1277 sem->shortest_fifo_queue = fq;
1278 }
1279 1327
1280 // Now patch up other priorities. 1328 // Now patch up other priorities.
1281 // 1329 //
@@ -1344,7 +1392,6 @@ int ikglp_unlock(struct litmus_lock* l)
1344 fq->hp_waiter->comm, fq->hp_waiter->pid); 1392 fq->hp_waiter->comm, fq->hp_waiter->pid);
1345 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure... 1393 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure...
1346 } 1394 }
1347 //else if(edf_higher_prio(new_on_fq, fq->hp_waiter)) {
1348 else if(litmus->compare(new_on_fq, fq->hp_waiter)) { 1395 else if(litmus->compare(new_on_fq, fq->hp_waiter)) {
1349 if(fq->hp_waiter) 1396 if(fq->hp_waiter)
1350 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", 1397 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
@@ -1382,6 +1429,11 @@ int ikglp_unlock(struct litmus_lock* l)
1382 fq->owner = next; 1429 fq->owner = next;
1383 tsk_rt(next)->blocked_lock = NULL; 1430 tsk_rt(next)->blocked_lock = NULL;
1384 1431
1432#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1433 if(sem->aff_obs) {
1434 sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next);
1435 }
1436#endif
1385 1437
1386 /* determine new hp_waiter if necessary */ 1438 /* determine new hp_waiter if necessary */
1387 if (next == fq->hp_waiter) { 1439 if (next == fq->hp_waiter) {
@@ -1461,12 +1513,12 @@ int ikglp_unlock(struct litmus_lock* l)
1461 wake_up_process(next); 1513 wake_up_process(next);
1462 } 1514 }
1463 1515
1464out:
1465 unlock_fine_irqrestore(&sem->lock, flags); 1516 unlock_fine_irqrestore(&sem->lock, flags);
1466 unlock_global_irqrestore(dgl_lock, flags); 1517 unlock_global_irqrestore(dgl_lock, flags);
1467 1518
1468 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); 1519 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1469 1520
1521out:
1470 return err; 1522 return err;
1471} 1523}
1472 1524
@@ -1597,5 +1649,583 @@ struct litmus_lock* ikglp_new(int m,
1597 INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order); 1649 INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order);
1598 INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order); 1650 INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order);
1599 1651
1652#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1653 sem->aff_obs = NULL;
1654#endif
1655
1600 return &sem->litmus_lock; 1656 return &sem->litmus_lock;
1601} 1657}
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
1688
1689static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica)
1690{
1691 int gpu = replica % aff->nr_rsrc;
1692 return gpu;
1693}
1694
1695static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica)
1696{
1697 int gpu = __replica_to_gpu(aff, replica) + aff->offset;
1698 return gpu;
1699}
1700
1701static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
1702{
1703 int replica = gpu - aff->offset;
1704 return replica;
1705}
1706
1707
1708int ikglp_aff_obs_close(struct affinity_observer* obs)
1709{
1710 return 0;
1711}
1712
1713void ikglp_aff_obs_free(struct affinity_observer* obs)
1714{
1715 struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs);
1716 kfree(ikglp_aff->nr_cur_users_on_rsrc);
1717 kfree(ikglp_aff->q_info);
1718 kfree(ikglp_aff);
1719}
1720
1721static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops,
1722 struct ikglp_affinity_ops* ikglp_ops,
1723 void* __user args)
1724{
1725 struct ikglp_affinity* ikglp_aff;
1726 struct gpu_affinity_observer_args aff_args;
1727 struct ikglp_semaphore* sem;
1728 int i;
1729 unsigned long flags;
1730
1731 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
1732 return(NULL);
1733 }
1734 if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
1735 return(NULL);
1736 }
1737
1738 sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
1739
1740 if(sem->litmus_lock.type != IKGLP_SEM) {
1741 TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
1742 return(NULL);
1743 }
1744
1745 if((aff_args.nr_simult_users <= 0) ||
1746 (sem->nr_replicas%aff_args.nr_simult_users != 0)) {
1747 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
1748 "(%d) per replica. #replicas should be evenly divisible "
1749 "by #simult_users.\n",
1750 sem->litmus_lock.ident,
1751 sem->nr_replicas,
1752 aff_args.nr_simult_users);
1753 return(NULL);
1754 }
1755
1756 if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
1757 TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
1758 NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
1759 return(NULL);
1760 }
1761
1762 ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
1763 if(!ikglp_aff) {
1764 return(NULL);
1765 }
1766
1767 ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL);
1768 if(!ikglp_aff->q_info) {
1769 kfree(ikglp_aff);
1770 return(NULL);
1771 }
1772
1773 ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
1774 if(!ikglp_aff->nr_cur_users_on_rsrc) {
1775 kfree(ikglp_aff->q_info);
1776 kfree(ikglp_aff);
1777 return(NULL);
1778 }
1779
1780 affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs);
1781
1782 ikglp_aff->ops = ikglp_ops;
1783 ikglp_aff->offset = aff_args.replica_to_gpu_offset;
1784 ikglp_aff->nr_simult = aff_args.nr_simult_users;
1785 ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult;
1786
1787 memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->nr_replicas / ikglp_aff->nr_rsrc));
1788
1789 for(i = 0; i < sem->nr_replicas; ++i) {
1790 ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
1791 ikglp_aff->q_info[i].estimated_len = 0;
1792
1793 // multiple q_info's will point to the same resource (aka GPU) if
1794 // aff_args.nr_simult_users > 1
1795 ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
1796 }
1797
1798 // attach observer to the lock
1799 raw_spin_lock_irqsave(&sem->real_lock, flags);
1800 sem->aff_obs = ikglp_aff;
1801 raw_spin_unlock_irqrestore(&sem->real_lock, flags);
1802
1803 return &ikglp_aff->obs;
1804}
1805
1806
1807
1808
1809static int gpu_replica_to_resource(struct ikglp_affinity* aff,
1810 struct fifo_queue* fq) {
1811 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
1812 return(replica_to_gpu(aff, ikglp_get_idx(sem, fq)));
1813}
1814
1815
1816// Smart IKGLP Affinity
1817
1818//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff)
1819//{
1820// struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
1821// struct ikglp_queue_info *shortest = &aff->q_info[0];
1822// int i;
1823//
1824// for(i = 1; i < sem->nr_replicas; ++i) {
1825// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
1826// shortest = &aff->q_info[i];
1827// }
1828// }
1829//
1830// return(shortest);
1831//}
1832
1833struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
1834{
1835 // advise_enqueue must be smart as not not break IKGLP rules:
1836 // * Total number of waiters cannot exceed ceil(m/k)*k.
1837 // * Cannot let a queue idle if there exist waiting PQ/donors
1838 // -- needed to guarantee parallel progress of waiters.
1839 //
1840 // Locking protocol is smart enough to noticed that a queue we return is
1841 // full and send new requests to Donors/PQ.
1842 //
1843 // We may be able to relax some of these constraints, but this will have to
1844 // be carefully evaluated.
1845
1846 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
1847
1848 /*
1849 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
1850 lt_t min_len;
1851 int min_nr_users;
1852 struct ikglp_queue_info *shortest;
1853 struct ikglp_queue *to_enqueue;
1854 int i;
1855 int affinity_gpu;
1856
1857 // simply pick the shortest queue if, we have no affinity, or we have
1858 // affinity with the shortest
1859 if(unlikely(tsk_rt(t)->last_gpu < 0)) {
1860 affinity_gpu = aff->offset; // first gpu
1861 TRACE_CUR("no affinity\n");
1862 }
1863 else {
1864 affinity_gpu = tsk_rt(t)->last_gpu;
1865 }
1866
1867 // all things being equal, let's start with the queue with which we have
1868 // affinity. this helps us maintain affinity even when we don't have
1869 // an estiamte for local-affinity execution time (i.e., 2nd time on GPU)
1870 shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
1871
1872 // if(shortest == aff->shortest_queue) {
1873 // TRACE_CUR("special case: have affinity with shortest queue\n");
1874 // goto out;
1875 // }
1876
1877 min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
1878 min_nr_users = *(shortest->nr_cur_users);
1879
1880 TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
1881 get_gpu_estimate(t, MIG_LOCAL),
1882 ikglp_get_idx(sem, shortest->q),
1883 min_len);
1884
1885 for(i = 0; i < sem->nr_replicas; ++i) {
1886 if(&aff->q_info[i] != shortest) {
1887
1888 lt_t est_len =
1889 aff->q_info[i].estimated_len +
1890 get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i)));
1891
1892 // queue is smaller, or they're equal and the other has a smaller number
1893 // of total users.
1894 //
1895 // tie-break on the shortest number of simult users. this only kicks in
1896 // when there are more than 1 empty queues.
1897 if((est_len < min_len) ||
1898 ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
1899 shortest = &aff->q_info[i];
1900 min_len = est_len;
1901 min_nr_users = *(aff->q_info[i].nr_cur_users);
1902 }
1903
1904 TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
1905 get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))),
1906 ikglp_get_idx(sem, aff->q_info[i].q),
1907 est_len);
1908 }
1909 }
1910
1911 to_enqueue = shortest->q;
1912 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
1913 ikglp_get_idx(sem, to_enqueue),
1914 ikglp_get_idx(sem, sem->shortest_queue));
1915
1916 return to_enqueue;
1917 */
1918 return(sem->shortest_fifo_queue);
1919}
1920
1921ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff)
1922{
1923 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
1924
1925 // For now, just steal highest priority waiter
1926 // TODO: Implement affinity-aware stealing.
1927
1928 return ikglp_find_hp_waiter_to_steal(sem);
1929}
1930
1931ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff)
1932{
1933 // TODO: MAKE THIS SMARTER
1934 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
1935 ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
1936 return(donee);
1937}
1938
1939ikglp_wait_state_t* gpu_ikglp_advise_doner_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
1940{
1941 // TODO: MAKE THIS SMARTER
1942 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
1943 ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1944 return(donor);
1945}
1946
1947
1948
1949void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
1950{
1951 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
1952 int replica = ikglp_get_idx(sem, fq);
1953 int gpu = replica_to_gpu(aff, replica);
1954 struct ikglp_queue_info *info = &aff->q_info[replica];
1955 lt_t est_time;
1956 lt_t est_len_before;
1957
1958 if(current == t) {
1959 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
1960 }
1961
1962 est_len_before = info->estimated_len;
1963 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
1964 info->estimated_len += est_time;
1965
1966 TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
1967 ikglp_get_idx(sem, info->q),
1968 est_len_before, est_time,
1969 info->estimated_len);
1970
1971 // if(aff->shortest_queue == info) {
1972 // // we may no longer be the shortest
1973 // aff->shortest_queue = ikglp_aff_find_shortest(aff);
1974 //
1975 // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
1976 // ikglp_get_idx(sem, aff->shortest_queue->q),
1977 // aff->shortest_queue->q->count,
1978 // aff->shortest_queue->estimated_len);
1979 // }
1980}
1981
1982void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
1983{
1984 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
1985 int replica = ikglp_get_idx(sem, fq);
1986 int gpu = replica_to_gpu(aff, replica);
1987 struct ikglp_queue_info *info = &aff->q_info[replica];
1988 lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
1989
1990 if(est_time > info->estimated_len) {
1991 WARN_ON(1);
1992 info->estimated_len = 0;
1993 }
1994 else {
1995 info->estimated_len -= est_time;
1996 }
1997
1998 TRACE_CUR("fq %d est len is now %llu\n",
1999 ikglp_get_idx(sem, info->q),
2000 info->estimated_len);
2001
2002 // check to see if we're the shortest queue now.
2003 // if((aff->shortest_queue != info) &&
2004 // (aff->shortest_queue->estimated_len > info->estimated_len)) {
2005 //
2006 // aff->shortest_queue = info;
2007 //
2008 // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
2009 // ikglp_get_idx(sem, info->q),
2010 // info->q->count,
2011 // info->estimated_len);
2012 // }
2013}
2014
2015void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2016{
2017 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2018 int replica = ikglp_get_idx(sem, fq);
2019 int gpu = replica_to_gpu(aff, replica);
2020
2021 tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
2022
2023 TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
2024 t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
2025
2026 // count the number or resource holders
2027 ++(*(aff->q_info[replica].nr_cur_users));
2028
2029 reg_nv_device(gpu, 1, t); // register
2030
2031 tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
2032 reset_gpu_tracker(t);
2033 start_gpu_tracker(t);
2034}
2035
2036void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2037{
2038 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2039 int replica = ikglp_get_idx(sem, fq);
2040 int gpu = replica_to_gpu(aff, replica);
2041 lt_t est_time;
2042
2043 stop_gpu_tracker(t); // stop the tracker before we do anything else.
2044
2045 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2046
2047 tsk_rt(t)->last_gpu = gpu;
2048
2049 // count the number or resource holders
2050 --(*(aff->q_info[replica].nr_cur_users));
2051
2052 reg_nv_device(gpu, 0, t); // unregister
2053
2054 // update estimates
2055 update_gpu_estimate(t, get_gpu_time(t));
2056
2057 TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n",
2058 t->comm, t->pid, gpu,
2059 get_gpu_time(t),
2060 est_time,
2061 (long long)get_gpu_time(t) - (long long)est_time);
2062}
2063
2064struct ikglp_affinity_ops gpu_ikglp_affinity =
2065{
2066 .advise_enqueue = gpu_ikglp_advise_enqueue,
2067 .advise_steal = gpu_ikglp_advise_steal,
2068 .advise_donee_selection = gpu_ikglp_advise_donee_selection,
2069 .advise_doner_to_fq = gpu_ikglp_advise_doner_to_fq,
2070
2071 .notify_enqueue = gpu_ikglp_notify_enqueue,
2072 .notify_dequeue = gpu_ikglp_notify_dequeue,
2073 .notify_acquired = gpu_ikglp_notify_acquired,
2074 .notify_freed = gpu_ikglp_notify_freed,
2075
2076 .replica_to_resource = gpu_replica_to_resource,
2077};
2078
2079struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
2080 void* __user args)
2081{
2082 return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args);
2083}
2084
2085
2086
2087
2088
2089
2090
2091
2092// Simple ikglp Affinity (standard ikglp with auto-gpu registration)
2093
2094struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
2095{
2096 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2097 int min_count;
2098 int min_nr_users;
2099 struct ikglp_queue_info *shortest;
2100 struct fifo_queue *to_enqueue;
2101 int i;
2102
2103 // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n");
2104
2105 shortest = &aff->q_info[0];
2106 min_count = shortest->q->count;
2107 min_nr_users = *(shortest->nr_cur_users);
2108
2109 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
2110 ikglp_get_idx(sem, shortest->q),
2111 shortest->q->count,
2112 min_nr_users);
2113
2114 for(i = 1; i < sem->nr_replicas; ++i) {
2115 int len = aff->q_info[i].q->count;
2116
2117 // queue is smaller, or they're equal and the other has a smaller number
2118 // of total users.
2119 //
2120 // tie-break on the shortest number of simult users. this only kicks in
2121 // when there are more than 1 empty queues.
2122 if((len < min_count) ||
2123 ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
2124 shortest = &aff->q_info[i];
2125 min_count = shortest->q->count;
2126 min_nr_users = *(aff->q_info[i].nr_cur_users);
2127 }
2128
2129 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
2130 ikglp_get_idx(sem, aff->q_info[i].q),
2131 aff->q_info[i].q->count,
2132 *(aff->q_info[i].nr_cur_users));
2133 }
2134
2135 to_enqueue = shortest->q;
2136 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
2137 ikglp_get_idx(sem, to_enqueue),
2138 ikglp_get_idx(sem, sem->shortest_fifo_queue));
2139
2140 return to_enqueue;
2141}
2142
2143ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff)
2144{
2145 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2146 // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n");
2147 return ikglp_find_hp_waiter_to_steal(sem);
2148}
2149
2150ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff)
2151{
2152 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2153 ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
2154 return(donee);
2155}
2156
2157ikglp_wait_state_t* simple_gpu_ikglp_advise_doner_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
2158{
2159 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2160 ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
2161 return(donor);
2162}
2163
2164void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2165{
2166 // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n");
2167}
2168
2169void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2170{
2171 // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n");
2172}
2173
2174void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2175{
2176 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2177 int replica = ikglp_get_idx(sem, fq);
2178 int gpu = replica_to_gpu(aff, replica);
2179
2180 // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n");
2181
2182 // count the number or resource holders
2183 ++(*(aff->q_info[replica].nr_cur_users));
2184
2185 reg_nv_device(gpu, 1, t); // register
2186}
2187
2188void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2189{
2190 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2191 int replica = ikglp_get_idx(sem, fq);
2192 int gpu = replica_to_gpu(aff, replica);
2193
2194 // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n");
2195 // count the number or resource holders
2196 --(*(aff->q_info[replica].nr_cur_users));
2197
2198 reg_nv_device(gpu, 0, t); // unregister
2199}
2200
2201struct ikglp_affinity_ops simple_gpu_ikglp_affinity =
2202{
2203 .advise_enqueue = simple_gpu_ikglp_advise_enqueue,
2204 .advise_steal = simple_gpu_ikglp_advise_steal,
2205 .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection,
2206 .advise_doner_to_fq = simple_gpu_ikglp_advise_doner_to_fq,
2207
2208 .notify_enqueue = simple_gpu_ikglp_notify_enqueue,
2209 .notify_dequeue = simple_gpu_ikglp_notify_dequeue,
2210 .notify_acquired = simple_gpu_ikglp_notify_acquired,
2211 .notify_freed = simple_gpu_ikglp_notify_freed,
2212
2213 .replica_to_resource = gpu_replica_to_resource,
2214};
2215
2216struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
2217 void* __user args)
2218{
2219 return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args);
2220}
2221
2222#endif
2223
2224
2225
2226
2227
2228
2229
2230
2231
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index d0a6bd364c43..0b64977789a6 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -5,13 +5,13 @@
5#include <litmus/sched_plugin.h> 5#include <litmus/sched_plugin.h>
6#include <litmus/fdso.h> 6#include <litmus/fdso.h>
7 7
8#include <litmus/kfmlp_lock.h>
9
10#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) 8#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
11#include <litmus/gpu_affinity.h> 9#include <litmus/gpu_affinity.h>
12#include <litmus/nvidia_info.h> 10#include <litmus/nvidia_info.h>
13#endif 11#endif
14 12
13#include <litmus/kfmlp_lock.h>
14
15static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, 15static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
16 struct kfmlp_queue* queue) 16 struct kfmlp_queue* queue)
17{ 17{
@@ -508,6 +508,10 @@ struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
508 508
509 sem->shortest_queue = &sem->queues[0]; 509 sem->shortest_queue = &sem->queues[0];
510 510
511#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
512 sem->aff_obs = NULL;
513#endif
514
511 return &sem->litmus_lock; 515 return &sem->litmus_lock;
512} 516}
513 517
@@ -584,7 +588,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
584 } 588 }
585 589
586 if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { 590 if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
587 TRACE_CUR("System does not support #simult_users >%d. %d requested.\n", 591 TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
588 NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); 592 NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
589 return(NULL); 593 return(NULL);
590 } 594 }
@@ -628,7 +632,6 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
628 // attach observer to the lock 632 // attach observer to the lock
629 spin_lock_irqsave(&sem->lock, flags); 633 spin_lock_irqsave(&sem->lock, flags);
630 sem->aff_obs = kfmlp_aff; 634 sem->aff_obs = kfmlp_aff;
631 //kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)];
632 spin_unlock_irqrestore(&sem->lock, flags); 635 spin_unlock_irqrestore(&sem->lock, flags);
633 636
634 return &kfmlp_aff->obs; 637 return &kfmlp_aff->obs;
@@ -646,20 +649,20 @@ static int gpu_replica_to_resource(struct kfmlp_affinity* aff,
646 649
647// Smart KFMLP Affinity 650// Smart KFMLP Affinity
648 651
649static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) 652//static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff)
650{ 653//{
651 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); 654// struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
652 struct kfmlp_queue_info *shortest = &aff->q_info[0]; 655// struct kfmlp_queue_info *shortest = &aff->q_info[0];
653 int i; 656// int i;
654 657//
655 for(i = 1; i < sem->num_resources; ++i) { 658// for(i = 1; i < sem->num_resources; ++i) {
656 if(aff->q_info[i].estimated_len < shortest->estimated_len) { 659// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
657 shortest = &aff->q_info[i]; 660// shortest = &aff->q_info[i];
658 } 661// }
659 } 662// }
660 663//
661 return(shortest); 664// return(shortest);
662} 665//}
663 666
664struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) 667struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
665{ 668{