author     Glenn Elliott <gelliott@cs.unc.edu>   2012-04-18 23:18:32 -0400
committer  Glenn Elliott <gelliott@cs.unc.edu>   2012-04-18 23:18:32 -0400
commit     c6d04216a123f8e0b50eb78bbb1eaf646a1ca4e0 (patch)
tree       c6db90c6fd95a308db4849abebcb09a0dafaedd8
parent     149ef3b424a49e6b928c5e23fea83380ed95ea38 (diff)
Added hooks for IKGLP affinity and a little logic.
Simple IKGLP is already done. It does:
1) Auto GPU de/registration.
2) Distribution amongst simultaneous users across queues.
3) Calls default IKGLP routines when appropriate.
Remaining work:
1) FQ advisement.
2) Donor stealing advisement.
3) Donee selection advisement.
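For orientation before the diff itself, the pattern the patch repeats at each decision point is small: the semaphore gains an optional aff_obs pointer; when an affinity observer is attached, the lock/unlock paths call its advise_*/notify_* callbacks, and when none is attached they fall back to the stock IKGLP routines. Below is a minimal, self-contained userspace sketch of that dispatch pattern only; the types, fields, and the toy advisor are simplified stand-ins for illustration, not the actual LITMUS^RT definitions.

/*
 * Sketch of the advise/fallback dispatch this patch introduces.
 * Simplified types; not the kernel code.
 */
#include <stdio.h>

struct fifo_queue { int count; };

struct ikglp_affinity;

struct ikglp_affinity_ops {
	/* advice hook: may override the default queue choice */
	struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity *aff, int last_gpu);
};

struct ikglp_affinity {
	struct ikglp_affinity_ops *ops;
	struct fifo_queue *queues;
	int nr_queues;
};

struct ikglp_semaphore {
	struct fifo_queue queues[4];
	struct fifo_queue *shortest_fifo_queue;
	struct ikglp_affinity *aff_obs;   /* NULL until an observer attaches */
};

/* default IKGLP policy: take the shortest FIFO queue */
static struct fifo_queue *default_pick(struct ikglp_semaphore *sem)
{
	return sem->shortest_fifo_queue;
}

/* the pattern used throughout the patch: ask the observer if one is attached */
static struct fifo_queue *pick_fq(struct ikglp_semaphore *sem, int last_gpu)
{
	if (sem->aff_obs && sem->aff_obs->ops->advise_enqueue)
		return sem->aff_obs->ops->advise_enqueue(sem->aff_obs, last_gpu);
	return default_pick(sem);
}

/* toy advisor: prefer the queue matching last_gpu when it is no longer
 * than the shortest queue; otherwise fall back to the shortest queue */
static struct fifo_queue *simple_advise(struct ikglp_affinity *aff, int last_gpu)
{
	struct fifo_queue *shortest = &aff->queues[0];
	int i;
	for (i = 1; i < aff->nr_queues; ++i)
		if (aff->queues[i].count < shortest->count)
			shortest = &aff->queues[i];
	if (last_gpu >= 0 && last_gpu < aff->nr_queues &&
	    aff->queues[last_gpu].count <= shortest->count)
		return &aff->queues[last_gpu];
	return shortest;
}

int main(void)
{
	struct ikglp_semaphore sem = { .queues = { {2}, {1}, {3}, {1} } };
	struct ikglp_affinity_ops ops = { .advise_enqueue = simple_advise };
	struct ikglp_affinity aff = { .ops = &ops, .queues = sem.queues, .nr_queues = 4 };

	sem.shortest_fifo_queue = &sem.queues[1];
	sem.aff_obs = NULL;
	printf("default pick: queue %ld\n", (long)(pick_fq(&sem, 3) - sem.queues));

	sem.aff_obs = &aff;   /* attaching the observer switches to advised picks */
	printf("advised pick: queue %ld\n", (long)(pick_fq(&sem, 3) - sem.queues));
	return 0;
}

The "simple" variant added in this commit behaves roughly like the fallback above plus GPU registration bookkeeping (shortest queue with a tie-break on current holders); the FQ, stealing, and donee advisement listed as remaining work would replace those trivial choices with affinity-aware ones.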
-rw-r--r--  include/litmus/ikglp_lock.h |   8
-rw-r--r--  litmus/ikglp_lock.c         | 730
-rw-r--r--  litmus/kfmlp_lock.c         |  39
3 files changed, 705 insertions, 72 deletions
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index 08e73332c3d4..3fa23251b539 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -118,9 +118,9 @@ struct ikglp_queue_info | |||
118 | struct ikglp_affinity_ops | 118 | struct ikglp_affinity_ops |
119 | { | 119 | { |
120 | struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO | 120 | struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO |
121 | struct task_struct* (*advise_steal)(struct ikglp_affinity* aff, wait_queue_t** to_steal, struct fifo_queue** to_steal_from); // select steal from FIFO | 121 | ikglp_wait_state_t* (*advise_steal)(struct ikglp_affinity* aff); // select steal from FIFO |
122 | struct task_struct* (*advise_donee_selection)(struct ikglp_affinity* aff, wait_queue_t** donee, struct fifo_queue** donee_queue); // select a donee | 122 | ikglp_donee_heap_node_t* (*advise_donee_selection)(struct ikglp_affinity* aff); // select a donee |
123 | struct task_struct* (*advise_doner_to_fq)(struct ikglp_affinity* aff, ikglp_wait_state_t** donor); // select a donor to move to PQ | 123 | ikglp_wait_state_t* (*advise_doner_to_fq)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select a donor to move to PQ |
124 | 124 | ||
125 | void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue | 125 | void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue |
126 | void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue | 126 | void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue |
@@ -133,7 +133,7 @@ struct ikglp_affinity | |||
133 | { | 133 | { |
134 | struct affinity_observer obs; | 134 | struct affinity_observer obs; |
135 | struct ikglp_affinity_ops *ops; | 135 | struct ikglp_affinity_ops *ops; |
136 | struct fifo_queue *q_info; | 136 | struct ikglp_queue_info *q_info; |
137 | int *nr_cur_users_on_rsrc; | 137 | int *nr_cur_users_on_rsrc; |
138 | int offset; | 138 | int offset; |
139 | int nr_simult; | 139 | int nr_simult; |
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 94c954464a96..0e07841b86ba 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -3,9 +3,14 @@ | |||
3 | 3 | ||
4 | #include <litmus/trace.h> | 4 | #include <litmus/trace.h> |
5 | #include <litmus/sched_plugin.h> | 5 | #include <litmus/sched_plugin.h> |
6 | #include <litmus/ikglp_lock.h> | 6 | #include <litmus/fdso.h> |
7 | 7 | ||
8 | //#include <litmus/edf_common.h> | 8 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) |
9 | #include <litmus/gpu_affinity.h> | ||
10 | #include <litmus/nvidia_info.h> | ||
11 | #endif | ||
12 | |||
13 | #include <litmus/ikglp_lock.h> | ||
9 | 14 | ||
10 | int ikglp_max_heap_base_priority_order(struct binheap_node *a, | 15 | int ikglp_max_heap_base_priority_order(struct binheap_node *a, |
11 | struct binheap_node *b) | 16 | struct binheap_node *b) |
@@ -16,7 +21,6 @@ int ikglp_max_heap_base_priority_order(struct binheap_node *a, | |||
16 | BUG_ON(!d_a); | 21 | BUG_ON(!d_a); |
17 | BUG_ON(!d_b); | 22 | BUG_ON(!d_b); |
18 | 23 | ||
19 | //return __edf_higher_prio(d_a->task, BASE, d_b->task, BASE); | ||
20 | return litmus->__compare(d_a->task, BASE, d_b->task, BASE); | 24 | return litmus->__compare(d_a->task, BASE, d_b->task, BASE); |
21 | } | 25 | } |
22 | 26 | ||
@@ -26,7 +30,6 @@ int ikglp_min_heap_base_priority_order(struct binheap_node *a, | |||
26 | ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); | 30 | ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); |
27 | ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); | 31 | ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); |
28 | 32 | ||
29 | //return __edf_higher_prio(d_b->task, BASE, d_a->task, BASE); | ||
30 | return litmus->__compare(d_b->task, BASE, d_a->task, BASE); | 33 | return litmus->__compare(d_b->task, BASE, d_a->task, BASE); |
31 | } | 34 | } |
32 | 35 | ||
@@ -36,7 +39,6 @@ int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a, | |||
36 | ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node); | 39 | ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node); |
37 | ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node); | 40 | ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node); |
38 | 41 | ||
39 | //return __edf_higher_prio(d_a->task, BASE, d_b->task, BASE); | ||
40 | return litmus->__compare(d_a->task, BASE, d_b->task, BASE); | 42 | return litmus->__compare(d_a->task, BASE, d_b->task, BASE); |
41 | } | 43 | } |
42 | 44 | ||
@@ -68,7 +70,6 @@ int ikglp_min_heap_donee_order(struct binheap_node *a, | |||
68 | } | 70 | } |
69 | 71 | ||
70 | // note reversed order | 72 | // note reversed order |
71 | //return __edf_higher_prio(prio_b, BASE, prio_a, BASE); | ||
72 | return litmus->__compare(prio_b, BASE, prio_a, BASE); | 73 | return litmus->__compare(prio_b, BASE, prio_a, BASE); |
73 | } | 74 | } |
74 | 75 | ||
@@ -103,7 +104,6 @@ static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue, | |||
103 | wait_queue_t, task_list)->private; | 104 | wait_queue_t, task_list)->private; |
104 | 105 | ||
105 | /* Compare task prios, find high prio task. */ | 106 | /* Compare task prios, find high prio task. */ |
106 | //if (queued != skip && edf_higher_prio(queued, found)) | ||
107 | if(queued != skip && litmus->compare(queued, found)) | 107 | if(queued != skip && litmus->compare(queued, found)) |
108 | found = queued; | 108 | found = queued; |
109 | } | 109 | } |
@@ -246,7 +246,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, | |||
246 | // TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); | 246 | // TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); |
247 | // print_global_list(sem->top_m.root, 1); | 247 | // print_global_list(sem->top_m.root, 1); |
248 | } | 248 | } |
249 | //else if(__edf_higher_prio(t, BASE, ikglp_mth_highest(sem), BASE)) { | ||
250 | else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { | 249 | else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { |
251 | ikglp_heap_node_t *evicted = | 250 | ikglp_heap_node_t *evicted = |
252 | binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node); | 251 | binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node); |
@@ -367,7 +366,6 @@ static void ikglp_refresh_owners_prio_increase(struct task_struct *t, | |||
367 | unsigned long flags) | 366 | unsigned long flags) |
368 | { | 367 | { |
369 | // priority of 't' has increased (note: 't' might already be hp_waiter). | 368 | // priority of 't' has increased (note: 't' might already be hp_waiter). |
370 | // if ((t == fq->hp_waiter) || edf_higher_prio(t, fq->hp_waiter)) { | ||
371 | if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) { | 369 | if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) { |
372 | struct task_struct *old_max_eff_prio; | 370 | struct task_struct *old_max_eff_prio; |
373 | struct task_struct *new_max_eff_prio; | 371 | struct task_struct *new_max_eff_prio; |
@@ -478,7 +476,6 @@ static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq, | |||
478 | TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", | 476 | TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", |
479 | ikglp_get_idx(sem, fq)); | 477 | ikglp_get_idx(sem, fq)); |
480 | 478 | ||
481 | //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) { | ||
482 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { | 479 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { |
483 | TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n", | 480 | TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n", |
484 | (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", | 481 | (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", |
@@ -540,7 +537,6 @@ static void ikglp_remove_donation_from_owner(struct binheap_node *n, | |||
540 | TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", | 537 | TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", |
541 | ikglp_get_idx(sem, fq)); | 538 | ikglp_get_idx(sem, fq)); |
542 | 539 | ||
543 | //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) { | ||
544 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { | 540 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { |
545 | TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n", | 541 | TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n", |
546 | ikglp_get_idx(sem, fq)); | 542 | ikglp_get_idx(sem, fq)); |
@@ -582,7 +578,6 @@ static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t, | |||
582 | // Need to set new effective_priority for owner | 578 | // Need to set new effective_priority for owner |
583 | struct task_struct *decreased_prio; | 579 | struct task_struct *decreased_prio; |
584 | 580 | ||
585 | //if(__edf_higher_prio(new_max_eff_prio, BASE, t, BASE)) { | ||
586 | if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) { | 581 | if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) { |
587 | decreased_prio = new_max_eff_prio; | 582 | decreased_prio = new_max_eff_prio; |
588 | } | 583 | } |
@@ -618,6 +613,13 @@ static void ikglp_get_immediate(struct task_struct* t, | |||
618 | 613 | ||
619 | sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue); | 614 | sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue); |
620 | 615 | ||
616 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
617 | if(sem->aff_obs) { | ||
618 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); | ||
619 | sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t); | ||
620 | } | ||
621 | #endif | ||
622 | |||
621 | unlock_fine_irqrestore(&sem->lock, flags); | 623 | unlock_fine_irqrestore(&sem->lock, flags); |
622 | } | 624 | } |
623 | 625 | ||
@@ -662,6 +664,12 @@ static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem, | |||
662 | sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq); | 664 | sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq); |
663 | } | 665 | } |
664 | 666 | ||
667 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
668 | if(sem->aff_obs) { | ||
669 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); | ||
670 | } | ||
671 | #endif | ||
672 | |||
665 | TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq)); | 673 | TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq)); |
666 | } | 674 | } |
667 | 675 | ||
@@ -732,8 +740,14 @@ static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem, | |||
732 | ikglp_add_global_list(sem, t, &wait->global_heap_node); | 740 | ikglp_add_global_list(sem, t, &wait->global_heap_node); |
733 | 741 | ||
734 | // Select a donee | 742 | // Select a donee |
735 | donee_node = binheap_top_entry(&sem->donees, | 743 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
736 | ikglp_donee_heap_node_t, node); | 744 | donee_node = (sem->aff_obs) ? |
745 | sem->aff_obs->ops->advise_donee_selection(sem->aff_obs) : | ||
746 | binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
747 | #else | ||
748 | donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
749 | #endif | ||
750 | |||
737 | donee = donee_node->task; | 751 | donee = donee_node->task; |
738 | 752 | ||
739 | TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid); | 753 | TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid); |
@@ -743,7 +757,8 @@ static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem, | |||
743 | // TRACE_CUR("donees Before:\n"); | 757 | // TRACE_CUR("donees Before:\n"); |
744 | // print_donees(sem, sem->donees.root, 1); | 758 | // print_donees(sem, sem->donees.root, 1); |
745 | 759 | ||
746 | binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly | 760 | //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly |
761 | binheap_delete(&donee_node->node, &sem->donees); | ||
747 | 762 | ||
748 | // TRACE_CUR("donees After:\n"); | 763 | // TRACE_CUR("donees After:\n"); |
749 | // print_donees(sem, sem->donees.root, 1); | 764 | // print_donees(sem, sem->donees.root, 1); |
@@ -813,7 +828,6 @@ static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem, | |||
813 | 828 | ||
814 | if(new_max_eff_prio != old_max_eff_prio) { | 829 | if(new_max_eff_prio != old_max_eff_prio) { |
815 | if ((effective_priority(donee) == old_max_eff_prio) || | 830 | if ((effective_priority(donee) == old_max_eff_prio) || |
816 | //(__edf_higher_prio(new_max_eff_prio, BASE, donee, EFFECTIVE))){ | ||
817 | (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){ | 831 | (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){ |
818 | TRACE_TASK(t, "Donation increases %s/%d's effective priority\n", | 832 | TRACE_TASK(t, "Donation increases %s/%d's effective priority\n", |
819 | donee->comm, donee->pid); | 833 | donee->comm, donee->pid); |
@@ -887,11 +901,20 @@ int ikglp_lock(struct litmus_lock* l) | |||
887 | lock_global_irqsave(dgl_lock, flags); | 901 | lock_global_irqsave(dgl_lock, flags); |
888 | lock_fine_irqsave(&sem->lock, flags); | 902 | lock_fine_irqsave(&sem->lock, flags); |
889 | 903 | ||
890 | if(sem->shortest_fifo_queue->count == 0) { | 904 | |
905 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
906 | fq = (sem->aff_obs) ? | ||
907 | sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) : | ||
908 | sem->shortest_fifo_queue; | ||
909 | #else | ||
910 | fq = sem->shortest_fifo_queue; | ||
911 | #endif | ||
912 | |||
913 | if(fq->count == 0) { | ||
891 | // take available resource | 914 | // take available resource |
892 | replica = ikglp_get_idx(sem, sem->shortest_fifo_queue); | 915 | //replica = ikglp_get_idx(sem, fq); |
893 | 916 | ||
894 | ikglp_get_immediate(t, sem->shortest_fifo_queue, sem, flags); // unlocks sem->lock | 917 | ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock |
895 | 918 | ||
896 | unlock_global_irqrestore(dgl_lock, flags); | 919 | unlock_global_irqrestore(dgl_lock, flags); |
897 | raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); | 920 | raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); |
@@ -908,17 +931,16 @@ int ikglp_lock(struct litmus_lock* l) | |||
908 | /* FIXME: interruptible would be nice some day */ | 931 | /* FIXME: interruptible would be nice some day */ |
909 | set_task_state(t, TASK_UNINTERRUPTIBLE); | 932 | set_task_state(t, TASK_UNINTERRUPTIBLE); |
910 | 933 | ||
911 | if(sem->shortest_fifo_queue->count < sem->max_fifo_len) { | 934 | if(fq->count < sem->max_fifo_len) { |
912 | // enqueue on fq | 935 | // enqueue on fq |
913 | ikglp_enqueue_on_fq(sem, sem->shortest_fifo_queue, &wait, flags); // unlocks sem->lock | 936 | ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock |
914 | } | 937 | } |
915 | else { | 938 | else { |
916 | 939 | ||
917 | TRACE_CUR("IKGLP fifo queues are full.\n"); | 940 | TRACE_CUR("IKGLP fifo queues are full (at least they better be).\n"); |
918 | 941 | ||
919 | // no room in fifos. Go to PQ or donors. | 942 | // no room in fifos. Go to PQ or donors. |
920 | 943 | ||
921 | //if(__edf_higher_prio(ikglp_mth_highest(sem), BASE, t, BASE)) { | ||
922 | if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { | 944 | if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { |
923 | // enqueue on PQ | 945 | // enqueue on PQ |
924 | ikglp_enqueue_on_pq(sem, &wait); | 946 | ikglp_enqueue_on_pq(sem, &wait); |
@@ -942,13 +964,19 @@ int ikglp_lock(struct litmus_lock* l) | |||
942 | fq = ikglp_get_queue(sem, t); | 964 | fq = ikglp_get_queue(sem, t); |
943 | BUG_ON(!fq); | 965 | BUG_ON(!fq); |
944 | 966 | ||
945 | replica = ikglp_get_idx(sem, fq); | 967 | //replica = ikglp_get_idx(sem, fq); |
946 | } | 968 | } |
947 | 969 | ||
948 | TRACE_CUR("Acquired lock %d, queue %d\n", | 970 | TRACE_CUR("Acquired lock %d, queue %d\n", |
949 | l->ident, replica); | 971 | l->ident, replica); |
950 | 972 | ||
951 | return replica; | 973 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
974 | if(sem->aff_obs) { | ||
975 | return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq); | ||
976 | } | ||
977 | #endif | ||
978 | |||
979 | return ikglp_get_idx(sem, fq); | ||
952 | } | 980 | } |
953 | 981 | ||
954 | static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem, | 982 | static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem, |
@@ -1006,7 +1034,6 @@ static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal( | |||
1006 | 1034 | ||
1007 | for(i = 0; i < sem->nr_replicas; ++i) { | 1035 | for(i = 0; i < sem->nr_replicas; ++i) { |
1008 | if( (sem->fifo_queues[i].count > 1) && | 1036 | if( (sem->fifo_queues[i].count > 1) && |
1009 | //(!fq || edf_higher_prio(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) { | ||
1010 | (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) { | 1037 | (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) { |
1011 | 1038 | ||
1012 | TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n", | 1039 | TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n", |
@@ -1078,6 +1105,12 @@ static void ikglp_steal_to_fq(struct ikglp_semaphore *sem, | |||
1078 | __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node); | 1105 | __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node); |
1079 | --(fq_steal->count); | 1106 | --(fq_steal->count); |
1080 | 1107 | ||
1108 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1109 | if(sem->aff_obs) { | ||
1110 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t); | ||
1111 | } | ||
1112 | #endif | ||
1113 | |||
1081 | fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL); | 1114 | fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL); |
1082 | TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", | 1115 | TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", |
1083 | ikglp_get_idx(sem, fq_steal), | 1116 | ikglp_get_idx(sem, fq_steal), |
@@ -1152,14 +1185,6 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1152 | 1185 | ||
1153 | int err = 0; | 1186 | int err = 0; |
1154 | 1187 | ||
1155 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1156 | dgl_lock = litmus->get_dgl_spinlock(t); | ||
1157 | #endif | ||
1158 | |||
1159 | raw_spin_lock_irqsave(&sem->real_lock, real_flags); | ||
1160 | |||
1161 | lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper | ||
1162 | lock_fine_irqsave(&sem->lock, flags); | ||
1163 | 1188 | ||
1164 | fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner. | 1189 | fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner. |
1165 | 1190 | ||
@@ -1168,6 +1193,14 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1168 | goto out; | 1193 | goto out; |
1169 | } | 1194 | } |
1170 | 1195 | ||
1196 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1197 | dgl_lock = litmus->get_dgl_spinlock(t); | ||
1198 | #endif | ||
1199 | raw_spin_lock_irqsave(&sem->real_lock, real_flags); | ||
1200 | |||
1201 | lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper | ||
1202 | lock_fine_irqsave(&sem->lock, flags); | ||
1203 | |||
1171 | TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq)); | 1204 | TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq)); |
1172 | 1205 | ||
1173 | 1206 | ||
@@ -1175,6 +1208,19 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1175 | ikglp_del_global_list(sem, t, &fq->global_heap_node); | 1208 | ikglp_del_global_list(sem, t, &fq->global_heap_node); |
1176 | binheap_delete(&fq->donee_heap_node.node, &sem->donees); | 1209 | binheap_delete(&fq->donee_heap_node.node, &sem->donees); |
1177 | 1210 | ||
1211 | fq->owner = NULL; // no longer owned!! | ||
1212 | --(fq->count); | ||
1213 | if(fq->count < sem->shortest_fifo_queue->count) { | ||
1214 | sem->shortest_fifo_queue = fq; | ||
1215 | } | ||
1216 | |||
1217 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1218 | if(sem->aff_obs) { | ||
1219 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t); | ||
1220 | sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t); | ||
1221 | } | ||
1222 | #endif | ||
1223 | |||
1178 | // Move the next request into the FQ and update heaps as needed. | 1224 | // Move the next request into the FQ and update heaps as needed. |
1179 | // We defer re-evaluation of priorities to later in the function. | 1225 | // We defer re-evaluation of priorities to later in the function. |
1180 | if(fq->donee_heap_node.donor_info) { // move my doner to FQ | 1226 | if(fq->donee_heap_node.donor_info) { // move my doner to FQ |
@@ -1191,8 +1237,14 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1191 | } | 1237 | } |
1192 | else if(!binheap_empty(&sem->donors)) { // No donor, so move any donor to FQ | 1238 | else if(!binheap_empty(&sem->donors)) { // No donor, so move any donor to FQ |
1193 | // move other donor to FQ | 1239 | // move other donor to FQ |
1194 | other_donor_info = binheap_top_entry(&sem->donors, | 1240 | // Select a donor |
1195 | ikglp_wait_state_t, node); | 1241 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1242 | other_donor_info = (sem->aff_obs) ? | ||
1243 | sem->aff_obs->ops->advise_doner_to_fq(sem->aff_obs, fq) : | ||
1244 | binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
1245 | #else | ||
1246 | other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
1247 | #endif | ||
1196 | 1248 | ||
1197 | new_on_fq = other_donor_info->task; | 1249 | new_on_fq = other_donor_info->task; |
1198 | donee = other_donor_info->donee_info->task; | 1250 | donee = other_donor_info->donee_info->task; |
@@ -1201,7 +1253,6 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1201 | other_donor_info->donee_info->donor_info = NULL; // clear the cross-link | 1253 | other_donor_info->donee_info->donor_info = NULL; // clear the cross-link |
1202 | binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); | 1254 | binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); |
1203 | 1255 | ||
1204 | |||
1205 | TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d.\n", | 1256 | TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d.\n", |
1206 | new_on_fq->comm, new_on_fq->pid, | 1257 | new_on_fq->comm, new_on_fq->pid, |
1207 | ikglp_get_idx(sem, fq)); | 1258 | ikglp_get_idx(sem, fq)); |
@@ -1222,14 +1273,20 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1222 | 1273 | ||
1223 | ikglp_move_pq_to_fq(sem, fq, pq_wait); | 1274 | ikglp_move_pq_to_fq(sem, fq, pq_wait); |
1224 | } | 1275 | } |
1225 | else if(fq->count == 1) { // No PQ and this queue is empty, so steal | 1276 | else if(fq->count == 0) { // No PQ and this queue is empty, so steal. |
1226 | // steal. | ||
1227 | ikglp_wait_state_t *fq_wait; | 1277 | ikglp_wait_state_t *fq_wait; |
1228 | 1278 | ||
1229 | TRACE_TASK(t, "Looking to steal a request for fq %d...\n", | 1279 | TRACE_TASK(t, "Looking to steal a request for fq %d...\n", |
1230 | ikglp_get_idx(sem, fq)); | 1280 | ikglp_get_idx(sem, fq)); |
1231 | 1281 | ||
1282 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1283 | fq_wait = (sem->aff_obs) ? | ||
1284 | sem->aff_obs->ops->advise_steal(sem->aff_obs) : | ||
1285 | ikglp_find_hp_waiter_to_steal(sem); | ||
1286 | #else | ||
1232 | fq_wait = ikglp_find_hp_waiter_to_steal(sem); | 1287 | fq_wait = ikglp_find_hp_waiter_to_steal(sem); |
1288 | #endif | ||
1289 | |||
1233 | if(fq_wait) { | 1290 | if(fq_wait) { |
1234 | to_steal = fq_wait->donee_heap_node.fq; | 1291 | to_steal = fq_wait->donee_heap_node.fq; |
1235 | 1292 | ||
@@ -1267,15 +1324,6 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1267 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); | 1324 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); |
1268 | 1325 | ||
1269 | 1326 | ||
1270 | // Updating the owner and updating sem->shortest_fifo_queue | ||
1271 | // could have been done sooner, but it is deffered, hoping | ||
1272 | // that it will reduce thrashing of sem->shortest_fifo_queue | ||
1273 | // assignment. | ||
1274 | fq->owner = NULL; // no longer owned!! | ||
1275 | --(fq->count); | ||
1276 | if(fq->count < sem->shortest_fifo_queue->count) { | ||
1277 | sem->shortest_fifo_queue = fq; | ||
1278 | } | ||
1279 | 1327 | ||
1280 | // Now patch up other priorities. | 1328 | // Now patch up other priorities. |
1281 | // | 1329 | // |
@@ -1344,7 +1392,6 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1344 | fq->hp_waiter->comm, fq->hp_waiter->pid); | 1392 | fq->hp_waiter->comm, fq->hp_waiter->pid); |
1345 | fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure... | 1393 | fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure... |
1346 | } | 1394 | } |
1347 | //else if(edf_higher_prio(new_on_fq, fq->hp_waiter)) { | ||
1348 | else if(litmus->compare(new_on_fq, fq->hp_waiter)) { | 1395 | else if(litmus->compare(new_on_fq, fq->hp_waiter)) { |
1349 | if(fq->hp_waiter) | 1396 | if(fq->hp_waiter) |
1350 | TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", | 1397 | TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", |
@@ -1382,6 +1429,11 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1382 | fq->owner = next; | 1429 | fq->owner = next; |
1383 | tsk_rt(next)->blocked_lock = NULL; | 1430 | tsk_rt(next)->blocked_lock = NULL; |
1384 | 1431 | ||
1432 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1433 | if(sem->aff_obs) { | ||
1434 | sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next); | ||
1435 | } | ||
1436 | #endif | ||
1385 | 1437 | ||
1386 | /* determine new hp_waiter if necessary */ | 1438 | /* determine new hp_waiter if necessary */ |
1387 | if (next == fq->hp_waiter) { | 1439 | if (next == fq->hp_waiter) { |
@@ -1461,12 +1513,12 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1461 | wake_up_process(next); | 1513 | wake_up_process(next); |
1462 | } | 1514 | } |
1463 | 1515 | ||
1464 | out: | ||
1465 | unlock_fine_irqrestore(&sem->lock, flags); | 1516 | unlock_fine_irqrestore(&sem->lock, flags); |
1466 | unlock_global_irqrestore(dgl_lock, flags); | 1517 | unlock_global_irqrestore(dgl_lock, flags); |
1467 | 1518 | ||
1468 | raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); | 1519 | raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); |
1469 | 1520 | ||
1521 | out: | ||
1470 | return err; | 1522 | return err; |
1471 | } | 1523 | } |
1472 | 1524 | ||
@@ -1597,5 +1649,583 @@ struct litmus_lock* ikglp_new(int m, | |||
1597 | INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order); | 1649 | INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order); |
1598 | INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order); | 1650 | INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order); |
1599 | 1651 | ||
1652 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1653 | sem->aff_obs = NULL; | ||
1654 | #endif | ||
1655 | |||
1600 | return &sem->litmus_lock; | 1656 | return &sem->litmus_lock; |
1601 | } | 1657 | } |
1658 | |||
1659 | |||
1660 | |||
1661 | |||
1662 | |||
1663 | |||
1664 | |||
1665 | |||
1666 | |||
1667 | |||
1668 | |||
1669 | |||
1670 | |||
1671 | |||
1672 | |||
1673 | |||
1674 | |||
1675 | |||
1676 | |||
1677 | |||
1678 | |||
1679 | |||
1680 | |||
1681 | |||
1682 | |||
1683 | |||
1684 | |||
1685 | |||
1686 | |||
1687 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
1688 | |||
1689 | static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) | ||
1690 | { | ||
1691 | int gpu = replica % aff->nr_rsrc; | ||
1692 | return gpu; | ||
1693 | } | ||
1694 | |||
1695 | static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica) | ||
1696 | { | ||
1697 | int gpu = __replica_to_gpu(aff, replica) + aff->offset; | ||
1698 | return gpu; | ||
1699 | } | ||
1700 | |||
1701 | static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu) | ||
1702 | { | ||
1703 | int replica = gpu - aff->offset; | ||
1704 | return replica; | ||
1705 | } | ||
1706 | |||
1707 | |||
1708 | int ikglp_aff_obs_close(struct affinity_observer* obs) | ||
1709 | { | ||
1710 | return 0; | ||
1711 | } | ||
1712 | |||
1713 | void ikglp_aff_obs_free(struct affinity_observer* obs) | ||
1714 | { | ||
1715 | struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs); | ||
1716 | kfree(ikglp_aff->nr_cur_users_on_rsrc); | ||
1717 | kfree(ikglp_aff->q_info); | ||
1718 | kfree(ikglp_aff); | ||
1719 | } | ||
1720 | |||
1721 | static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops, | ||
1722 | struct ikglp_affinity_ops* ikglp_ops, | ||
1723 | void* __user args) | ||
1724 | { | ||
1725 | struct ikglp_affinity* ikglp_aff; | ||
1726 | struct gpu_affinity_observer_args aff_args; | ||
1727 | struct ikglp_semaphore* sem; | ||
1728 | int i; | ||
1729 | unsigned long flags; | ||
1730 | |||
1731 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { | ||
1732 | return(NULL); | ||
1733 | } | ||
1734 | if(__copy_from_user(&aff_args, args, sizeof(aff_args))) { | ||
1735 | return(NULL); | ||
1736 | } | ||
1737 | |||
1738 | sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); | ||
1739 | |||
1740 | if(sem->litmus_lock.type != IKGLP_SEM) { | ||
1741 | TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); | ||
1742 | return(NULL); | ||
1743 | } | ||
1744 | |||
1745 | if((aff_args.nr_simult_users <= 0) || | ||
1746 | (sem->nr_replicas%aff_args.nr_simult_users != 0)) { | ||
1747 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " | ||
1748 | "(%d) per replica. #replicas should be evenly divisible " | ||
1749 | "by #simult_users.\n", | ||
1750 | sem->litmus_lock.ident, | ||
1751 | sem->nr_replicas, | ||
1752 | aff_args.nr_simult_users); | ||
1753 | return(NULL); | ||
1754 | } | ||
1755 | |||
1756 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | ||
1757 | TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", | ||
1758 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | ||
1759 | return(NULL); | ||
1760 | } | ||
1761 | |||
1762 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); | ||
1763 | if(!ikglp_aff) { | ||
1764 | return(NULL); | ||
1765 | } | ||
1766 | |||
1767 | ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL); | ||
1768 | if(!ikglp_aff->q_info) { | ||
1769 | kfree(ikglp_aff); | ||
1770 | return(NULL); | ||
1771 | } | ||
1772 | |||
1773 | ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); | ||
1774 | if(!ikglp_aff->nr_cur_users_on_rsrc) { | ||
1775 | kfree(ikglp_aff->q_info); | ||
1776 | kfree(ikglp_aff); | ||
1777 | return(NULL); | ||
1778 | } | ||
1779 | |||
1780 | affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs); | ||
1781 | |||
1782 | ikglp_aff->ops = ikglp_ops; | ||
1783 | ikglp_aff->offset = aff_args.replica_to_gpu_offset; | ||
1784 | ikglp_aff->nr_simult = aff_args.nr_simult_users; | ||
1785 | ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; | ||
1786 | |||
1787 | memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->nr_replicas / ikglp_aff->nr_rsrc)); | ||
1788 | |||
1789 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
1790 | ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; | ||
1791 | ikglp_aff->q_info[i].estimated_len = 0; | ||
1792 | |||
1793 | // multiple q_info's will point to the same resource (aka GPU) if | ||
1794 | // aff_args.nr_simult_users > 1 | ||
1795 | ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)]; | ||
1796 | } | ||
1797 | |||
1798 | // attach observer to the lock | ||
1799 | raw_spin_lock_irqsave(&sem->real_lock, flags); | ||
1800 | sem->aff_obs = ikglp_aff; | ||
1801 | raw_spin_unlock_irqrestore(&sem->real_lock, flags); | ||
1802 | |||
1803 | return &ikglp_aff->obs; | ||
1804 | } | ||
1805 | |||
1806 | |||
1807 | |||
1808 | |||
1809 | static int gpu_replica_to_resource(struct ikglp_affinity* aff, | ||
1810 | struct fifo_queue* fq) { | ||
1811 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1812 | return(replica_to_gpu(aff, ikglp_get_idx(sem, fq))); | ||
1813 | } | ||
1814 | |||
1815 | |||
1816 | // Smart IKGLP Affinity | ||
1817 | |||
1818 | //static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff) | ||
1819 | //{ | ||
1820 | // struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1821 | // struct ikglp_queue_info *shortest = &aff->q_info[0]; | ||
1822 | // int i; | ||
1823 | // | ||
1824 | // for(i = 1; i < sem->nr_replicas; ++i) { | ||
1825 | // if(aff->q_info[i].estimated_len < shortest->estimated_len) { | ||
1826 | // shortest = &aff->q_info[i]; | ||
1827 | // } | ||
1828 | // } | ||
1829 | // | ||
1830 | // return(shortest); | ||
1831 | //} | ||
1832 | |||
1833 | struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) | ||
1834 | { | ||
1835 | // advise_enqueue must be smart so as not to break IKGLP rules: | ||
1836 | // * Total number of waiters cannot exceed ceil(m/k)*k. | ||
1837 | // * Cannot let a queue idle if there exist waiting PQ/donors | ||
1838 | // -- needed to guarantee parallel progress of waiters. | ||
1839 | // | ||
1840 | // Locking protocol is smart enough to notice that a queue we return is | ||
1841 | // full and send new requests to Donors/PQ. | ||
1842 | // | ||
1843 | // We may be able to relax some of these constraints, but this will have to | ||
1844 | // be carefully evaluated. | ||
1845 | |||
1846 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1847 | |||
1848 | /* | ||
1849 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1850 | lt_t min_len; | ||
1851 | int min_nr_users; | ||
1852 | struct ikglp_queue_info *shortest; | ||
1853 | struct ikglp_queue *to_enqueue; | ||
1854 | int i; | ||
1855 | int affinity_gpu; | ||
1856 | |||
1857 | // simply pick the shortest queue if, we have no affinity, or we have | ||
1858 | // affinity with the shortest | ||
1859 | if(unlikely(tsk_rt(t)->last_gpu < 0)) { | ||
1860 | affinity_gpu = aff->offset; // first gpu | ||
1861 | TRACE_CUR("no affinity\n"); | ||
1862 | } | ||
1863 | else { | ||
1864 | affinity_gpu = tsk_rt(t)->last_gpu; | ||
1865 | } | ||
1866 | |||
1867 | // all things being equal, let's start with the queue with which we have | ||
1868 | // affinity. this helps us maintain affinity even when we don't have | ||
1869 | // an estimate for local-affinity execution time (i.e., 2nd time on GPU) | ||
1870 | shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; | ||
1871 | |||
1872 | // if(shortest == aff->shortest_queue) { | ||
1873 | // TRACE_CUR("special case: have affinity with shortest queue\n"); | ||
1874 | // goto out; | ||
1875 | // } | ||
1876 | |||
1877 | min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); | ||
1878 | min_nr_users = *(shortest->nr_cur_users); | ||
1879 | |||
1880 | TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", | ||
1881 | get_gpu_estimate(t, MIG_LOCAL), | ||
1882 | ikglp_get_idx(sem, shortest->q), | ||
1883 | min_len); | ||
1884 | |||
1885 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
1886 | if(&aff->q_info[i] != shortest) { | ||
1887 | |||
1888 | lt_t est_len = | ||
1889 | aff->q_info[i].estimated_len + | ||
1890 | get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))); | ||
1891 | |||
1892 | // queue is smaller, or they're equal and the other has a smaller number | ||
1893 | // of total users. | ||
1894 | // | ||
1895 | // tie-break on the shortest number of simult users. this only kicks in | ||
1896 | // when there are more than 1 empty queues. | ||
1897 | if((est_len < min_len) || | ||
1898 | ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { | ||
1899 | shortest = &aff->q_info[i]; | ||
1900 | min_len = est_len; | ||
1901 | min_nr_users = *(aff->q_info[i].nr_cur_users); | ||
1902 | } | ||
1903 | |||
1904 | TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", | ||
1905 | get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))), | ||
1906 | ikglp_get_idx(sem, aff->q_info[i].q), | ||
1907 | est_len); | ||
1908 | } | ||
1909 | } | ||
1910 | |||
1911 | to_enqueue = shortest->q; | ||
1912 | TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", | ||
1913 | ikglp_get_idx(sem, to_enqueue), | ||
1914 | ikglp_get_idx(sem, sem->shortest_queue)); | ||
1915 | |||
1916 | return to_enqueue; | ||
1917 | */ | ||
1918 | return(sem->shortest_fifo_queue); | ||
1919 | } | ||
1920 | |||
1921 | ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff) | ||
1922 | { | ||
1923 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1924 | |||
1925 | // For now, just steal highest priority waiter | ||
1926 | // TODO: Implement affinity-aware stealing. | ||
1927 | |||
1928 | return ikglp_find_hp_waiter_to_steal(sem); | ||
1929 | } | ||
1930 | |||
1931 | ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff) | ||
1932 | { | ||
1933 | // TODO: MAKE THIS SMARTER | ||
1934 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1935 | ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
1936 | return(donee); | ||
1937 | } | ||
1938 | |||
1939 | ikglp_wait_state_t* gpu_ikglp_advise_doner_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq) | ||
1940 | { | ||
1941 | // TODO: MAKE THIS SMARTER | ||
1942 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1943 | ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
1944 | return(donor); | ||
1945 | } | ||
1946 | |||
1947 | |||
1948 | |||
1949 | void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
1950 | { | ||
1951 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1952 | int replica = ikglp_get_idx(sem, fq); | ||
1953 | int gpu = replica_to_gpu(aff, replica); | ||
1954 | struct ikglp_queue_info *info = &aff->q_info[replica]; | ||
1955 | lt_t est_time; | ||
1956 | lt_t est_len_before; | ||
1957 | |||
1958 | if(current == t) { | ||
1959 | tsk_rt(t)->suspend_gpu_tracker_on_block = 1; | ||
1960 | } | ||
1961 | |||
1962 | est_len_before = info->estimated_len; | ||
1963 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
1964 | info->estimated_len += est_time; | ||
1965 | |||
1966 | TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n", | ||
1967 | ikglp_get_idx(sem, info->q), | ||
1968 | est_len_before, est_time, | ||
1969 | info->estimated_len); | ||
1970 | |||
1971 | // if(aff->shortest_queue == info) { | ||
1972 | // // we may no longer be the shortest | ||
1973 | // aff->shortest_queue = ikglp_aff_find_shortest(aff); | ||
1974 | // | ||
1975 | // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", | ||
1976 | // ikglp_get_idx(sem, aff->shortest_queue->q), | ||
1977 | // aff->shortest_queue->q->count, | ||
1978 | // aff->shortest_queue->estimated_len); | ||
1979 | // } | ||
1980 | } | ||
1981 | |||
1982 | void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
1983 | { | ||
1984 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1985 | int replica = ikglp_get_idx(sem, fq); | ||
1986 | int gpu = replica_to_gpu(aff, replica); | ||
1987 | struct ikglp_queue_info *info = &aff->q_info[replica]; | ||
1988 | lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
1989 | |||
1990 | if(est_time > info->estimated_len) { | ||
1991 | WARN_ON(1); | ||
1992 | info->estimated_len = 0; | ||
1993 | } | ||
1994 | else { | ||
1995 | info->estimated_len -= est_time; | ||
1996 | } | ||
1997 | |||
1998 | TRACE_CUR("fq %d est len is now %llu\n", | ||
1999 | ikglp_get_idx(sem, info->q), | ||
2000 | info->estimated_len); | ||
2001 | |||
2002 | // check to see if we're the shortest queue now. | ||
2003 | // if((aff->shortest_queue != info) && | ||
2004 | // (aff->shortest_queue->estimated_len > info->estimated_len)) { | ||
2005 | // | ||
2006 | // aff->shortest_queue = info; | ||
2007 | // | ||
2008 | // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", | ||
2009 | // ikglp_get_idx(sem, info->q), | ||
2010 | // info->q->count, | ||
2011 | // info->estimated_len); | ||
2012 | // } | ||
2013 | } | ||
2014 | |||
2015 | void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2016 | { | ||
2017 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2018 | int replica = ikglp_get_idx(sem, fq); | ||
2019 | int gpu = replica_to_gpu(aff, replica); | ||
2020 | |||
2021 | tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration | ||
2022 | |||
2023 | TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n", | ||
2024 | t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); | ||
2025 | |||
2026 | // count the number of resource holders | ||
2027 | ++(*(aff->q_info[replica].nr_cur_users)); | ||
2028 | |||
2029 | reg_nv_device(gpu, 1, t); // register | ||
2030 | |||
2031 | tsk_rt(t)->suspend_gpu_tracker_on_block = 0; | ||
2032 | reset_gpu_tracker(t); | ||
2033 | start_gpu_tracker(t); | ||
2034 | } | ||
2035 | |||
2036 | void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2037 | { | ||
2038 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2039 | int replica = ikglp_get_idx(sem, fq); | ||
2040 | int gpu = replica_to_gpu(aff, replica); | ||
2041 | lt_t est_time; | ||
2042 | |||
2043 | stop_gpu_tracker(t); // stop the tracker before we do anything else. | ||
2044 | |||
2045 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
2046 | |||
2047 | tsk_rt(t)->last_gpu = gpu; | ||
2048 | |||
2049 | // count the number of resource holders | ||
2050 | --(*(aff->q_info[replica].nr_cur_users)); | ||
2051 | |||
2052 | reg_nv_device(gpu, 0, t); // unregister | ||
2053 | |||
2054 | // update estimates | ||
2055 | update_gpu_estimate(t, get_gpu_time(t)); | ||
2056 | |||
2057 | TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n", | ||
2058 | t->comm, t->pid, gpu, | ||
2059 | get_gpu_time(t), | ||
2060 | est_time, | ||
2061 | (long long)get_gpu_time(t) - (long long)est_time); | ||
2062 | } | ||
2063 | |||
2064 | struct ikglp_affinity_ops gpu_ikglp_affinity = | ||
2065 | { | ||
2066 | .advise_enqueue = gpu_ikglp_advise_enqueue, | ||
2067 | .advise_steal = gpu_ikglp_advise_steal, | ||
2068 | .advise_donee_selection = gpu_ikglp_advise_donee_selection, | ||
2069 | .advise_doner_to_fq = gpu_ikglp_advise_doner_to_fq, | ||
2070 | |||
2071 | .notify_enqueue = gpu_ikglp_notify_enqueue, | ||
2072 | .notify_dequeue = gpu_ikglp_notify_dequeue, | ||
2073 | .notify_acquired = gpu_ikglp_notify_acquired, | ||
2074 | .notify_freed = gpu_ikglp_notify_freed, | ||
2075 | |||
2076 | .replica_to_resource = gpu_replica_to_resource, | ||
2077 | }; | ||
2078 | |||
2079 | struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops, | ||
2080 | void* __user args) | ||
2081 | { | ||
2082 | return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args); | ||
2083 | } | ||
2084 | |||
2085 | |||
2086 | |||
2087 | |||
2088 | |||
2089 | |||
2090 | |||
2091 | |||
2092 | // Simple ikglp Affinity (standard ikglp with auto-gpu registration) | ||
2093 | |||
2094 | struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) | ||
2095 | { | ||
2096 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2097 | int min_count; | ||
2098 | int min_nr_users; | ||
2099 | struct ikglp_queue_info *shortest; | ||
2100 | struct fifo_queue *to_enqueue; | ||
2101 | int i; | ||
2102 | |||
2103 | // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); | ||
2104 | |||
2105 | shortest = &aff->q_info[0]; | ||
2106 | min_count = shortest->q->count; | ||
2107 | min_nr_users = *(shortest->nr_cur_users); | ||
2108 | |||
2109 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | ||
2110 | ikglp_get_idx(sem, shortest->q), | ||
2111 | shortest->q->count, | ||
2112 | min_nr_users); | ||
2113 | |||
2114 | for(i = 1; i < sem->nr_replicas; ++i) { | ||
2115 | int len = aff->q_info[i].q->count; | ||
2116 | |||
2117 | // queue is smaller, or they're equal and the other has a smaller number | ||
2118 | // of total users. | ||
2119 | // | ||
2120 | // tie-break on the shortest number of simult users. this only kicks in | ||
2121 | // when there are more than 1 empty queues. | ||
2122 | if((len < min_count) || | ||
2123 | ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { | ||
2124 | shortest = &aff->q_info[i]; | ||
2125 | min_count = shortest->q->count; | ||
2126 | min_nr_users = *(aff->q_info[i].nr_cur_users); | ||
2127 | } | ||
2128 | |||
2129 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | ||
2130 | ikglp_get_idx(sem, aff->q_info[i].q), | ||
2131 | aff->q_info[i].q->count, | ||
2132 | *(aff->q_info[i].nr_cur_users)); | ||
2133 | } | ||
2134 | |||
2135 | to_enqueue = shortest->q; | ||
2136 | TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", | ||
2137 | ikglp_get_idx(sem, to_enqueue), | ||
2138 | ikglp_get_idx(sem, sem->shortest_fifo_queue)); | ||
2139 | |||
2140 | return to_enqueue; | ||
2141 | } | ||
2142 | |||
2143 | ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff) | ||
2144 | { | ||
2145 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2146 | // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n"); | ||
2147 | return ikglp_find_hp_waiter_to_steal(sem); | ||
2148 | } | ||
2149 | |||
2150 | ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff) | ||
2151 | { | ||
2152 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2153 | ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
2154 | return(donee); | ||
2155 | } | ||
2156 | |||
2157 | ikglp_wait_state_t* simple_gpu_ikglp_advise_doner_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq) | ||
2158 | { | ||
2159 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2160 | ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
2161 | return(donor); | ||
2162 | } | ||
2163 | |||
2164 | void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2165 | { | ||
2166 | // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n"); | ||
2167 | } | ||
2168 | |||
2169 | void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2170 | { | ||
2171 | // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n"); | ||
2172 | } | ||
2173 | |||
2174 | void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2175 | { | ||
2176 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2177 | int replica = ikglp_get_idx(sem, fq); | ||
2178 | int gpu = replica_to_gpu(aff, replica); | ||
2179 | |||
2180 | // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n"); | ||
2181 | |||
2182 | // count the number of resource holders | ||
2183 | ++(*(aff->q_info[replica].nr_cur_users)); | ||
2184 | |||
2185 | reg_nv_device(gpu, 1, t); // register | ||
2186 | } | ||
2187 | |||
2188 | void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2189 | { | ||
2190 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2191 | int replica = ikglp_get_idx(sem, fq); | ||
2192 | int gpu = replica_to_gpu(aff, replica); | ||
2193 | |||
2194 | // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n"); | ||
2196 | // count the number of resource holders | ||
2196 | --(*(aff->q_info[replica].nr_cur_users)); | ||
2197 | |||
2198 | reg_nv_device(gpu, 0, t); // unregister | ||
2199 | } | ||
2200 | |||
2201 | struct ikglp_affinity_ops simple_gpu_ikglp_affinity = | ||
2202 | { | ||
2203 | .advise_enqueue = simple_gpu_ikglp_advise_enqueue, | ||
2204 | .advise_steal = simple_gpu_ikglp_advise_steal, | ||
2205 | .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection, | ||
2206 | .advise_doner_to_fq = simple_gpu_ikglp_advise_doner_to_fq, | ||
2207 | |||
2208 | .notify_enqueue = simple_gpu_ikglp_notify_enqueue, | ||
2209 | .notify_dequeue = simple_gpu_ikglp_notify_dequeue, | ||
2210 | .notify_acquired = simple_gpu_ikglp_notify_acquired, | ||
2211 | .notify_freed = simple_gpu_ikglp_notify_freed, | ||
2212 | |||
2213 | .replica_to_resource = gpu_replica_to_resource, | ||
2214 | }; | ||
2215 | |||
2216 | struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, | ||
2217 | void* __user args) | ||
2218 | { | ||
2219 | return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args); | ||
2220 | } | ||
2221 | |||
2222 | #endif | ||
2223 | |||
2224 | |||
2225 | |||
2226 | |||
2227 | |||
2228 | |||
2229 | |||
2230 | |||
2231 | |||
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index d0a6bd364c43..0b64977789a6 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -5,13 +5,13 @@ | |||
5 | #include <litmus/sched_plugin.h> | 5 | #include <litmus/sched_plugin.h> |
6 | #include <litmus/fdso.h> | 6 | #include <litmus/fdso.h> |
7 | 7 | ||
8 | #include <litmus/kfmlp_lock.h> | ||
9 | |||
10 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | 8 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) |
11 | #include <litmus/gpu_affinity.h> | 9 | #include <litmus/gpu_affinity.h> |
12 | #include <litmus/nvidia_info.h> | 10 | #include <litmus/nvidia_info.h> |
13 | #endif | 11 | #endif |
14 | 12 | ||
13 | #include <litmus/kfmlp_lock.h> | ||
14 | |||
15 | static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, | 15 | static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, |
16 | struct kfmlp_queue* queue) | 16 | struct kfmlp_queue* queue) |
17 | { | 17 | { |
@@ -508,6 +508,10 @@ struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) | |||
508 | 508 | ||
509 | sem->shortest_queue = &sem->queues[0]; | 509 | sem->shortest_queue = &sem->queues[0]; |
510 | 510 | ||
511 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
512 | sem->aff_obs = NULL; | ||
513 | #endif | ||
514 | |||
511 | return &sem->litmus_lock; | 515 | return &sem->litmus_lock; |
512 | } | 516 | } |
513 | 517 | ||
@@ -584,7 +588,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
584 | } | 588 | } |
585 | 589 | ||
586 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | 590 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { |
587 | TRACE_CUR("System does not support #simult_users >%d. %d requested.\n", | 591 | TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", |
588 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | 592 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); |
589 | return(NULL); | 593 | return(NULL); |
590 | } | 594 | } |
@@ -628,7 +632,6 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
628 | // attach observer to the lock | 632 | // attach observer to the lock |
629 | spin_lock_irqsave(&sem->lock, flags); | 633 | spin_lock_irqsave(&sem->lock, flags); |
630 | sem->aff_obs = kfmlp_aff; | 634 | sem->aff_obs = kfmlp_aff; |
631 | //kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)]; | ||
632 | spin_unlock_irqrestore(&sem->lock, flags); | 635 | spin_unlock_irqrestore(&sem->lock, flags); |
633 | 636 | ||
634 | return &kfmlp_aff->obs; | 637 | return &kfmlp_aff->obs; |
@@ -646,20 +649,20 @@ static int gpu_replica_to_resource(struct kfmlp_affinity* aff, | |||
646 | 649 | ||
647 | // Smart KFMLP Affinity | 650 | // Smart KFMLP Affinity |
648 | 651 | ||
649 | static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) | 652 | //static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) |
650 | { | 653 | //{ |
651 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 654 | // struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
652 | struct kfmlp_queue_info *shortest = &aff->q_info[0]; | 655 | // struct kfmlp_queue_info *shortest = &aff->q_info[0]; |
653 | int i; | 656 | // int i; |
654 | 657 | // | |
655 | for(i = 1; i < sem->num_resources; ++i) { | 658 | // for(i = 1; i < sem->num_resources; ++i) { |
656 | if(aff->q_info[i].estimated_len < shortest->estimated_len) { | 659 | // if(aff->q_info[i].estimated_len < shortest->estimated_len) { |
657 | shortest = &aff->q_info[i]; | 660 | // shortest = &aff->q_info[i]; |
658 | } | 661 | // } |
659 | } | 662 | // } |
660 | 663 | // | |
661 | return(shortest); | 664 | // return(shortest); |
662 | } | 665 | //} |
663 | 666 | ||
664 | struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) | 667 | struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) |
665 | { | 668 | { |