Diffstat (limited to 'litmus')
-rw-r--r--   litmus/ikglp_lock.c | 245
-rw-r--r--   litmus/kfmlp_lock.c |  36
-rw-r--r--   litmus/locking.c    | 177
3 files changed, 194 insertions, 264 deletions
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 3fd760799a75..cab0d7f938f9 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -103,8 +103,7 @@ static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue, | |||
103 | struct task_struct *queued, *found = NULL; | 103 | struct task_struct *queued, *found = NULL; |
104 | 104 | ||
105 | list_for_each(pos, &kqueue->wait.task_list) { | 105 | list_for_each(pos, &kqueue->wait.task_list) { |
106 | queued = (struct task_struct*) list_entry(pos, | 106 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, task_list)->private; |
107 | wait_queue_t, task_list)->private; | ||
108 | 107 | ||
109 | /* Compare task prios, find high prio task. */ | 108 | /* Compare task prios, find high prio task. */ |
110 | if(queued != skip && litmus->compare(queued, found)) | 109 | if(queued != skip && litmus->compare(queued, found)) |
@@ -232,22 +231,14 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, | |||
232 | struct task_struct *t, | 231 | struct task_struct *t, |
233 | ikglp_heap_node_t *node) | 232 | ikglp_heap_node_t *node) |
234 | { | 233 | { |
235 | |||
236 | |||
237 | node->task = t; | 234 | node->task = t; |
238 | INIT_BINHEAP_NODE(&node->node); | 235 | INIT_BINHEAP_NODE(&node->node); |
239 | 236 | ||
240 | if(sem->top_m_size < sem->m) { | 237 | if(sem->top_m_size < sem->max_in_fifos) { |
241 | TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", | 238 | TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", |
242 | t->comm, t->pid); | 239 | t->comm, t->pid); |
243 | // TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); | ||
244 | // print_global_list(sem->top_m.root, 1); | ||
245 | |||
246 | binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); | 240 | binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); |
247 | ++(sem->top_m_size); | 241 | ++(sem->top_m_size); |
248 | |||
249 | // TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); | ||
250 | // print_global_list(sem->top_m.root, 1); | ||
251 | } | 242 | } |
252 | else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { | 243 | else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { |
253 | ikglp_heap_node_t *evicted = | 244 | ikglp_heap_node_t *evicted = |
@@ -257,12 +248,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, | |||
257 | t->comm, t->pid, | 248 | t->comm, t->pid, |
258 | evicted->task->comm, evicted->task->pid); | 249 | evicted->task->comm, evicted->task->pid); |
259 | 250 | ||
260 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
261 | // print_global_list(sem->not_top_m.root, 1); | ||
262 | // TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); | ||
263 | // print_global_list(sem->top_m.root, 1); | ||
264 | |||
265 | |||
266 | binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); | 251 | binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); |
267 | INIT_BINHEAP_NODE(&evicted->node); | 252 | INIT_BINHEAP_NODE(&evicted->node); |
268 | binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); | 253 | binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); |
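The two hunks above are the IKGLP's "top-m" bookkeeping: the semaphore keeps the max_in_fifos (formerly sem->m) highest-priority outstanding requests in one heap whose root is the m-th highest, and demotes an evicted request to the not-top-m heap. A stand-alone sketch of that decision flow follows; it is an illustration only, with an array and linear scan standing in for the kernel binheaps and a larger int standing in for a higher priority (none of these names are LITMUS^RT APIs).

    #include <stdio.h>

    #define M 3                      /* plays the role of sem->max_in_fifos */

    static int top_m[M];             /* priorities of the current top-m requests */
    static int top_m_size;

    static int worst_idx(void)       /* index of the m-th highest (heap root) */
    {
        int i, w = 0;
        for (i = 1; i < top_m_size; i++)
            if (top_m[i] < top_m[w])
                w = i;
        return w;
    }

    /* Mirrors the three cases of ikglp_add_global_list(). */
    static void add_global(int prio)
    {
        if (top_m_size < M) {
            top_m[top_m_size++] = prio;          /* trivially in top-m */
        } else {
            int w = worst_idx();
            if (prio > top_m[w]) {
                printf("demoting %d to not-top-m\n", top_m[w]);
                top_m[w] = prio;                 /* evict old m-th highest */
            } else {
                printf("%d goes straight to not-top-m\n", prio);
            }
        }
    }

    int main(void)
    {
        int prios[] = { 5, 9, 1, 7, 3 }, i;
        for (i = 0; i < 5; i++)
            add_global(prios[i]);
        return 0;
    }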
@@ -279,8 +264,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, | |||
279 | else { | 264 | else { |
280 | TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", | 265 | TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", |
281 | t->comm, t->pid); | 266 | t->comm, t->pid); |
282 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
283 | // print_global_list(sem->not_top_m.root, 1); | ||
284 | 267 | ||
285 | binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); | 268 | binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); |
286 | 269 | ||
@@ -303,12 +286,6 @@ static void ikglp_del_global_list(struct ikglp_semaphore *sem, | |||
303 | if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { | 286 | if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { |
304 | TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); | 287 | TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); |
305 | 288 | ||
306 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
307 | // print_global_list(sem->not_top_m.root, 1); | ||
308 | // TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); | ||
309 | // print_global_list(sem->top_m.root, 1); | ||
310 | |||
311 | |||
312 | binheap_delete(&node->node, &sem->top_m); | 289 | binheap_delete(&node->node, &sem->top_m); |
313 | 290 | ||
314 | if(!binheap_empty(&sem->not_top_m)) { | 291 | if(!binheap_empty(&sem->not_top_m)) { |
@@ -337,8 +314,6 @@ static void ikglp_del_global_list(struct ikglp_semaphore *sem, | |||
337 | } | 314 | } |
338 | else { | 315 | else { |
339 | TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); | 316 | TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); |
340 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
341 | // print_global_list(sem->not_top_m.root, 1); | ||
342 | 317 | ||
343 | binheap_delete(&node->node, &sem->not_top_m); | 318 | binheap_delete(&node->node, &sem->not_top_m); |
344 | 319 | ||
@@ -355,10 +330,6 @@ static void ikglp_add_donees(struct ikglp_semaphore *sem, | |||
355 | struct task_struct *t, | 330 | struct task_struct *t, |
356 | ikglp_donee_heap_node_t* node) | 331 | ikglp_donee_heap_node_t* node) |
357 | { | 332 | { |
358 | // TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid); | ||
359 | // TRACE_CUR("donees Before:\n"); | ||
360 | // print_donees(sem, sem->donees.root, 1); | ||
361 | |||
362 | node->task = t; | 333 | node->task = t; |
363 | node->donor_info = NULL; | 334 | node->donor_info = NULL; |
364 | node->fq = fq; | 335 | node->fq = fq; |
@@ -928,7 +899,7 @@ int ikglp_lock(struct litmus_lock* l) | |||
928 | 899 | ||
929 | TRACE_CUR("Requesting a replica from lock %d.\n", l->ident); | 900 | TRACE_CUR("Requesting a replica from lock %d.\n", l->ident); |
930 | 901 | ||
931 | if(sem->nr_in_fifos < sem->m) { | 902 | if(sem->nr_in_fifos < sem->max_in_fifos) { |
932 | // enqueue somewhere | 903 | // enqueue somewhere |
933 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 904 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
934 | fq = (sem->aff_obs) ? | 905 | fq = (sem->aff_obs) ? |
@@ -1272,10 +1243,13 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1272 | donee = t; | 1243 | donee = t; |
1273 | 1244 | ||
1274 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1245 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1275 | if(sem->aff_obs) | 1246 | if(sem->aff_obs) { |
1276 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1247 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
1277 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) | 1248 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { |
1278 | fq_of_new_on_fq = fq; /* discard recommendation */ | 1249 | WARN_ON(1); |
1250 | fq_of_new_on_fq = fq; | ||
1251 | } | ||
1252 | } | ||
1279 | else | 1253 | else |
1280 | fq_of_new_on_fq = fq; | 1254 | fq_of_new_on_fq = fq; |
1281 | #else | 1255 | #else |
@@ -1308,10 +1282,13 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1308 | binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); | 1282 | binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); |
1309 | 1283 | ||
1310 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1284 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1311 | if(sem->aff_obs) | 1285 | if(sem->aff_obs) { |
1312 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1286 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
1313 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) | 1287 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { |
1314 | fq_of_new_on_fq = fq; /* discard recommendation */ | 1288 | WARN_ON(1); |
1289 | fq_of_new_on_fq = fq; | ||
1290 | } | ||
1291 | } | ||
1315 | else | 1292 | else |
1316 | fq_of_new_on_fq = fq; | 1293 | fq_of_new_on_fq = fq; |
1317 | #else | 1294 | #else |
@@ -1335,10 +1312,13 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1335 | new_on_fq = pq_wait->task; | 1312 | new_on_fq = pq_wait->task; |
1336 | 1313 | ||
1337 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1314 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1338 | if(sem->aff_obs) | 1315 | if(sem->aff_obs) { |
1339 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1316 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
1340 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) | 1317 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { |
1341 | fq_of_new_on_fq = fq; /* discard recommendation */ | 1318 | WARN_ON(1); |
1319 | fq_of_new_on_fq = fq; | ||
1320 | } | ||
1321 | } | ||
1342 | else | 1322 | else |
1343 | fq_of_new_on_fq = fq; | 1323 | fq_of_new_on_fq = fq; |
1344 | #else | 1324 | #else |
@@ -1663,26 +1643,44 @@ void ikglp_free(struct litmus_lock* l) | |||
1663 | 1643 | ||
1664 | 1644 | ||
1665 | 1645 | ||
1666 | struct litmus_lock* ikglp_new(int m, | 1646 | struct litmus_lock* ikglp_new(unsigned int m, |
1667 | struct litmus_lock_ops* ops, | 1647 | struct litmus_lock_ops* ops, |
1668 | void* __user arg) | 1648 | void* __user uarg) |
1669 | { | 1649 | { |
1650 | /* TODO: Support trivial token lock, s.t. args.nr_replicas equals some | ||
1651 | * sentinel value, and implement special-case algorithms. There is currently | ||
1652 | * a lot of overhead for a trivial token lock since we allocate O(n)-worth | ||
1653 | * of data; this could be avoided with special-case algorithms. */ | ||
1654 | |||
1670 | struct ikglp_semaphore* sem; | 1655 | struct ikglp_semaphore* sem; |
1671 | int nr_replicas = 0; | 1656 | struct ikglp_args args; |
1672 | int i; | 1657 | unsigned int i; |
1673 | 1658 | ||
1674 | BUG_ON(m <= 0); | 1659 | BUG_ON(m <= 0); |
1675 | 1660 | ||
1676 | if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas))) | 1661 | if(!access_ok(VERIFY_READ, uarg, sizeof(args))) |
1677 | { | 1662 | return(NULL); |
1663 | if(__copy_from_user(&args, uarg, sizeof(args))) | ||
1664 | return(NULL); | ||
1665 | |||
1666 | /* validation */ | ||
1667 | |||
1668 | /* there must be at least one resource */ | ||
1669 | if (args.nr_replicas < 1) { | ||
1670 | printk("Invalid number of replicas.\n"); | ||
1678 | return(NULL); | 1671 | return(NULL); |
1679 | } | 1672 | } |
1680 | if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas))) | 1673 | /* IKGLP_OPTIMAL_FIFO_LEN can only be determined if nr_max_holders |
1681 | { | 1674 | * is IKGLP_M_HOLDERS (number of CPUs) */ |
1675 | if (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN && | ||
1676 | args.max_in_fifos != IKGLP_M_IN_FIFOS) { | ||
1677 | printk("Cannot compute optimal FIFO length if max_in_fifos != IKGLP_M_IN_FIFOS\n"); | ||
1682 | return(NULL); | 1678 | return(NULL); |
1683 | } | 1679 | } |
1684 | if(nr_replicas < 1) | 1680 | if ((args.max_in_fifos != IKGLP_UNLIMITED_IN_FIFOS) && |
1685 | { | 1681 | (args.max_fifo_len != IKGLP_UNLIMITED_FIFO_LEN) && |
1682 | (args.max_in_fifos > args.nr_replicas*args.max_fifo_len)) { | ||
1683 | printk("Not enough total FIFO space for specified max requests in FIFOs.\n"); | ||
1686 | return(NULL); | 1684 | return(NULL); |
1687 | } | 1685 | } |
1688 | 1686 | ||
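The validation above reads a richer argument block from user space than the old single nr_replicas integer. The exact layout of struct ikglp_args lives in an unshown uapi header, so the declaration below is only an assumption reconstructed from the fields and sentinel constants referenced in this hunk:

    /* Assumed shape of the user-supplied arguments (field order is a guess). */
    struct ikglp_args {
        unsigned int nr_replicas;   /* k: number of replicas; must be >= 1 */
        unsigned int max_in_fifos;  /* cap on requests held in FIFOs, or
                                       IKGLP_M_IN_FIFOS / IKGLP_UNLIMITED_IN_FIFOS */
        unsigned int max_fifo_len;  /* per-FIFO cap, or IKGLP_OPTIMAL_FIFO_LEN /
                                       IKGLP_UNLIMITED_FIFO_LEN */
    };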
@@ -1693,7 +1691,7 @@ struct litmus_lock* ikglp_new(int m, | |||
1693 | } | 1691 | } |
1694 | memset(sem, 0, sizeof(*sem)); | 1692 | memset(sem, 0, sizeof(*sem)); |
1695 | 1693 | ||
1696 | sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL); | 1694 | sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*args.nr_replicas, GFP_KERNEL); |
1697 | if(!sem->fifo_queues) | 1695 | if(!sem->fifo_queues) |
1698 | { | 1696 | { |
1699 | kfree(sem); | 1697 | kfree(sem); |
@@ -1712,17 +1710,21 @@ struct litmus_lock* ikglp_new(int m, | |||
1712 | 1710 | ||
1713 | raw_spin_lock_init(&sem->real_lock); | 1711 | raw_spin_lock_init(&sem->real_lock); |
1714 | 1712 | ||
1715 | sem->nr_replicas = nr_replicas; | 1713 | sem->nr_replicas = args.nr_replicas; |
1716 | sem->m = m; | 1714 | sem->max_in_fifos = (args.max_in_fifos == IKGLP_M_IN_FIFOS) ? |
1717 | sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0); | 1715 | m : |
1716 | args.max_in_fifos; | ||
1717 | sem->max_fifo_len = (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN) ? | ||
1718 | (sem->max_in_fifos/args.nr_replicas) + ((sem->max_in_fifos%args.nr_replicas) != 0) : | ||
1719 | args.max_fifo_len; | ||
1718 | sem->nr_in_fifos = 0; | 1720 | sem->nr_in_fifos = 0; |
1719 | 1721 | ||
1720 | TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n", | 1722 | TRACE_CUR("New IKGLP Sem: m = %u, k = %u, max fifo_len = %u\n", |
1721 | sem->m, | 1723 | sem->max_in_fifos, |
1722 | sem->nr_replicas, | 1724 | sem->nr_replicas, |
1723 | sem->max_fifo_len); | 1725 | sem->max_fifo_len); |
1724 | 1726 | ||
1725 | for(i = 0; i < nr_replicas; ++i) | 1727 | for(i = 0; i < args.nr_replicas; ++i) |
1726 | { | 1728 | { |
1727 | struct fifo_queue* q = &(sem->fifo_queues[i]); | 1729 | struct fifo_queue* q = &(sem->fifo_queues[i]); |
1728 | 1730 | ||
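When the caller asks for IKGLP_OPTIMAL_FIFO_LEN, the hunk above derives the per-queue bound as the ceiling of max_in_fifos over nr_replicas using pure integer arithmetic. A self-contained restatement of that expression (the helper name is illustrative, not from the tree):

    /* ceil(m/k) without floating point, exactly as computed above:
     * e.g. m = 9 pending requests over k = 4 replicas -> 9/4 + (9%4 != 0) = 3. */
    static inline unsigned int optimal_fifo_len(unsigned int m, unsigned int k)
    {
        return (m / k) + ((m % k) != 0);
    }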
@@ -1766,33 +1768,13 @@ struct litmus_lock* ikglp_new(int m, | |||
1766 | 1768 | ||
1767 | 1769 | ||
1768 | 1770 | ||
1771 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
1769 | 1772 | ||
1773 | /****************************************************************************/ | ||
1774 | /* AFFINITY HEURISTICS */ | ||
1775 | /****************************************************************************/ | ||
1770 | 1776 | ||
1771 | 1777 | ||
1772 | |||
1773 | |||
1774 | |||
1775 | |||
1776 | |||
1777 | |||
1778 | |||
1779 | |||
1780 | |||
1781 | |||
1782 | |||
1783 | |||
1784 | |||
1785 | |||
1786 | |||
1787 | |||
1788 | |||
1789 | |||
1790 | |||
1791 | |||
1792 | |||
1793 | |||
1794 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
1795 | |||
1796 | static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) | 1778 | static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) |
1797 | { | 1779 | { |
1798 | int gpu = replica % aff->nr_rsrc; | 1780 | int gpu = replica % aff->nr_rsrc; |
@@ -1856,7 +1838,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1856 | struct ikglp_affinity* ikglp_aff; | 1838 | struct ikglp_affinity* ikglp_aff; |
1857 | struct gpu_affinity_observer_args aff_args; | 1839 | struct gpu_affinity_observer_args aff_args; |
1858 | struct ikglp_semaphore* sem; | 1840 | struct ikglp_semaphore* sem; |
1859 | int i; | 1841 | unsigned int i; |
1860 | unsigned long flags; | 1842 | unsigned long flags; |
1861 | 1843 | ||
1862 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { | 1844 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { |
@@ -1873,23 +1855,17 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1873 | return(NULL); | 1855 | return(NULL); |
1874 | } | 1856 | } |
1875 | 1857 | ||
1876 | if((aff_args.nr_simult_users <= 0) || | 1858 | if((aff_args.rho <= 0) || |
1877 | (sem->nr_replicas%aff_args.nr_simult_users != 0)) { | 1859 | (sem->nr_replicas%aff_args.rho != 0)) { |
1878 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " | 1860 | TRACE_CUR("Lock %d does not support #replicas (%u) for #simult_users " |
1879 | "(%d) per replica. #replicas should be evenly divisible " | 1861 | "(%u) per replica. #replicas should be evenly divisible " |
1880 | "by #simult_users.\n", | 1862 | "by #simult_users.\n", |
1881 | sem->litmus_lock.ident, | 1863 | sem->litmus_lock.ident, |
1882 | sem->nr_replicas, | 1864 | sem->nr_replicas, |
1883 | aff_args.nr_simult_users); | 1865 | aff_args.rho); |
1884 | return(NULL); | 1866 | return(NULL); |
1885 | } | 1867 | } |
1886 | 1868 | ||
1887 | // if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | ||
1888 | // TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", | ||
1889 | // NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | ||
1890 | //// return(NULL); | ||
1891 | // } | ||
1892 | |||
1893 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); | 1869 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); |
1894 | if(!ikglp_aff) { | 1870 | if(!ikglp_aff) { |
1895 | return(NULL); | 1871 | return(NULL); |
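The renamed rho field is the number of tasks allowed to use one GPU simultaneously, and the check above requires the replica count to be a whole multiple of it, so that replicas map cleanly back onto physical GPUs. A small worked example of that rule (values invented, helper name illustrative):

    /* Worked example of the divisibility rule enforced above. */
    static int aff_args_ok(unsigned int nr_replicas, unsigned int rho)
    {
        return rho > 0 && (nr_replicas % rho) == 0;
    }
    /* aff_args_ok(6, 2) -> 1: 3 GPUs x rho = 2 users each, nr_rsrc = 6/2 = 3.
     * aff_args_ok(7, 2) -> 0: replicas cannot be split evenly across GPUs.   */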
@@ -1901,14 +1877,14 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1901 | return(NULL); | 1877 | return(NULL); |
1902 | } | 1878 | } |
1903 | 1879 | ||
1904 | ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); | 1880 | ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL); |
1905 | if(!ikglp_aff->nr_cur_users_on_rsrc) { | 1881 | if(!ikglp_aff->nr_cur_users_on_rsrc) { |
1906 | kfree(ikglp_aff->q_info); | 1882 | kfree(ikglp_aff->q_info); |
1907 | kfree(ikglp_aff); | 1883 | kfree(ikglp_aff); |
1908 | return(NULL); | 1884 | return(NULL); |
1909 | } | 1885 | } |
1910 | 1886 | ||
1911 | ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); | 1887 | ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL); |
1912 | if(!ikglp_aff->nr_aff_on_rsrc) { | 1888 | if(!ikglp_aff->nr_aff_on_rsrc) { |
1913 | kfree(ikglp_aff->nr_cur_users_on_rsrc); | 1889 | kfree(ikglp_aff->nr_cur_users_on_rsrc); |
1914 | kfree(ikglp_aff->q_info); | 1890 | kfree(ikglp_aff->q_info); |
@@ -1920,7 +1896,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1920 | 1896 | ||
1921 | ikglp_aff->ops = ikglp_ops; | 1897 | ikglp_aff->ops = ikglp_ops; |
1922 | ikglp_aff->offset = aff_args.replica_to_gpu_offset; | 1898 | ikglp_aff->offset = aff_args.replica_to_gpu_offset; |
1923 | ikglp_aff->nr_simult = aff_args.nr_simult_users; | 1899 | ikglp_aff->nr_simult = aff_args.rho; |
1924 | ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; | 1900 | ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; |
1925 | ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0; | 1901 | ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0; |
1926 | 1902 | ||
@@ -1930,7 +1906,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1930 | ikglp_aff->relax_max_fifo_len); | 1906 | ikglp_aff->relax_max_fifo_len); |
1931 | 1907 | ||
1932 | memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); | 1908 | memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); |
1933 | memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc)); | 1909 | memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(unsigned int)*(ikglp_aff->nr_rsrc)); |
1934 | 1910 | ||
1935 | for(i = 0; i < sem->nr_replicas; ++i) { | 1911 | for(i = 0; i < sem->nr_replicas; ++i) { |
1936 | ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; | 1912 | ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; |
@@ -1950,9 +1926,6 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1950 | return &ikglp_aff->obs; | 1926 | return &ikglp_aff->obs; |
1951 | } | 1927 | } |
1952 | 1928 | ||
1953 | |||
1954 | |||
1955 | |||
1956 | static int gpu_replica_to_resource(struct ikglp_affinity* aff, | 1929 | static int gpu_replica_to_resource(struct ikglp_affinity* aff, |
1957 | struct fifo_queue* fq) { | 1930 | struct fifo_queue* fq) { |
1958 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | 1931 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); |
@@ -1960,29 +1933,28 @@ static int gpu_replica_to_resource(struct ikglp_affinity* aff, | |||
1960 | } | 1933 | } |
1961 | 1934 | ||
1962 | 1935 | ||
1963 | // Smart IKGLP Affinity | ||
1964 | 1936 | ||
1965 | //static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff) | 1937 | /*--------------------------------------------------------------------------*/ |
1966 | //{ | 1938 | /* ADVANCED AFFINITY HEURISTICS */ |
1967 | // struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | 1939 | /* */ |
1968 | // struct ikglp_queue_info *shortest = &aff->q_info[0]; | 1940 | /* These heuristics estimate FIFO length wait times and try to enqueue */ |
1969 | // int i; | 1941 | /* tasks into the shortest queues. When two queues are equivalent, the GPU */ |
1970 | // | 1942 | /* that maintains affinity is selected. When a task has no affinity, the */ |
1971 | // for(i = 1; i < sem->nr_replicas; ++i) { | 1943 | /* heuristic tries to get the GPU with the fewest number of other tasks */ |
1972 | // if(aff->q_info[i].estimated_len < shortest->estimated_len) { | 1944 | /* with affinity on that GPU. */ |
1973 | // shortest = &aff->q_info[i]; | 1945 | /* */ |
1974 | // } | 1946 | /* Heuristics to explore in the future: */ |
1975 | // } | 1947 | /* - Utilization */ |
1976 | // | 1948 | /* - Longest non-preemptive section */ |
1977 | // return(shortest); | 1949 | /* - Criticality */ |
1978 | //} | 1950 | /* - Task period */ |
1951 | /*--------------------------------------------------------------------------*/ | ||
1979 | 1952 | ||
1980 | struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) | 1953 | struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) |
1981 | { | 1954 | { |
1982 | // advise_enqueue must be smart as not not break IKGLP rules: | 1955 | // advise_enqueue must be smart as not not break IKGLP rules: |
1983 | // * No queue can be greater than ceil(m/k) in length. We may return | 1956 | // * No queue can be greater than ceil(m/k) in length, unless |
1984 | // such a queue, but IKGLP will be smart enough as to send requests | 1957 | // 'relax_max_fifo_len' is asserted |
1985 | // to donors or PQ. | ||
1986 | // * Cannot let a queue idle if there exist waiting PQ/donors | 1958 | // * Cannot let a queue idle if there exist waiting PQ/donors |
1987 | // -- needed to guarantee parallel progress of waiters. | 1959 | // -- needed to guarantee parallel progress of waiters. |
1988 | // | 1960 | // |
@@ -1993,14 +1965,15 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t | |||
1993 | 1965 | ||
1994 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | 1966 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); |
1995 | lt_t min_len; | 1967 | lt_t min_len; |
1996 | int min_nr_users, min_nr_aff_users; | 1968 | unsigned int min_nr_users, min_nr_aff_users; |
1997 | struct ikglp_queue_info *shortest, *aff_queue; | 1969 | struct ikglp_queue_info *shortest, *aff_queue; |
1998 | struct fifo_queue *to_enqueue; | 1970 | struct fifo_queue *to_enqueue; |
1999 | int i; | 1971 | unsigned int i; |
2000 | int affinity_gpu; | 1972 | int affinity_gpu; |
2001 | 1973 | ||
2002 | int max_fifo_len = (aff->relax_max_fifo_len) ? | 1974 | unsigned int max_fifo_len = (aff->relax_max_fifo_len) ? |
2003 | sem->m : sem->max_fifo_len; | 1975 | sem->max_in_fifos : /* allow possibility of all requests on same queue */ |
1976 | sem->max_fifo_len; /* constraint FIFO len */ | ||
2004 | 1977 | ||
2005 | // if we have no affinity, find the GPU with the least number of users | 1978 | // if we have no affinity, find the GPU with the least number of users |
2006 | // with active affinity | 1979 | // with active affinity |
@@ -2037,7 +2010,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t | |||
2037 | min_nr_aff_users = *(shortest->nr_aff_users); | 2010 | min_nr_aff_users = *(shortest->nr_aff_users); |
2038 | 2011 | ||
2039 | 2012 | ||
2040 | TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", | 2013 | TRACE_CUR("cs is %llu on queue %d (count = %u): est len = %llu\n", |
2041 | get_gpu_estimate(t, MIG_LOCAL), | 2014 | get_gpu_estimate(t, MIG_LOCAL), |
2042 | ikglp_get_idx(sem, shortest->q), | 2015 | ikglp_get_idx(sem, shortest->q), |
2043 | shortest->q->count, | 2016 | shortest->q->count, |
@@ -2119,8 +2092,6 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t | |||
2119 | ikglp_get_idx(sem, sem->shortest_fifo_queue)); | 2092 | ikglp_get_idx(sem, sem->shortest_fifo_queue)); |
2120 | 2093 | ||
2121 | return to_enqueue; | 2094 | return to_enqueue; |
2122 | |||
2123 | //return(sem->shortest_fifo_queue); | ||
2124 | } | 2095 | } |
2125 | 2096 | ||
2126 | 2097 | ||
@@ -2334,7 +2305,6 @@ static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff, | |||
2334 | 2305 | ||
2335 | donee = NULL; | 2306 | donee = NULL; |
2336 | donee_node = NULL; | 2307 | donee_node = NULL; |
2337 | //*dist_from_head = sem->max_fifo_len + 1; | ||
2338 | *dist_from_head = IKGLP_INVAL_DISTANCE; | 2308 | *dist_from_head = IKGLP_INVAL_DISTANCE; |
2339 | 2309 | ||
2340 | TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); | 2310 | TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); |
@@ -2630,7 +2600,6 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t) | |||
2630 | // decrement affinity count on old GPU | 2600 | // decrement affinity count on old GPU |
2631 | aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; | 2601 | aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; |
2632 | --(aff->nr_aff_on_rsrc[aff_rsrc]); | 2602 | --(aff->nr_aff_on_rsrc[aff_rsrc]); |
2633 | // aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t); | ||
2634 | 2603 | ||
2635 | if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { | 2604 | if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { |
2636 | WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); | 2605 | WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); |
@@ -2676,12 +2645,10 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, | |||
2676 | if(last_gpu >= 0) { | 2645 | if(last_gpu >= 0) { |
2677 | int old_rsrc = last_gpu - aff->offset; | 2646 | int old_rsrc = last_gpu - aff->offset; |
2678 | --(aff->nr_aff_on_rsrc[old_rsrc]); | 2647 | --(aff->nr_aff_on_rsrc[old_rsrc]); |
2679 | // aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t)); | ||
2680 | } | 2648 | } |
2681 | 2649 | ||
2682 | // increment affinity count on new GPU | 2650 | // increment affinity count on new GPU |
2683 | ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); | 2651 | ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); |
2684 | // aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t)); | ||
2685 | tsk_rt(t)->rsrc_exit_cb_args = aff; | 2652 | tsk_rt(t)->rsrc_exit_cb_args = aff; |
2686 | tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; | 2653 | tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; |
2687 | } | 2654 | } |
@@ -2751,20 +2718,18 @@ struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* op | |||
2751 | 2718 | ||
2752 | 2719 | ||
2753 | 2720 | ||
2754 | 2721 | /*--------------------------------------------------------------------------*/ | |
2755 | 2722 | /* SIMPLE LOAD-BALANCING AFFINITY HEURISTIC */ | |
2756 | 2723 | /*--------------------------------------------------------------------------*/ | |
2757 | |||
2758 | // Simple ikglp Affinity (standard ikglp with auto-gpu registration) | ||
2759 | 2724 | ||
2760 | struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) | 2725 | struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) |
2761 | { | 2726 | { |
2762 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | 2727 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); |
2763 | int min_count; | 2728 | unsigned int min_count; |
2764 | int min_nr_users; | 2729 | unsigned int min_nr_users; |
2765 | struct ikglp_queue_info *shortest; | 2730 | struct ikglp_queue_info *shortest; |
2766 | struct fifo_queue *to_enqueue; | 2731 | struct fifo_queue *to_enqueue; |
2767 | int i; | 2732 | unsigned int i; |
2768 | 2733 | ||
2769 | // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); | 2734 | // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); |
2770 | 2735 | ||
@@ -2772,13 +2737,13 @@ struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, s | |||
2772 | min_count = shortest->q->count; | 2737 | min_count = shortest->q->count; |
2773 | min_nr_users = *(shortest->nr_cur_users); | 2738 | min_nr_users = *(shortest->nr_cur_users); |
2774 | 2739 | ||
2775 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | 2740 | TRACE_CUR("queue %d: waiters = %u, total holders = %u\n", |
2776 | ikglp_get_idx(sem, shortest->q), | 2741 | ikglp_get_idx(sem, shortest->q), |
2777 | shortest->q->count, | 2742 | shortest->q->count, |
2778 | min_nr_users); | 2743 | min_nr_users); |
2779 | 2744 | ||
2780 | for(i = 1; i < sem->nr_replicas; ++i) { | 2745 | for(i = 1; i < sem->nr_replicas; ++i) { |
2781 | int len = aff->q_info[i].q->count; | 2746 | unsigned int len = aff->q_info[i].q->count; |
2782 | 2747 | ||
2783 | // queue is smaller, or they're equal and the other has a smaller number | 2748 | // queue is smaller, or they're equal and the other has a smaller number |
2784 | // of total users. | 2749 | // of total users. |
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index 041561839976..7dd866185623 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -21,7 +21,7 @@ static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, | |||
21 | static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, | 21 | static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, |
22 | struct task_struct* holder) | 22 | struct task_struct* holder) |
23 | { | 23 | { |
24 | int i; | 24 | unsigned int i; |
25 | for(i = 0; i < sem->num_resources; ++i) | 25 | for(i = 0; i < sem->num_resources; ++i) |
26 | if(sem->queues[i].owner == holder) | 26 | if(sem->queues[i].owner == holder) |
27 | return(&sem->queues[i]); | 27 | return(&sem->queues[i]); |
@@ -79,7 +79,7 @@ static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, | |||
79 | { | 79 | { |
80 | /* must hold sem->lock */ | 80 | /* must hold sem->lock */ |
81 | 81 | ||
82 | int i; | 82 | unsigned int i; |
83 | 83 | ||
84 | *to_steal = NULL; | 84 | *to_steal = NULL; |
85 | *to_steal_from = NULL; | 85 | *to_steal_from = NULL; |
@@ -438,7 +438,7 @@ int kfmlp_close(struct litmus_lock* l) | |||
438 | struct kfmlp_queue *my_queue; | 438 | struct kfmlp_queue *my_queue; |
439 | unsigned long flags; | 439 | unsigned long flags; |
440 | 440 | ||
441 | int owner; | 441 | unsigned int owner; |
442 | 442 | ||
443 | spin_lock_irqsave(&sem->lock, flags); | 443 | spin_lock_irqsave(&sem->lock, flags); |
444 | 444 | ||
@@ -465,8 +465,8 @@ void kfmlp_free(struct litmus_lock* l) | |||
465 | struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) | 465 | struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) |
466 | { | 466 | { |
467 | struct kfmlp_semaphore* sem; | 467 | struct kfmlp_semaphore* sem; |
468 | int num_resources = 0; | 468 | unsigned int num_resources = 0; |
469 | int i; | 469 | unsigned int i; |
470 | 470 | ||
471 | if(!access_ok(VERIFY_READ, args, sizeof(num_resources))) | 471 | if(!access_ok(VERIFY_READ, args, sizeof(num_resources))) |
472 | { | 472 | { |
@@ -560,7 +560,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
560 | struct kfmlp_affinity* kfmlp_aff; | 560 | struct kfmlp_affinity* kfmlp_aff; |
561 | struct gpu_affinity_observer_args aff_args; | 561 | struct gpu_affinity_observer_args aff_args; |
562 | struct kfmlp_semaphore* sem; | 562 | struct kfmlp_semaphore* sem; |
563 | int i; | 563 | unsigned int i; |
564 | unsigned long flags; | 564 | unsigned long flags; |
565 | 565 | ||
566 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { | 566 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { |
@@ -577,14 +577,14 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
577 | return(NULL); | 577 | return(NULL); |
578 | } | 578 | } |
579 | 579 | ||
580 | if((aff_args.nr_simult_users <= 0) || | 580 | if((aff_args.rho <= 0) || |
581 | (sem->num_resources%aff_args.nr_simult_users != 0)) { | 581 | (sem->num_resources%aff_args.rho != 0)) { |
582 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " | 582 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " |
583 | "(%d) per replica. #replicas should be evenly divisible " | 583 | "(%d) per replica. #replicas should be evenly divisible " |
584 | "by #simult_users.\n", | 584 | "by #simult_users.\n", |
585 | sem->litmus_lock.ident, | 585 | sem->litmus_lock.ident, |
586 | sem->num_resources, | 586 | sem->num_resources, |
587 | aff_args.nr_simult_users); | 587 | aff_args.rho); |
588 | return(NULL); | 588 | return(NULL); |
589 | } | 589 | } |
590 | 590 | ||
@@ -605,7 +605,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
605 | return(NULL); | 605 | return(NULL); |
606 | } | 606 | } |
607 | 607 | ||
608 | kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL); | 608 | kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->num_resources / aff_args.rho), GFP_KERNEL); |
609 | if(!kfmlp_aff->nr_cur_users_on_rsrc) { | 609 | if(!kfmlp_aff->nr_cur_users_on_rsrc) { |
610 | kfree(kfmlp_aff->q_info); | 610 | kfree(kfmlp_aff->q_info); |
611 | kfree(kfmlp_aff); | 611 | kfree(kfmlp_aff); |
@@ -616,10 +616,10 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
616 | 616 | ||
617 | kfmlp_aff->ops = kfmlp_ops; | 617 | kfmlp_aff->ops = kfmlp_ops; |
618 | kfmlp_aff->offset = aff_args.replica_to_gpu_offset; | 618 | kfmlp_aff->offset = aff_args.replica_to_gpu_offset; |
619 | kfmlp_aff->nr_simult = aff_args.nr_simult_users; | 619 | kfmlp_aff->nr_simult = aff_args.rho; |
620 | kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; | 620 | kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; |
621 | 621 | ||
622 | memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_rsrc)); | 622 | memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(unsigned int)*(sem->num_resources / kfmlp_aff->nr_rsrc)); |
623 | 623 | ||
624 | for(i = 0; i < sem->num_resources; ++i) { | 624 | for(i = 0; i < sem->num_resources; ++i) { |
625 | kfmlp_aff->q_info[i].q = &sem->queues[i]; | 625 | kfmlp_aff->q_info[i].q = &sem->queues[i]; |
@@ -669,10 +669,10 @@ struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct | |||
669 | { | 669 | { |
670 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 670 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
671 | lt_t min_len; | 671 | lt_t min_len; |
672 | int min_nr_users; | 672 | unsigned int min_nr_users; |
673 | struct kfmlp_queue_info *shortest; | 673 | struct kfmlp_queue_info *shortest; |
674 | struct kfmlp_queue *to_enqueue; | 674 | struct kfmlp_queue *to_enqueue; |
675 | int i; | 675 | unsigned int i; |
676 | int affinity_gpu; | 676 | int affinity_gpu; |
677 | 677 | ||
678 | // simply pick the shortest queue if, we have no affinity, or we have | 678 | // simply pick the shortest queue if, we have no affinity, or we have |
@@ -893,11 +893,11 @@ struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* op | |||
893 | struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) | 893 | struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) |
894 | { | 894 | { |
895 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 895 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
896 | int min_count; | 896 | unsigned int min_count; |
897 | int min_nr_users; | 897 | unsigned int min_nr_users; |
898 | struct kfmlp_queue_info *shortest; | 898 | struct kfmlp_queue_info *shortest; |
899 | struct kfmlp_queue *to_enqueue; | 899 | struct kfmlp_queue *to_enqueue; |
900 | int i; | 900 | unsigned int i; |
901 | 901 | ||
902 | // TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); | 902 | // TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); |
903 | 903 | ||
@@ -911,7 +911,7 @@ struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, | |||
911 | min_nr_users); | 911 | min_nr_users); |
912 | 912 | ||
913 | for(i = 1; i < sem->num_resources; ++i) { | 913 | for(i = 1; i < sem->num_resources; ++i) { |
914 | int len = aff->q_info[i].q->count; | 914 | unsigned int len = aff->q_info[i].q->count; |
915 | 915 | ||
916 | // queue is smaller, or they're equal and the other has a smaller number | 916 | // queue is smaller, or they're equal and the other has a smaller number |
917 | // of total users. | 917 | // of total users. |
diff --git a/litmus/locking.c b/litmus/locking.c
index eddc67a4d36a..8ba46f85f5c6 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -234,12 +234,12 @@ void print_hp_waiters(struct binheap_node* n, int depth) | |||
234 | 234 | ||
235 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | 235 | #ifdef CONFIG_LITMUS_DGL_SUPPORT |
236 | 236 | ||
237 | struct prioq_mutex; | 237 | struct litmus_lock* select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/) |
238 | |||
239 | void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/) | ||
240 | { | 238 | { |
241 | int start = dgl_wait->last_primary; | 239 | int num_locks = dgl_wait->size; |
242 | extern void __dump_prioq_lock_info(struct prioq_mutex *mutex); | 240 | int last = dgl_wait->last_primary; |
241 | int start; | ||
242 | int idx; | ||
243 | 243 | ||
244 | /* | 244 | /* |
245 | We pick the next lock in reverse order. This causes inheritance propagation | 245 | We pick the next lock in reverse order. This causes inheritance propagation |
@@ -250,55 +250,42 @@ void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lo | |||
250 | BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock); | 250 | BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock); |
251 | 251 | ||
252 | // note reverse order | 252 | // note reverse order |
253 | for(dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1; | 253 | // Try to enable priority on a lock that has an owner. |
254 | dgl_wait->last_primary != start; | 254 | idx = start = (last != 0) ? last - 1 : num_locks - 1; |
255 | dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1) | 255 | do { |
256 | { | 256 | struct litmus_lock *l = dgl_wait->locks[idx]; |
257 | 257 | ||
258 | struct litmus_lock *l = dgl_wait->locks[dgl_wait->last_primary]; | 258 | if(!l->ops->is_owner(l, dgl_wait->task) && l->ops->get_owner(l)) { |
259 | 259 | dgl_wait->last_primary = idx; | |
260 | if(!l->ops->is_owner(l, dgl_wait->task) && | 260 | tsk_rt(dgl_wait->task)->blocked_lock = l; |
261 | l->ops->get_owner(l)) { | ||
262 | |||
263 | tsk_rt(dgl_wait->task)->blocked_lock = | ||
264 | dgl_wait->locks[dgl_wait->last_primary]; | ||
265 | mb(); | 261 | mb(); |
266 | |||
267 | TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); | 262 | TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); |
268 | |||
269 | l->ops->enable_priority(l, dgl_wait); | 263 | l->ops->enable_priority(l, dgl_wait); |
270 | 264 | return(l); | |
271 | return; | ||
272 | } | 265 | } |
273 | } | 266 | idx = (idx != 0) ? idx - 1 : num_locks - 1; |
267 | } while(idx != start); | ||
274 | 268 | ||
275 | // There was no one to push on. This can happen if the blocked task is | 269 | // There was no one to push on. This can happen if the blocked task is |
276 | // behind a task that is idling a prioq-mutex. | 270 | // behind a task that is idling a prioq-mutex. |
277 | 271 | ||
278 | // note reverse order | 272 | // note reverse order |
279 | dgl_wait->last_primary = start; | 273 | idx = (last != 0) ? last - 1 : num_locks - 1; |
280 | for(dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1; | 274 | do { |
281 | dgl_wait->last_primary != start; | 275 | struct litmus_lock *l = dgl_wait->locks[idx]; |
282 | dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1) | ||
283 | { | ||
284 | |||
285 | struct litmus_lock *l = dgl_wait->locks[dgl_wait->last_primary]; | ||
286 | 276 | ||
287 | if(!l->ops->is_owner(l, dgl_wait->task)) { | 277 | if(!l->ops->is_owner(l, dgl_wait->task)) { |
288 | 278 | dgl_wait->last_primary = idx; | |
289 | tsk_rt(dgl_wait->task)->blocked_lock = | 279 | tsk_rt(dgl_wait->task)->blocked_lock = l; |
290 | dgl_wait->locks[dgl_wait->last_primary]; | ||
291 | mb(); | 280 | mb(); |
292 | |||
293 | TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); | 281 | TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); |
294 | |||
295 | l->ops->enable_priority(l, dgl_wait); | 282 | l->ops->enable_priority(l, dgl_wait); |
296 | 283 | return(l); | |
297 | return; | ||
298 | } | 284 | } |
299 | } | 285 | idx = (idx != 0) ? idx - 1 : num_locks - 1; |
286 | } while(idx != start); | ||
300 | 287 | ||
301 | BUG(); | 288 | return(NULL); |
302 | } | 289 | } |
303 | 290 | ||
304 | int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key) | 291 | int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key) |
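The rewrite above changes select_next_lock() from a void function that only updated dgl_wait->last_primary into one that returns the lock the task will block on, or NULL when there is nothing left to wait for (a case the callers in this patch still treat as a bug). A hedged caller-side sketch of the new contract, with the behavior in comments:

    /* Sketch of how the DGL paths below consume the return value. */
    struct litmus_lock *next = select_next_lock(dgl_wait);
    if (next) {
        /* priority inheritance is already enabled on 'next'; release the
         * DGL spinlock and suspend until the whole group is granted. */
    } else {
        /* no lock to push on -- do_litmus_dgl_lock() BUG()s in this case. */
    }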
@@ -333,7 +320,12 @@ struct task_struct* __waitqueue_dgl_remove_first(wait_queue_head_t *wq, | |||
333 | return task; | 320 | return task; |
334 | } | 321 | } |
335 | 322 | ||
336 | void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait) | 323 | void init_dgl_wait_state(dgl_wait_state_t *dgl_wait) |
324 | { | ||
325 | memset(dgl_wait, 0, sizeof(dgl_wait_state_t)); | ||
326 | } | ||
327 | |||
328 | void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t *dgl_wait) | ||
337 | { | 329 | { |
338 | init_waitqueue_entry(wq_node, dgl_wait->task); | 330 | init_waitqueue_entry(wq_node, dgl_wait->task); |
339 | wq_node->private = dgl_wait; | 331 | wq_node->private = dgl_wait; |
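init_dgl_wait_state() is new in this patch and simply zeroes the request descriptor before it is populated; sys_litmus_dgl_lock() below is its caller in this hunk set. A minimal usage sketch, assuming the obvious field names (the exact fill-in code sits outside the shown hunks):

    /* Sketch: clear the descriptor, then describe the group lock request. */
    dgl_wait_state_t dgl_wait_state;

    init_dgl_wait_state(&dgl_wait_state);   /* memset(.., 0, sizeof(..)) */
    dgl_wait_state.task = current;
    dgl_wait_state.size = dgl_size;         /* number of locks requested */
    /* ... dgl_wait_state.locks[i] filled from the od-table entries ... */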
@@ -403,83 +395,62 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) | |||
403 | TRACE_CUR("Locking DGL with size %d: %s\n", dgl_wait->size, dglstr); | 395 | TRACE_CUR("Locking DGL with size %d: %s\n", dgl_wait->size, dglstr); |
404 | #endif | 396 | #endif |
405 | 397 | ||
406 | dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task); | ||
407 | |||
408 | BUG_ON(dgl_wait->task != current); | 398 | BUG_ON(dgl_wait->task != current); |
409 | 399 | ||
410 | raw_spin_lock_irqsave(dgl_lock, irqflags); | ||
411 | |||
412 | dgl_wait->nr_remaining = dgl_wait->size; | 400 | dgl_wait->nr_remaining = dgl_wait->size; |
413 | 401 | ||
402 | dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task); | ||
403 | raw_spin_lock_irqsave(dgl_lock, irqflags); | ||
404 | |||
414 | // try to acquire each lock. enqueue (non-blocking) if it is unavailable. | 405 | // try to acquire each lock. enqueue (non-blocking) if it is unavailable. |
415 | for(i = 0; i < dgl_wait->size; ++i) { | 406 | for(i = 0; i < dgl_wait->size; ++i) { |
416 | struct litmus_lock *l = dgl_wait->locks[i]; | 407 | struct litmus_lock *tmp = dgl_wait->locks[i]; |
417 | 408 | ||
418 | // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks. | 409 | // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks. |
419 | 410 | ||
420 | if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) { | 411 | if(tmp->ops->dgl_lock(tmp, dgl_wait, &dgl_wait->wq_nodes[i])) { |
421 | --(dgl_wait->nr_remaining); | 412 | --(dgl_wait->nr_remaining); |
422 | TRACE_CUR("Acquired lock %d immediatly.\n", l->ident); | 413 | TRACE_CUR("Acquired lock %d immediatly.\n", tmp->ident); |
423 | } | 414 | } |
424 | } | 415 | } |
425 | 416 | ||
426 | if(dgl_wait->nr_remaining == 0) { | 417 | if(dgl_wait->nr_remaining == 0) { |
427 | // acquired entire group immediatly | 418 | // acquired entire group immediatly |
428 | TRACE_CUR("Acquired all locks in DGL immediatly!\n"); | 419 | TRACE_CUR("Acquired all locks in DGL immediatly!\n"); |
420 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); | ||
429 | } | 421 | } |
430 | else { | 422 | else { |
423 | struct litmus_lock *first_primary; | ||
431 | 424 | ||
432 | TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", | 425 | TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", |
433 | dgl_wait->nr_remaining); | 426 | dgl_wait->nr_remaining); |
434 | 427 | ||
435 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | 428 | first_primary = select_next_lock(dgl_wait); |
436 | // KLUDGE: don't count this suspension as time in the critical gpu | ||
437 | // critical section | ||
438 | if(tsk_rt(dgl_wait->task)->held_gpus) { | ||
439 | tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1; | ||
440 | } | ||
441 | #endif | ||
442 | |||
443 | // note reverse order. see comments in select_next_lock for reason. | ||
444 | for(i = dgl_wait->size - 1; i >= 0; --i) { | ||
445 | struct litmus_lock *l = dgl_wait->locks[i]; | ||
446 | if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe | ||
447 | |||
448 | TRACE_CUR("Activating priority inheritance on lock %d\n", | ||
449 | l->ident); | ||
450 | |||
451 | TS_DGL_LOCK_SUSPEND; | ||
452 | |||
453 | l->ops->enable_priority(l, dgl_wait); | ||
454 | dgl_wait->last_primary = i; | ||
455 | 429 | ||
456 | TRACE_CUR("Suspending for lock %d\n", l->ident); | 430 | if (!first_primary) { |
457 | 431 | BUG(); | |
458 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending | 432 | // TRACE_CUR("We hold all the locks?\n"); |
433 | // raw_spin_unlock_irqrestore(dgl_lock, irqflags); | ||
434 | // goto all_acquired; | ||
435 | } | ||
459 | 436 | ||
460 | suspend_for_lock(); // suspend!!! | 437 | TRACE_CUR("Suspending for lock %d\n", first_primary->ident); |
461 | 438 | ||
462 | TS_DGL_LOCK_RESUME; | 439 | TS_DGL_LOCK_SUSPEND; |
463 | 440 | ||
464 | TRACE_CUR("Woken up from DGL suspension.\n"); | 441 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending |
442 | suspend_for_lock(); | ||
465 | 443 | ||
466 | goto all_acquired; // we should hold all locks when we wake up. | 444 | TS_DGL_LOCK_RESUME; |
467 | } | ||
468 | } | ||
469 | 445 | ||
470 | TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n"); | 446 | TRACE_CUR("Woken up from DGL suspension.\n"); |
471 | //BUG(); | ||
472 | } | 447 | } |
473 | 448 | ||
474 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); | ||
475 | |||
476 | all_acquired: | ||
477 | |||
478 | // FOR SANITY CHECK FOR TESTING | 449 | // FOR SANITY CHECK FOR TESTING |
479 | // for(i = 0; i < dgl_wait->size; ++i) { | 450 | for(i = 0; i < dgl_wait->size; ++i) { |
480 | // struct litmus_lock *l = dgl_wait->locks[i]; | 451 | struct litmus_lock *tmp = dgl_wait->locks[i]; |
481 | // BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); | 452 | BUG_ON(!tmp->ops->is_owner(tmp, dgl_wait->task)); |
482 | // } | 453 | } |
483 | 454 | ||
484 | TRACE_CUR("Acquired entire DGL\n"); | 455 | TRACE_CUR("Acquired entire DGL\n"); |
485 | 456 | ||
@@ -493,7 +464,6 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait) | |||
493 | int i; | 464 | int i; |
494 | unsigned long irqflags; //, dummyflags; | 465 | unsigned long irqflags; //, dummyflags; |
495 | raw_spinlock_t *dgl_lock; | 466 | raw_spinlock_t *dgl_lock; |
496 | struct litmus_lock *l; | ||
497 | struct task_struct *t = current; | 467 | struct task_struct *t = current; |
498 | 468 | ||
499 | #ifdef CONFIG_SCHED_DEBUG_TRACE | 469 | #ifdef CONFIG_SCHED_DEBUG_TRACE |
@@ -511,13 +481,19 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait) | |||
511 | 481 | ||
512 | dgl_wait->nr_remaining = dgl_wait->size; | 482 | dgl_wait->nr_remaining = dgl_wait->size; |
513 | 483 | ||
484 | /* enqueue for all locks */ | ||
514 | for(i = 0; i < dgl_wait->size; ++i) { | 485 | for(i = 0; i < dgl_wait->size; ++i) { |
515 | struct litmus_lock *l = dgl_wait->locks[i]; | 486 | /* dgl_lock must only enqueue. cannot set TASK_UNINTERRUPTIBLE!! |
516 | // this should be a forced enqueue if atomic DGLs are needed. | 487 | * Note the difference in requirements with do_litmus_dgl_lock(). |
517 | l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i]); | 488 | */ |
489 | struct litmus_lock *tmp = dgl_wait->locks[i]; | ||
490 | tmp->ops->dgl_lock(tmp, dgl_wait, &dgl_wait->wq_nodes[i]); | ||
518 | } | 491 | } |
519 | 492 | ||
493 | /* now try to take all locks */ | ||
520 | if(__attempt_atomic_dgl_acquire(NULL, dgl_wait)) { | 494 | if(__attempt_atomic_dgl_acquire(NULL, dgl_wait)) { |
495 | struct litmus_lock *l; | ||
496 | |||
521 | /* Failed to acquire all locks at once. | 497 | /* Failed to acquire all locks at once. |
522 | * Pick a lock to push on and suspend. */ | 498 | * Pick a lock to push on and suspend. */ |
523 | TRACE_CUR("Could not atomically acquire all locks.\n"); | 499 | TRACE_CUR("Could not atomically acquire all locks.\n"); |
@@ -526,26 +502,13 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait) | |||
526 | * __attempt_atomic_dgl_acquire() may actually succeed. */ | 502 | * __attempt_atomic_dgl_acquire() may actually succeed. */ |
527 | set_task_state(t, TASK_UNINTERRUPTIBLE); | 503 | set_task_state(t, TASK_UNINTERRUPTIBLE); |
528 | 504 | ||
529 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | 505 | l = select_next_lock(dgl_wait); |
530 | // KLUDGE: don't count this suspension as time in the critical gpu | ||
531 | // critical section | ||
532 | if(tsk_rt(t)->held_gpus) { | ||
533 | tsk_rt(t)->suspend_gpu_tracker_on_block = 1; | ||
534 | } | ||
535 | #endif | ||
536 | 506 | ||
537 | // select a lock to push priority on | 507 | TRACE_CUR("Suspending for lock %d\n", l->ident); |
538 | dgl_wait->last_primary = 0; // default | ||
539 | select_next_lock(dgl_wait); // may change value of last_primary | ||
540 | |||
541 | l = dgl_wait->locks[dgl_wait->last_primary]; | ||
542 | 508 | ||
543 | TS_DGL_LOCK_SUSPEND; | 509 | TS_DGL_LOCK_SUSPEND; |
544 | 510 | ||
545 | TRACE_CUR("Suspending for lock %d\n", l->ident); | ||
546 | |||
547 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending | 511 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending |
548 | |||
549 | suspend_for_lock(); // suspend!!! | 512 | suspend_for_lock(); // suspend!!! |
550 | 513 | ||
551 | TS_DGL_LOCK_RESUME; | 514 | TS_DGL_LOCK_RESUME; |
@@ -562,8 +525,8 @@ all_acquired: | |||
562 | 525 | ||
563 | // SANITY CHECK FOR TESTING | 526 | // SANITY CHECK FOR TESTING |
564 | for(i = 0; i < dgl_wait->size; ++i) { | 527 | for(i = 0; i < dgl_wait->size; ++i) { |
565 | struct litmus_lock *l = dgl_wait->locks[i]; | 528 | struct litmus_lock *tmp = dgl_wait->locks[i]; |
566 | BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); | 529 | BUG_ON(!tmp->ops->is_owner(tmp, dgl_wait->task)); |
567 | } | 530 | } |
568 | 531 | ||
569 | TRACE_CUR("Acquired entire DGL\n"); | 532 | TRACE_CUR("Acquired entire DGL\n"); |
@@ -603,6 +566,8 @@ asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size) | |||
603 | err = sys_litmus_lock(dgl_ods[0]); | 566 | err = sys_litmus_lock(dgl_ods[0]); |
604 | } | 567 | } |
605 | else { | 568 | else { |
569 | init_dgl_wait_state(&dgl_wait_state); | ||
570 | |||
606 | for(i = 0; i < dgl_size; ++i) { | 571 | for(i = 0; i < dgl_size; ++i) { |
607 | struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]); | 572 | struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]); |
608 | if(entry && is_lock(entry)) { | 573 | if(entry && is_lock(entry)) { |