From e9b88341eb6b9fbe16139796f2f78e1f65793e5a Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 14 Feb 2013 15:35:52 -0500 Subject: Generalize IKGLP implementation Generalized the IKGLP implementation to support non-optimal configurations. Parameters allow the IKGLP to be configured as FIFO queues (aka KFMLP), a single priority queue, or a hybrid (optimal IKGLP). The maximum number of users within the FIFO queues is also parameterized, allowing more than 'm' replica holders to hold replicas concurrently (this breaks optimality though). Also fixed a bug in locking.c where DGL prority inheritance is determined. --- litmus/ikglp_lock.c | 245 ++++++++++++++++++++++------------------------------ litmus/kfmlp_lock.c | 36 ++++---- litmus/locking.c | 177 +++++++++++++++---------------------- 3 files changed, 194 insertions(+), 264 deletions(-) (limited to 'litmus') diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c index 3fd760799a75..cab0d7f938f9 100644 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c @@ -103,8 +103,7 @@ static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue, struct task_struct *queued, *found = NULL; list_for_each(pos, &kqueue->wait.task_list) { - queued = (struct task_struct*) list_entry(pos, - wait_queue_t, task_list)->private; + queued = (struct task_struct*) list_entry(pos, wait_queue_t, task_list)->private; /* Compare task prios, find high prio task. */ if(queued != skip && litmus->compare(queued, found)) @@ -232,22 +231,14 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, struct task_struct *t, ikglp_heap_node_t *node) { - - node->task = t; INIT_BINHEAP_NODE(&node->node); - if(sem->top_m_size < sem->m) { + if(sem->top_m_size < sem->max_in_fifos) { TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", t->comm, t->pid); -// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); -// print_global_list(sem->top_m.root, 1); - binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); ++(sem->top_m_size); - -// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); -// print_global_list(sem->top_m.root, 1); } else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { ikglp_heap_node_t *evicted = @@ -257,12 +248,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, t->comm, t->pid, evicted->task->comm, evicted->task->pid); -// TRACE_CUR("Not-Top-M Before:\n"); -// print_global_list(sem->not_top_m.root, 1); -// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); -// print_global_list(sem->top_m.root, 1); - - binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); INIT_BINHEAP_NODE(&evicted->node); binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); @@ -279,8 +264,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, else { TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", t->comm, t->pid); -// TRACE_CUR("Not-Top-M Before:\n"); -// print_global_list(sem->not_top_m.root, 1); binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); @@ -303,12 +286,6 @@ static void ikglp_del_global_list(struct ikglp_semaphore *sem, if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); -// TRACE_CUR("Not-Top-M Before:\n"); -// print_global_list(sem->not_top_m.root, 1); -// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); -// print_global_list(sem->top_m.root, 1); - - binheap_delete(&node->node, &sem->top_m); if(!binheap_empty(&sem->not_top_m)) { @@ -337,8 +314,6 @@ static void 
ikglp_del_global_list(struct ikglp_semaphore *sem, } else { TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); -// TRACE_CUR("Not-Top-M Before:\n"); -// print_global_list(sem->not_top_m.root, 1); binheap_delete(&node->node, &sem->not_top_m); @@ -355,10 +330,6 @@ static void ikglp_add_donees(struct ikglp_semaphore *sem, struct task_struct *t, ikglp_donee_heap_node_t* node) { -// TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid); -// TRACE_CUR("donees Before:\n"); -// print_donees(sem, sem->donees.root, 1); - node->task = t; node->donor_info = NULL; node->fq = fq; @@ -928,7 +899,7 @@ int ikglp_lock(struct litmus_lock* l) TRACE_CUR("Requesting a replica from lock %d.\n", l->ident); - if(sem->nr_in_fifos < sem->m) { + if(sem->nr_in_fifos < sem->max_in_fifos) { // enqueue somwhere #ifdef CONFIG_LITMUS_AFFINITY_LOCKING fq = (sem->aff_obs) ? @@ -1272,10 +1243,13 @@ int ikglp_unlock(struct litmus_lock* l) donee = t; #ifdef CONFIG_LITMUS_AFFINITY_LOCKING - if(sem->aff_obs) + if(sem->aff_obs) { fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); - if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) - fq_of_new_on_fq = fq; /* discard recommendation */ + if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { + WARN_ON(1); + fq_of_new_on_fq = fq; + } + } else fq_of_new_on_fq = fq; #else @@ -1308,10 +1282,13 @@ int ikglp_unlock(struct litmus_lock* l) binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); #ifdef CONFIG_LITMUS_AFFINITY_LOCKING - if(sem->aff_obs) + if(sem->aff_obs) { fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); - if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) - fq_of_new_on_fq = fq; /* discard recommendation */ + if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { + WARN_ON(1); + fq_of_new_on_fq = fq; + } + } else fq_of_new_on_fq = fq; #else @@ -1335,10 +1312,13 @@ int ikglp_unlock(struct litmus_lock* l) new_on_fq = pq_wait->task; #ifdef CONFIG_LITMUS_AFFINITY_LOCKING - if(sem->aff_obs) + if(sem->aff_obs) { fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); - if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) - fq_of_new_on_fq = fq; /* discard recommendation */ + if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { + WARN_ON(1); + fq_of_new_on_fq = fq; + } + } else fq_of_new_on_fq = fq; #else @@ -1663,26 +1643,44 @@ void ikglp_free(struct litmus_lock* l) -struct litmus_lock* ikglp_new(int m, +struct litmus_lock* ikglp_new(unsigned int m, struct litmus_lock_ops* ops, - void* __user arg) + void* __user uarg) { + /* TODO: Support trivial token lock, s.t. args.nr_replicas equals some + * sentinel value, and implement special-case algorithms. There is currently + * a lot of overhead for a trivial token lock since we allocate O(n)-worth + * of data; this could be avoided with special-case algorithms. 
*/ + struct ikglp_semaphore* sem; - int nr_replicas = 0; - int i; + struct ikglp_args args; + unsigned int i; BUG_ON(m <= 0); - if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas))) - { + if(!access_ok(VERIFY_READ, uarg, sizeof(args))) + return(NULL); + if(__copy_from_user(&args, uarg, sizeof(args))) + return(NULL); + + /* validation */ + + /* there must be at least one resource */ + if (args.nr_replicas < 1) { + printk("Invalid number of replicas.\n"); return(NULL); } - if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas))) - { + /* IKGLP_OPTIMAL_FIFO_LEN can only be determined if nr_max_holders + * is IKGLP_M_HOLDERS (number of CPUs) */ + if (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN && + args.max_in_fifos != IKGLP_M_IN_FIFOS) { + printk("Cannot compute optimal FIFO length if max_in_fifos != IKGLP_M_IN_FIFOS\n"); return(NULL); } - if(nr_replicas < 1) - { + if ((args.max_in_fifos != IKGLP_UNLIMITED_IN_FIFOS) && + (args.max_fifo_len != IKGLP_UNLIMITED_FIFO_LEN) && + (args.max_in_fifos > args.nr_replicas*args.max_fifo_len)) { + printk("Not enough total FIFO space for specified max requests in FIFOs.\n"); return(NULL); } @@ -1693,7 +1691,7 @@ struct litmus_lock* ikglp_new(int m, } memset(sem, 0, sizeof(*sem)); - sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL); + sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*args.nr_replicas, GFP_KERNEL); if(!sem->fifo_queues) { kfree(sem); @@ -1712,17 +1710,21 @@ struct litmus_lock* ikglp_new(int m, raw_spin_lock_init(&sem->real_lock); - sem->nr_replicas = nr_replicas; - sem->m = m; - sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0); + sem->nr_replicas = args.nr_replicas; + sem->max_in_fifos = (args.max_in_fifos == IKGLP_M_IN_FIFOS) ? + m : + args.max_in_fifos; + sem->max_fifo_len = (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN) ? + (sem->max_in_fifos/args.nr_replicas) + ((sem->max_in_fifos%args.nr_replicas) != 0) : + args.max_fifo_len; sem->nr_in_fifos = 0; - TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n", - sem->m, + TRACE_CUR("New IKGLP Sem: m = %u, k = %u, max fifo_len = %u\n", + sem->max_in_fifos, sem->nr_replicas, sem->max_fifo_len); - for(i = 0; i < nr_replicas; ++i) + for(i = 0; i < args.nr_replicas; ++i) { struct fifo_queue* q = &(sem->fifo_queues[i]); @@ -1766,33 +1768,13 @@ struct litmus_lock* ikglp_new(int m, +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +/****************************************************************************/ +/* AFFINITY HEURISTICS */ +/****************************************************************************/ - - - - - - - - - - - - - - - - - - - - - - -#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) - static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) { int gpu = replica % aff->nr_rsrc; @@ -1856,7 +1838,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* struct ikglp_affinity* ikglp_aff; struct gpu_affinity_observer_args aff_args; struct ikglp_semaphore* sem; - int i; + unsigned int i; unsigned long flags; if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { @@ -1873,23 +1855,17 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* return(NULL); } - if((aff_args.nr_simult_users <= 0) || - (sem->nr_replicas%aff_args.nr_simult_users != 0)) { - TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " - "(%d) per replica. 
#replicas should be evenly divisible " + if((aff_args.rho <= 0) || + (sem->nr_replicas%aff_args.rho != 0)) { + TRACE_CUR("Lock %d does not support #replicas (%u) for #simult_users " + "(%u) per replica. #replicas should be evenly divisible " "by #simult_users.\n", sem->litmus_lock.ident, sem->nr_replicas, - aff_args.nr_simult_users); + aff_args.rho); return(NULL); } -// if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { -// TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", -// NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); -//// return(NULL); -// } - ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); if(!ikglp_aff) { return(NULL); @@ -1901,14 +1877,14 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* return(NULL); } - ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); + ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL); if(!ikglp_aff->nr_cur_users_on_rsrc) { kfree(ikglp_aff->q_info); kfree(ikglp_aff); return(NULL); } - ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); + ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL); if(!ikglp_aff->nr_aff_on_rsrc) { kfree(ikglp_aff->nr_cur_users_on_rsrc); kfree(ikglp_aff->q_info); @@ -1920,7 +1896,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ikglp_aff->ops = ikglp_ops; ikglp_aff->offset = aff_args.replica_to_gpu_offset; - ikglp_aff->nr_simult = aff_args.nr_simult_users; + ikglp_aff->nr_simult = aff_args.rho; ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0; @@ -1930,7 +1906,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ikglp_aff->relax_max_fifo_len); memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); - memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc)); + memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(unsigned int)*(ikglp_aff->nr_rsrc)); for(i = 0; i < sem->nr_replicas; ++i) { ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; @@ -1950,9 +1926,6 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* return &ikglp_aff->obs; } - - - static int gpu_replica_to_resource(struct ikglp_affinity* aff, struct fifo_queue* fq) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); @@ -1960,29 +1933,28 @@ static int gpu_replica_to_resource(struct ikglp_affinity* aff, } -// Smart IKGLP Affinity -//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff) -//{ -// struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); -// struct ikglp_queue_info *shortest = &aff->q_info[0]; -// int i; -// -// for(i = 1; i < sem->nr_replicas; ++i) { -// if(aff->q_info[i].estimated_len < shortest->estimated_len) { -// shortest = &aff->q_info[i]; -// } -// } -// -// return(shortest); -//} +/*--------------------------------------------------------------------------*/ +/* ADVANCED AFFINITY HEURISITICS */ +/* */ +/* These heuristics estimate FIFO length wait times and try to enqueue */ +/* tasks into the shortest queues. When two queues are equivlenet, the GPU */ +/* that maintains affinity is selected. 
When a task has no affinity, the */ +/* heuristic tries to get the GPU with the fewest number of other tasks */ +/* with affinity on that GPU. */ +/* */ +/* Heuristics to explore in the future: */ +/* - Utilization */ +/* - Longest non-preemptive section */ +/* - Criticality */ +/* - Task period */ +/*--------------------------------------------------------------------------*/ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) { // advise_enqueue must be smart as not not break IKGLP rules: - // * No queue can be greater than ceil(m/k) in length. We may return - // such a queue, but IKGLP will be smart enough as to send requests - // to donors or PQ. + // * No queue can be greater than ceil(m/k) in length, unless + // 'relax_max_fifo_len' is asserted // * Cannot let a queue idle if there exist waiting PQ/donors // -- needed to guarantee parallel progress of waiters. // @@ -1993,14 +1965,15 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); lt_t min_len; - int min_nr_users, min_nr_aff_users; + unsigned int min_nr_users, min_nr_aff_users; struct ikglp_queue_info *shortest, *aff_queue; struct fifo_queue *to_enqueue; - int i; + unsigned int i; int affinity_gpu; - int max_fifo_len = (aff->relax_max_fifo_len) ? - sem->m : sem->max_fifo_len; + unsigned int max_fifo_len = (aff->relax_max_fifo_len) ? + sem->max_in_fifos : /* allow possibility of all requests on same queue */ + sem->max_fifo_len; /* constraint FIFO len */ // if we have no affinity, find the GPU with the least number of users // with active affinity @@ -2037,7 +2010,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t min_nr_aff_users = *(shortest->nr_aff_users); - TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", + TRACE_CUR("cs is %llu on queue %d (count = %u): est len = %llu\n", get_gpu_estimate(t, MIG_LOCAL), ikglp_get_idx(sem, shortest->q), shortest->q->count, @@ -2119,8 +2092,6 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t ikglp_get_idx(sem, sem->shortest_fifo_queue)); return to_enqueue; - - //return(sem->shortest_fifo_queue); } @@ -2334,7 +2305,6 @@ static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff, donee = NULL; donee_node = NULL; - //*dist_from_head = sem->max_fifo_len + 1; *dist_from_head = IKGLP_INVAL_DISTANCE; TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); @@ -2630,7 +2600,6 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t) // decrement affinity count on old GPU aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; --(aff->nr_aff_on_rsrc[aff_rsrc]); -// aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t); if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); @@ -2676,12 +2645,10 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, if(last_gpu >= 0) { int old_rsrc = last_gpu - aff->offset; --(aff->nr_aff_on_rsrc[old_rsrc]); -// aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t)); } // increment affinity count on new GPU ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); -// aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t)); tsk_rt(t)->rsrc_exit_cb_args = aff; tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; } @@ -2751,20 +2718,18 @@ struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* op - - - - -// 
Simple ikglp Affinity (standard ikglp with auto-gpu registration) +/*--------------------------------------------------------------------------*/ +/* SIMPLE LOAD-BALANCING AFFINITY HEURISTIC */ +/*--------------------------------------------------------------------------*/ struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); - int min_count; - int min_nr_users; + unsigned int min_count; + unsigned int min_nr_users; struct ikglp_queue_info *shortest; struct fifo_queue *to_enqueue; - int i; + unsigned int i; // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); @@ -2772,13 +2737,13 @@ struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, s min_count = shortest->q->count; min_nr_users = *(shortest->nr_cur_users); - TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", + TRACE_CUR("queue %d: waiters = %u, total holders = %u\n", ikglp_get_idx(sem, shortest->q), shortest->q->count, min_nr_users); for(i = 1; i < sem->nr_replicas; ++i) { - int len = aff->q_info[i].q->count; + unsigned int len = aff->q_info[i].q->count; // queue is smaller, or they're equal and the other has a smaller number // of total users. diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c index 041561839976..7dd866185623 100644 --- a/litmus/kfmlp_lock.c +++ b/litmus/kfmlp_lock.c @@ -21,7 +21,7 @@ static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, struct task_struct* holder) { - int i; + unsigned int i; for(i = 0; i < sem->num_resources; ++i) if(sem->queues[i].owner == holder) return(&sem->queues[i]); @@ -79,7 +79,7 @@ static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, { /* must hold sem->lock */ - int i; + unsigned int i; *to_steal = NULL; *to_steal_from = NULL; @@ -438,7 +438,7 @@ int kfmlp_close(struct litmus_lock* l) struct kfmlp_queue *my_queue; unsigned long flags; - int owner; + unsigned int owner; spin_lock_irqsave(&sem->lock, flags); @@ -465,8 +465,8 @@ void kfmlp_free(struct litmus_lock* l) struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) { struct kfmlp_semaphore* sem; - int num_resources = 0; - int i; + unsigned int num_resources = 0; + unsigned int i; if(!access_ok(VERIFY_READ, args, sizeof(num_resources))) { @@ -560,7 +560,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* struct kfmlp_affinity* kfmlp_aff; struct gpu_affinity_observer_args aff_args; struct kfmlp_semaphore* sem; - int i; + unsigned int i; unsigned long flags; if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { @@ -577,14 +577,14 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* return(NULL); } - if((aff_args.nr_simult_users <= 0) || - (sem->num_resources%aff_args.nr_simult_users != 0)) { + if((aff_args.rho <= 0) || + (sem->num_resources%aff_args.rho != 0)) { TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " "(%d) per replica. 
#replicas should be evenly divisible " "by #simult_users.\n", sem->litmus_lock.ident, sem->num_resources, - aff_args.nr_simult_users); + aff_args.rho); return(NULL); } @@ -605,7 +605,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* return(NULL); } - kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL); + kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->num_resources / aff_args.rho), GFP_KERNEL); if(!kfmlp_aff->nr_cur_users_on_rsrc) { kfree(kfmlp_aff->q_info); kfree(kfmlp_aff); @@ -616,10 +616,10 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* kfmlp_aff->ops = kfmlp_ops; kfmlp_aff->offset = aff_args.replica_to_gpu_offset; - kfmlp_aff->nr_simult = aff_args.nr_simult_users; + kfmlp_aff->nr_simult = aff_args.rho; kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; - memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_rsrc)); + memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(unsigned int)*(sem->num_resources / kfmlp_aff->nr_rsrc)); for(i = 0; i < sem->num_resources; ++i) { kfmlp_aff->q_info[i].q = &sem->queues[i]; @@ -669,10 +669,10 @@ struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct { struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); lt_t min_len; - int min_nr_users; + unsigned int min_nr_users; struct kfmlp_queue_info *shortest; struct kfmlp_queue *to_enqueue; - int i; + unsigned int i; int affinity_gpu; // simply pick the shortest queue if, we have no affinity, or we have @@ -893,11 +893,11 @@ struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* op struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) { struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); - int min_count; - int min_nr_users; + unsigned int min_count; + unsigned int min_nr_users; struct kfmlp_queue_info *shortest; struct kfmlp_queue *to_enqueue; - int i; + unsigned int i; // TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); @@ -911,7 +911,7 @@ struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, min_nr_users); for(i = 1; i < sem->num_resources; ++i) { - int len = aff->q_info[i].q->count; + unsigned int len = aff->q_info[i].q->count; // queue is smaller, or they're equal and the other has a smaller number // of total users. diff --git a/litmus/locking.c b/litmus/locking.c index eddc67a4d36a..8ba46f85f5c6 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -234,12 +234,12 @@ void print_hp_waiters(struct binheap_node* n, int depth) #ifdef CONFIG_LITMUS_DGL_SUPPORT -struct prioq_mutex; - -void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/) +struct litmus_lock* select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/) { - int start = dgl_wait->last_primary; - extern void __dump_prioq_lock_info(struct prioq_mutex *mutex); + int num_locks = dgl_wait->size; + int last = dgl_wait->last_primary; + int start; + int idx; /* We pick the next lock in reverse order. This causes inheritance propagation @@ -250,55 +250,42 @@ void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lo BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock); // note reverse order - for(dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? 
dgl_wait->last_primary - 1 : dgl_wait->size-1; - dgl_wait->last_primary != start; - dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1) - { - - struct litmus_lock *l = dgl_wait->locks[dgl_wait->last_primary]; - - if(!l->ops->is_owner(l, dgl_wait->task) && - l->ops->get_owner(l)) { - - tsk_rt(dgl_wait->task)->blocked_lock = - dgl_wait->locks[dgl_wait->last_primary]; + // Try to enable priority on a lock that has an owner. + idx = start = (last != 0) ? last - 1 : num_locks - 1; + do { + struct litmus_lock *l = dgl_wait->locks[idx]; + + if(!l->ops->is_owner(l, dgl_wait->task) && l->ops->get_owner(l)) { + dgl_wait->last_primary = idx; + tsk_rt(dgl_wait->task)->blocked_lock = l; mb(); - TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); - l->ops->enable_priority(l, dgl_wait); - - return; + return(l); } - } + idx = (idx != 0) ? idx - 1 : num_locks - 1; + } while(idx != start); // There was no one to push on. This can happen if the blocked task is // behind a task that is idling a prioq-mutex. // note reverse order - dgl_wait->last_primary = start; - for(dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1; - dgl_wait->last_primary != start; - dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1) - { - - struct litmus_lock *l = dgl_wait->locks[dgl_wait->last_primary]; + idx = (last != 0) ? last - 1 : num_locks - 1; + do { + struct litmus_lock *l = dgl_wait->locks[idx]; if(!l->ops->is_owner(l, dgl_wait->task)) { - - tsk_rt(dgl_wait->task)->blocked_lock = - dgl_wait->locks[dgl_wait->last_primary]; + dgl_wait->last_primary = idx; + tsk_rt(dgl_wait->task)->blocked_lock = l; mb(); - TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); - l->ops->enable_priority(l, dgl_wait); - - return; + return(l); } - } + idx = (idx != 0) ? idx - 1 : num_locks - 1; + } while(idx != start); - BUG(); + return(NULL); } int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key) @@ -333,7 +320,12 @@ struct task_struct* __waitqueue_dgl_remove_first(wait_queue_head_t *wq, return task; } -void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait) +void init_dgl_wait_state(dgl_wait_state_t *dgl_wait) +{ + memset(dgl_wait, 0, sizeof(dgl_wait_state_t)); +} + +void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t *dgl_wait) { init_waitqueue_entry(wq_node, dgl_wait->task); wq_node->private = dgl_wait; @@ -403,83 +395,62 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) TRACE_CUR("Locking DGL with size %d: %s\n", dgl_wait->size, dglstr); #endif - dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task); - BUG_ON(dgl_wait->task != current); - raw_spin_lock_irqsave(dgl_lock, irqflags); - dgl_wait->nr_remaining = dgl_wait->size; + dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task); + raw_spin_lock_irqsave(dgl_lock, irqflags); + // try to acquire each lock. enqueue (non-blocking) if it is unavailable. for(i = 0; i < dgl_wait->size; ++i) { - struct litmus_lock *l = dgl_wait->locks[i]; + struct litmus_lock *tmp = dgl_wait->locks[i]; // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks. 
- if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) { + if(tmp->ops->dgl_lock(tmp, dgl_wait, &dgl_wait->wq_nodes[i])) { --(dgl_wait->nr_remaining); - TRACE_CUR("Acquired lock %d immediatly.\n", l->ident); + TRACE_CUR("Acquired lock %d immediatly.\n", tmp->ident); } } if(dgl_wait->nr_remaining == 0) { // acquired entire group immediatly TRACE_CUR("Acquired all locks in DGL immediatly!\n"); + raw_spin_unlock_irqrestore(dgl_lock, irqflags); } else { + struct litmus_lock *first_primary; TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", dgl_wait->nr_remaining); -#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) - // KLUDGE: don't count this suspension as time in the critical gpu - // critical section - if(tsk_rt(dgl_wait->task)->held_gpus) { - tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1; - } -#endif - - // note reverse order. see comments in select_next_lock for reason. - for(i = dgl_wait->size - 1; i >= 0; --i) { - struct litmus_lock *l = dgl_wait->locks[i]; - if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe - - TRACE_CUR("Activating priority inheritance on lock %d\n", - l->ident); - - TS_DGL_LOCK_SUSPEND; - - l->ops->enable_priority(l, dgl_wait); - dgl_wait->last_primary = i; + first_primary = select_next_lock(dgl_wait); - TRACE_CUR("Suspending for lock %d\n", l->ident); - - raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending + if (!first_primary) { + BUG(); +// TRACE_CUR("We hold all the locks?\n"); +// raw_spin_unlock_irqrestore(dgl_lock, irqflags); +// goto all_acquired; + } - suspend_for_lock(); // suspend!!! + TRACE_CUR("Suspending for lock %d\n", first_primary->ident); - TS_DGL_LOCK_RESUME; + TS_DGL_LOCK_SUSPEND; - TRACE_CUR("Woken up from DGL suspension.\n"); + raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending + suspend_for_lock(); - goto all_acquired; // we should hold all locks when we wake up. - } - } + TS_DGL_LOCK_RESUME; - TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n"); - //BUG(); + TRACE_CUR("Woken up from DGL suspension.\n"); } - raw_spin_unlock_irqrestore(dgl_lock, irqflags); - -all_acquired: - // FOR SANITY CHECK FOR TESTING -// for(i = 0; i < dgl_wait->size; ++i) { -// struct litmus_lock *l = dgl_wait->locks[i]; -// BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); -// } + for(i = 0; i < dgl_wait->size; ++i) { + struct litmus_lock *tmp = dgl_wait->locks[i]; + BUG_ON(!tmp->ops->is_owner(tmp, dgl_wait->task)); + } TRACE_CUR("Acquired entire DGL\n"); @@ -493,7 +464,6 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait) int i; unsigned long irqflags; //, dummyflags; raw_spinlock_t *dgl_lock; - struct litmus_lock *l; struct task_struct *t = current; #ifdef CONFIG_SCHED_DEBUG_TRACE @@ -511,13 +481,19 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait) dgl_wait->nr_remaining = dgl_wait->size; + /* enqueue for all locks */ for(i = 0; i < dgl_wait->size; ++i) { - struct litmus_lock *l = dgl_wait->locks[i]; - // this should be a forced enqueue if atomic DGLs are needed. - l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i]); + /* dgl_lock must only enqueue. cannot set TASK_UNINTERRUPTIBLE!! + * Note the difference in requirements with do_litmus_dgl_lock(). 
+ */ + struct litmus_lock *tmp = dgl_wait->locks[i]; + tmp->ops->dgl_lock(tmp, dgl_wait, &dgl_wait->wq_nodes[i]); } + /* now try to take all locks */ if(__attempt_atomic_dgl_acquire(NULL, dgl_wait)) { + struct litmus_lock *l; + /* Failed to acquire all locks at once. * Pick a lock to push on and suspend. */ TRACE_CUR("Could not atomically acquire all locks.\n"); @@ -526,26 +502,13 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait) * __attempt_atomic_dgl_acquire() may actually succeed. */ set_task_state(t, TASK_UNINTERRUPTIBLE); -#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) - // KLUDGE: don't count this suspension as time in the critical gpu - // critical section - if(tsk_rt(t)->held_gpus) { - tsk_rt(t)->suspend_gpu_tracker_on_block = 1; - } -#endif + l = select_next_lock(dgl_wait); - // select a lock to push priority on - dgl_wait->last_primary = 0; // default - select_next_lock(dgl_wait); // may change value of last_primary - - l = dgl_wait->locks[dgl_wait->last_primary]; + TRACE_CUR("Suspending for lock %d\n", l->ident); TS_DGL_LOCK_SUSPEND; - TRACE_CUR("Suspending for lock %d\n", l->ident); - raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending - suspend_for_lock(); // suspend!!! TS_DGL_LOCK_RESUME; @@ -562,8 +525,8 @@ all_acquired: // SANITY CHECK FOR TESTING for(i = 0; i < dgl_wait->size; ++i) { - struct litmus_lock *l = dgl_wait->locks[i]; - BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); + struct litmus_lock *tmp = dgl_wait->locks[i]; + BUG_ON(!tmp->ops->is_owner(tmp, dgl_wait->task)); } TRACE_CUR("Acquired entire DGL\n"); @@ -603,6 +566,8 @@ asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size) err = sys_litmus_lock(dgl_ods[0]); } else { + init_dgl_wait_state(&dgl_wait_state); + for(i = 0; i < dgl_size; ++i) { struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]); if(entry && is_lock(entry)) { -- cgit v1.2.2