From f916cdb8e6a9ee2c917fddb7351e6bb39f6c953e Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Wed, 18 Apr 2012 21:30:36 -0400 Subject: Added support for simult-users in kfmlp --- include/litmus/fdso.h | 9 +- include/litmus/ikglp_lock.h | 61 ++++++++ include/litmus/kfmlp_lock.h | 10 +- include/litmus/nvidia_info.h | 1 + include/litmus/rt_param.h | 3 +- litmus/Kconfig | 13 ++ litmus/fdso.c | 3 +- litmus/kfmlp_lock.c | 337 +++++++++++++++++++++++++------------------ litmus/nvidia_info.c | 11 +- 9 files changed, 297 insertions(+), 151 deletions(-) diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h index b92c1a3f004f..552a1e731672 100644 --- a/include/litmus/fdso.h +++ b/include/litmus/fdso.h @@ -24,11 +24,12 @@ typedef enum { IKGLP_SEM = 3, KFMLP_SEM = 4, - IKGLP_GPU_AFF_OBS = 5, - KFMLP_SIMPLE_GPU_AFF_OBS = 6, - KFMLP_GPU_AFF_OBS = 7, + IKGLP_SIMPLE_GPU_AFF_OBS = 5, + IKGLP_GPU_AFF_OBS = 6, + KFMLP_SIMPLE_GPU_AFF_OBS = 7, + KFMLP_GPU_AFF_OBS = 8, - MAX_OBJ_TYPE = 7 + MAX_OBJ_TYPE = 8 } obj_type_t; struct inode_obj_id { diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h index c0cc04db1bc6..2a75a1719815 100644 --- a/include/litmus/ikglp_lock.h +++ b/include/litmus/ikglp_lock.h @@ -5,6 +5,12 @@ #include #include +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +#include + +struct ikglp_affinity; +#endif + typedef struct ikglp_heap_node { struct task_struct *task; @@ -81,6 +87,10 @@ struct ikglp_semaphore struct fifo_queue *fifo_queues; // array nr_replicas in length struct binheap_handle priority_queue; // max-heap, base prio struct binheap_handle donors; // max-heap, base prio + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + struct ikglp_affinity *aff_obs; +#endif }; static inline struct ikglp_semaphore* ikglp_from_lock(struct litmus_lock* lock) @@ -94,4 +104,55 @@ int ikglp_close(struct litmus_lock* l); void ikglp_free(struct litmus_lock* l); struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg); + + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +struct ikglp_queue_info +{ + struct fifo_queue* q; + lt_t estimated_len; + int *nr_cur_users; +}; + +struct ikglp_affinity_ops +{ + struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO + struct task_struct* (*advise_steal)(struct ikglp_affinity* aff, wait_queue_t** to_steal, struct fifo_queue** to_steal_from); // select steal from FIFO + struct task_struct* (*advise_donee_selection)(struct ikglp_affinity* aff, wait_queue_t** donee, struct fifo_queue** donee_queue); // select a donee + struct task_struct* (*advise_doner_to_fq)(struct ikglp_affinity* aff, ikglp_wait_state_t** donor); // select a donor to move to PQ + + void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue + void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue + void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica acquired + void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica freed + int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq); // convert a replica # to a GPU (includes offsets and simult user folding) +}; + +struct ikglp_affinity +{ + struct affinity_observer obs; + struct ikglp_affinity_ops *ops; + struct fifo_queue *q_info; + int *nr_cur_users_on_rsrc; + int offset; + int nr_simult; + int nr_rsrc; 
+}; + +static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) +{ + return container_of(aff_obs, struct ikglp_affinity, obs); +} + +int ikglp_aff_obs_close(struct affinity_observer*); +void ikglp_aff_obs_free(struct affinity_observer*); +struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); +struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); +#endif + + + #endif diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h index 614cccad5307..6d7e24b2a3ad 100644 --- a/include/litmus/kfmlp_lock.h +++ b/include/litmus/kfmlp_lock.h @@ -6,6 +6,8 @@ #ifdef CONFIG_LITMUS_AFFINITY_LOCKING #include + +struct kfmlp_affinity; #endif /* struct for semaphore with priority inheritance */ @@ -50,10 +52,9 @@ struct kfmlp_queue_info { struct kfmlp_queue* q; lt_t estimated_len; + int *nr_cur_users; }; -struct kfmlp_affinity; - struct kfmlp_affinity_ops { struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t); @@ -62,6 +63,7 @@ struct kfmlp_affinity_ops void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + int (*replica_to_resource)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq); }; struct kfmlp_affinity @@ -69,8 +71,10 @@ struct kfmlp_affinity struct affinity_observer obs; struct kfmlp_affinity_ops *ops; struct kfmlp_queue_info *q_info; - struct kfmlp_queue_info *shortest_queue; + int *nr_cur_users_on_rsrc; int offset; + int nr_simult; + int nr_rsrc; }; static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h index 856c575374d3..580728051d4e 100644 --- a/include/litmus/nvidia_info.h +++ b/include/litmus/nvidia_info.h @@ -9,6 +9,7 @@ //#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD #define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM +#define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS int init_nvidia_info(void); diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 11f081527545..e832ffcba17c 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -59,10 +59,11 @@ struct affinity_observer_args int lock_od; }; -struct kfmlp_gpu_affinity_observer_args +struct gpu_affinity_observer_args { struct affinity_observer_args obs; int replica_to_gpu_offset; + int nr_simult_users; }; /* The definition of the data that is shared between the kernel and real-time diff --git a/litmus/Kconfig b/litmus/Kconfig index 34ce6fb3a22e..a34440f3d8bc 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -325,6 +325,19 @@ config NV_DEVICE_NUM Should be (<= to the number of CPUs) and (<= to the number of GPUs) in your system. +config NV_MAX_SIMULT_USERS + int "Maximum number of threads sharing a GPU simultanously" + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD + range 1 3 + default "2" + help + Should be equal to the #copy_engines + #execution_engines + of the GPUs in your system. + + Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?) + Consumer Fermi/Kepler GPUs = 2 (GTX-4xx thru -6xx) + Older = 1 (ex. 
GTX-2xx) + choice prompt "CUDA/Driver Version Support" default CUDA_4_0 diff --git a/litmus/fdso.c b/litmus/fdso.c index 5a4f45c3251b..fb328db77dec 100644 --- a/litmus/fdso.c +++ b/litmus/fdso.c @@ -28,7 +28,8 @@ static const struct fdso_ops* fdso_ops[] = { &generic_lock_ops, /* RSM_MUTEX */ &generic_lock_ops, /* IKGLP_SEM */ &generic_lock_ops, /* KFMLP_SEM */ - &generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */ + &generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */ + &generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */ &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */ &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */ }; diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c index 7cdca1b7b50a..9bbe31a05b97 100644 --- a/litmus/kfmlp_lock.c +++ b/litmus/kfmlp_lock.c @@ -73,10 +73,9 @@ static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* se } -// TODO: BREAK THIS UP INTO TWO STEPS: -// 1) task to steal (and from what queue) -// 2) update queues -static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) +static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, + wait_queue_t** to_steal, + struct kfmlp_queue** to_steal_from) { /* must hold sem->lock */ @@ -189,76 +188,7 @@ static void kfmlp_steal_node(struct kfmlp_semaphore *sem, } #endif } -//// TODO: BREAK THIS UP INTO TWO STEPS: -//// 1) task to steal (and from what queue) -//// 2) update queues -//static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) -//{ -// /* must hold sem->lock */ -// -// struct kfmlp_queue *my_queue = NULL; -// struct task_struct *max_hp = NULL; -// -// struct list_head *pos; -// struct task_struct *queued; -// int i; -// -// for(i = 0; i < sem->num_resources; ++i) -// { -// if( (sem->queues[i].count > 1) && -// ((my_queue == NULL) || -// //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) -// (litmus->compare(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) -// { -// my_queue = &sem->queues[i]; -// } -// } -// -// if(my_queue) -// { -// max_hp = my_queue->hp_waiter; -// -// BUG_ON(!max_hp); -// -// TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", -// kfmlp_get_idx(sem, my_queue), -// max_hp->comm, max_hp->pid, -// kfmlp_get_idx(sem, my_queue)); -// -// my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); -// -// if(tsk_rt(my_queue->owner)->inh_task == max_hp) -// { -// litmus->decrease_prio(my_queue->owner, my_queue->hp_waiter); -// } -// -// list_for_each(pos, &my_queue->wait.task_list) -// { -// queued = (struct task_struct*) list_entry(pos, wait_queue_t, -// task_list)->private; -// /* Compare task prios, find high prio task. */ -// if (queued == max_hp) -// { -// /* -// TRACE_CUR("queue %d: found entry in wait queue. 
REMOVING!\n", -// kfmlp_get_idx(sem, my_queue)); -// */ -// __remove_wait_queue(&my_queue->wait, -// list_entry(pos, wait_queue_t, task_list)); -// break; -// } -// } -// --(my_queue->count); -// -//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING -// if(sem->aff_obs) { -// sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, max_hp); -// } -//#endif -// } -// -// return(max_hp); -//} + int kfmlp_lock(struct litmus_lock* l) { @@ -378,6 +308,12 @@ int kfmlp_lock(struct litmus_lock* l) spin_unlock_irqrestore(&sem->lock, flags); } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue); + } +#endif return kfmlp_get_idx(sem, my_queue); } @@ -390,14 +326,14 @@ int kfmlp_unlock(struct litmus_lock* l) unsigned long flags; int err = 0; - spin_lock_irqsave(&sem->lock, flags); - my_queue = kfmlp_get_queue(sem, t); - if (!my_queue || my_queue->owner != t) { + if (!my_queue) { err = -EINVAL; goto out; - } + } + + spin_lock_irqsave(&sem->lock, flags); TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue)); @@ -489,9 +425,9 @@ RETRY: } } -out: spin_unlock_irqrestore(&sem->lock, flags); - + +out: return err; } @@ -580,6 +516,25 @@ struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica) +{ + int gpu = replica % aff->nr_rsrc; + return gpu; +} + +static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica) +{ + int gpu = __replica_to_gpu(aff, replica) + aff->offset; + return gpu; +} + +static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu) +{ + int replica = gpu - aff->offset; + return replica; +} + + int kfmlp_aff_obs_close(struct affinity_observer* obs) { return 0; @@ -588,6 +543,7 @@ int kfmlp_aff_obs_close(struct affinity_observer* obs) void kfmlp_aff_obs_free(struct affinity_observer* obs) { struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs); + kfree(kfmlp_aff->nr_cur_users_on_rsrc); kfree(kfmlp_aff->q_info); kfree(kfmlp_aff); } @@ -597,37 +553,56 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* void* __user args) { struct kfmlp_affinity* kfmlp_aff; - struct kfmlp_gpu_affinity_observer_args aff_args; + struct gpu_affinity_observer_args aff_args; struct kfmlp_semaphore* sem; int i; unsigned long flags; - if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) - { + if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { return(NULL); } - if(__copy_from_user(&aff_args, args, sizeof(aff_args))) - { + if(__copy_from_user(&aff_args, args, sizeof(aff_args))) { return(NULL); } sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); - if(sem->litmus_lock.type != KFMLP_SEM) - { + if(sem->litmus_lock.type != KFMLP_SEM) { TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); return(NULL); } + if((aff_args.nr_simult_users <= 0) || + (sem->num_resources%aff_args.nr_simult_users != 0)) { + TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " + "(%d) per replica. #replicas should be evenly divisible " + "by #simult_users.\n", + sem->litmus_lock.ident, + sem->num_resources, + aff_args.nr_simult_users); + return(NULL); + } + + if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { + TRACE_CUR("System does not support #simult_users >%d. 
%d requested.\n", + NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); + return(NULL); + } + kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); - if(!kfmlp_aff) - { + if(!kfmlp_aff) { return(NULL); } kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL); - if(!kfmlp_aff->q_info) - { + if(!kfmlp_aff->q_info) { + kfree(kfmlp_aff); + return(NULL); + } + + kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL); + if(!kfmlp_aff->nr_cur_users_on_rsrc) { + kfree(kfmlp_aff->q_info); kfree(kfmlp_aff); return(NULL); } @@ -636,16 +611,24 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* kfmlp_aff->ops = kfmlp_ops; kfmlp_aff->offset = aff_args.replica_to_gpu_offset; + kfmlp_aff->nr_simult = aff_args.nr_simult_users; + kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; + + memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_rsrc)); - for(i = 0; i < sem->num_resources; ++i) - { + for(i = 0; i < sem->num_resources; ++i) { kfmlp_aff->q_info[i].q = &sem->queues[i]; kfmlp_aff->q_info[i].estimated_len = 0; + + // multiple q_info's will point to the same resource (aka GPU) if + // aff_args.nr_simult_users > 1 + kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)]; } + // attach observer to the lock spin_lock_irqsave(&sem->lock, flags); sem->aff_obs = kfmlp_aff; - kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)]; + //kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)]; spin_unlock_irqrestore(&sem->lock, flags); return &kfmlp_aff->obs; @@ -654,6 +637,13 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* +static int gpu_replica_to_resource(struct kfmlp_affinity* aff, + struct kfmlp_queue* fq) { + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq))); +} + + // Smart KFMLP Affinity static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) @@ -675,55 +665,66 @@ struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct { struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); lt_t min_len; + int min_nr_users; struct kfmlp_queue_info *shortest; struct kfmlp_queue *to_enqueue; int i; + int affinity_gpu; // simply pick the shortest queue if, we have no affinity, or we have // affinity with the shortest if(unlikely(tsk_rt(t)->last_gpu < 0)) { - // we have affinity with the shorest queue. pick it. - shortest = aff->shortest_queue; - TRACE_CUR("special case: no affinity\n"); - goto out; + affinity_gpu = aff->offset; // first gpu + TRACE_CUR("no affinity\n"); + } + else { + affinity_gpu = tsk_rt(t)->last_gpu; } // all things being equal, let's start with the queue with which we have // affinity. 
this helps us maintain affinity even when we don't have // an estiamte for local-affinity execution time (i.e., 2nd time on GPU) - shortest = &aff->q_info[tsk_rt(t)->last_gpu - aff->offset]; + shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; - if(shortest == aff->shortest_queue) { - TRACE_CUR("special case: have affinity with shortest queue\n"); - goto out; - } +// if(shortest == aff->shortest_queue) { +// TRACE_CUR("special case: have affinity with shortest queue\n"); +// goto out; +// } min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); + min_nr_users = *(shortest->nr_cur_users); TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", get_gpu_estimate(t, MIG_LOCAL), kfmlp_get_idx(sem, shortest->q), min_len); - + for(i = 0; i < sem->num_resources; ++i) { if(&aff->q_info[i] != shortest) { lt_t est_len = aff->q_info[i].estimated_len + - get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, i + aff->offset)); - if(est_len < min_len) { + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))); + + // queue is smaller, or they're equal and the other has a smaller number + // of total users. + // + // tie-break on the shortest number of simult users. this only kicks in + // when there are more than 1 empty queues. + if((est_len < min_len) || + ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { shortest = &aff->q_info[i]; min_len = est_len; + min_nr_users = *(aff->q_info[i].nr_cur_users); } TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", - get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, i + aff->offset)), + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))), kfmlp_get_idx(sem, aff->q_info[i].q), est_len); } } - -out: + to_enqueue = shortest->q; TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", kfmlp_get_idx(sem, to_enqueue), @@ -736,7 +737,7 @@ struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queu { struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); - // For now, just steal from the shortest (by number) queue. + // For now, just steal highest priority waiter // TODO: Implement affinity-aware stealing. 
return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); @@ -747,7 +748,7 @@ void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq { struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); int replica = kfmlp_get_idx(sem, fq); - int gpu = aff->offset + replica; + int gpu = replica_to_gpu(aff, replica); struct kfmlp_queue_info *info = &aff->q_info[replica]; lt_t est_time; lt_t est_len_before; @@ -765,22 +766,22 @@ void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq est_len_before, est_time, info->estimated_len); - if(aff->shortest_queue == info) { - // we may no longer be the shortest - aff->shortest_queue = kfmlp_aff_find_shortest(aff); - - TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", - kfmlp_get_idx(sem, aff->shortest_queue->q), - aff->shortest_queue->q->count, - aff->shortest_queue->estimated_len); - } +// if(aff->shortest_queue == info) { +// // we may no longer be the shortest +// aff->shortest_queue = kfmlp_aff_find_shortest(aff); +// +// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", +// kfmlp_get_idx(sem, aff->shortest_queue->q), +// aff->shortest_queue->q->count, +// aff->shortest_queue->estimated_len); +// } } void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) { struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); int replica = kfmlp_get_idx(sem, fq); - int gpu = aff->offset + replica; + int gpu = replica_to_gpu(aff, replica); struct kfmlp_queue_info *info = &aff->q_info[replica]; lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); @@ -797,28 +798,32 @@ void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq info->estimated_len); // check to see if we're the shortest queue now. - if((aff->shortest_queue != info) && - (aff->shortest_queue->estimated_len > info->estimated_len)) { - - aff->shortest_queue = info; - - TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", - kfmlp_get_idx(sem, info->q), - info->q->count, - info->estimated_len); - } +// if((aff->shortest_queue != info) && +// (aff->shortest_queue->estimated_len > info->estimated_len)) { +// +// aff->shortest_queue = info; +// +// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", +// kfmlp_get_idx(sem, info->q), +// info->q->count, +// info->estimated_len); +// } } void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) { struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); - int gpu = kfmlp_get_idx(sem, fq) + aff->offset; + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration TRACE_CUR("%s/%d acquired gpu %d. 
migration type = %d\n", t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); + // count the number or resource holders + ++(*(aff->q_info[replica].nr_cur_users)); + reg_nv_device(gpu, 1, t); // register tsk_rt(t)->suspend_gpu_tracker_on_block = 0; @@ -829,7 +834,8 @@ void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* f void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) { struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); - int gpu = kfmlp_get_idx(sem, fq) + aff->offset; + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); lt_t est_time; stop_gpu_tracker(t); // stop the tracker before we do anything else. @@ -837,6 +843,10 @@ void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); tsk_rt(t)->last_gpu = gpu; + + // count the number or resource holders + --(*(aff->q_info[replica].nr_cur_users)); + reg_nv_device(gpu, 0, t); // unregister // update estimates @@ -856,7 +866,8 @@ struct kfmlp_affinity_ops gpu_kfmlp_affinity = .notify_enqueue = gpu_kfmlp_notify_enqueue, .notify_dequeue = gpu_kfmlp_notify_dequeue, .notify_acquired = gpu_kfmlp_notify_acquired, - .notify_freed = gpu_kfmlp_notify_freed + .notify_freed = gpu_kfmlp_notify_freed, + .replica_to_resource = gpu_replica_to_resource, }; struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops, @@ -877,8 +888,50 @@ struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* op struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) { struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int min_count; + int min_nr_users; + struct kfmlp_queue_info *shortest; + struct kfmlp_queue *to_enqueue; + int i; + // TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); - return sem->shortest_queue; + + shortest = &aff->q_info[0]; + min_count = shortest->q->count; + min_nr_users = *(shortest->nr_cur_users); + + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", + kfmlp_get_idx(sem, shortest->q), + shortest->q->count, + min_nr_users); + + for(i = 1; i < sem->num_resources; ++i) { + int len = aff->q_info[i].q->count; + + // queue is smaller, or they're equal and the other has a smaller number + // of total users. + // + // tie-break on the shortest number of simult users. this only kicks in + // when there are more than 1 empty queues. 
+ if((len < min_count) || + ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { + shortest = &aff->q_info[i]; + min_count = shortest->q->count; + min_nr_users = *(aff->q_info[i].nr_cur_users); + } + + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", + kfmlp_get_idx(sem, aff->q_info[i].q), + aff->q_info[i].q->count, + *(aff->q_info[i].nr_cur_users)); + } + + to_enqueue = shortest->q; + TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", + kfmlp_get_idx(sem, to_enqueue), + kfmlp_get_idx(sem, sem->shortest_queue)); + + return to_enqueue; } struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) @@ -901,19 +954,26 @@ void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_qu void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) { struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); - int gpu = kfmlp_get_idx(sem, fq) + aff->offset; + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); // TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n"); + // count the number or resource holders + ++(*(aff->q_info[replica].nr_cur_users)); + reg_nv_device(gpu, 1, t); // register } void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) { struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); - int gpu = kfmlp_get_idx(sem, fq) + aff->offset; + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); // TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n"); + // count the number or resource holders + --(*(aff->q_info[replica].nr_cur_users)); reg_nv_device(gpu, 0, t); // unregister } @@ -925,7 +985,8 @@ struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity = .notify_enqueue = simple_gpu_kfmlp_notify_enqueue, .notify_dequeue = simple_gpu_kfmlp_notify_dequeue, .notify_acquired = simple_gpu_kfmlp_notify_acquired, - .notify_freed = simple_gpu_kfmlp_notify_freed + .notify_freed = simple_gpu_kfmlp_notify_freed, + .replica_to_resource = gpu_replica_to_resource, }; struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 287e4a0662d9..fd6398121fbf 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -368,7 +368,7 @@ static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_ int i; struct task_struct *found = NULL; for(i = 0; i < reg->nr_owners; ++i) { - if(reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) { + if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) { found = reg->owners[i]; } } @@ -433,8 +433,9 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t) raw_spin_lock_irqsave(®->lock, flags); - if(reg->nr_owners < MAX_NR_OWNERS) { - for(i = 0; i < MAX_NR_OWNERS; ++i) { + if(reg->nr_owners < NV_MAX_SIMULT_USERS) { + TRACE_TASK(t, "registers GPU %d\n", reg_device_id); + for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { if(reg->owners[i] == NULL) { reg->owners[i] = t; @@ -485,7 +486,9 @@ static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) raw_spin_lock_irqsave(®->lock, flags); - for(i = 0; i < reg->nr_owners; ++i) { + TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id); + + for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { if(reg->owners[i] == t) { #ifdef CONFIG_LITMUS_SOFTIRQD 
flush_pending(klitirqd_th, t); -- cgit v1.2.2
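
Editor's sketch (not part of the committed patch): the core of the change above is the replica-to-GPU folding added in kfmlp_lock.c -- a lock with num_resources replicas now maps onto nr_rsrc = num_resources / nr_simult_users physical GPUs, and the per-GPU counters in nr_cur_users_on_rsrc[] let advise_enqueue() break ties between equally short queues in favor of the GPU with the fewest current holders. The standalone program below mirrors __replica_to_gpu()/replica_to_gpu() from the patch; the device counts and offset used here (3 GPUs, 3 simultaneous users per GPU, offset 0) are made up purely for illustration.

/* Standalone illustration of the KFMLP replica-to-GPU folding.
 * Mirrors replica_to_gpu() in litmus/kfmlp_lock.c; values are hypothetical.
 */
#include <stdio.h>

struct gpu_aff {
	int offset;     /* first physical GPU managed by this lock          */
	int nr_simult;  /* simultaneous users per GPU (copy + exec engines) */
	int nr_rsrc;    /* physical GPUs = num_resources / nr_simult        */
};

/* replica index -> physical GPU, as in replica_to_gpu() in the patch */
static int replica_to_gpu(const struct gpu_aff *aff, int replica)
{
	return (replica % aff->nr_rsrc) + aff->offset;
}

int main(void)
{
	struct gpu_aff aff = { .offset = 0, .nr_simult = 3, .nr_rsrc = 3 };
	int nr_replicas = aff.nr_simult * aff.nr_rsrc;  /* 9 lock replicas */
	int r;

	/* Replicas 0..8 fold onto GPUs 0..2 (0,3,6 -> GPU 0; 1,4,7 -> GPU 1; ...),
	 * so up to nr_simult holders can occupy the "same" GPU concurrently. */
	for (r = 0; r < nr_replicas; ++r)
		printf("replica %d -> gpu %d\n", r, replica_to_gpu(&aff, r));

	return 0;
}

Because replicas r and r + nr_rsrc resolve to the same GPU under this folding, the patch can point several q_info[] entries at one shared nr_cur_users counter, which is exactly what kfmlp_aff_obs_new() sets up.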