From 8675824ed85d6e83a24e77dabaf3a5c02c91ef6f Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 16 Apr 2012 20:09:15 -0400 Subject: Implement GPU-affinity-aware kfmlp (untested) --- include/litmus/fdso.h | 7 +- include/litmus/fpmath.h | 134 +++++++++ include/litmus/gpu_affinity.h | 40 +++ include/litmus/kfmlp_lock.h | 56 +++- include/litmus/rt_param.h | 37 +++ litmus/Makefile | 2 +- litmus/gpu_affinity.c | 72 +++++ litmus/kfmlp_lock.c | 660 ++++++++++++++++++++++++++++++++++++------ litmus/litmus.c | 7 + litmus/locking.c | 13 + litmus/rsm_lock.c | 14 +- litmus/sched_gsn_edf.c | 69 ++++- 12 files changed, 1006 insertions(+), 105 deletions(-) create mode 100644 include/litmus/fpmath.h create mode 100644 include/litmus/gpu_affinity.h create mode 100644 litmus/gpu_affinity.c diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h index baf28c47e95d..b92c1a3f004f 100644 --- a/include/litmus/fdso.h +++ b/include/litmus/fdso.h @@ -24,10 +24,11 @@ typedef enum { IKGLP_SEM = 3, KFMLP_SEM = 4, - IKGLP_AFF_OBS = 5, - KFMLP_AFF_OBS = 6, + IKGLP_GPU_AFF_OBS = 5, + KFMLP_SIMPLE_GPU_AFF_OBS = 6, + KFMLP_GPU_AFF_OBS = 7, - MAX_OBJ_TYPE = 6 + MAX_OBJ_TYPE = 7 } obj_type_t; struct inode_obj_id { diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h new file mode 100644 index 000000000000..35f81683d6ab --- /dev/null +++ b/include/litmus/fpmath.h @@ -0,0 +1,134 @@ +#ifndef __FP_MATH_H__ +#define __FP_MATH_H__ + +// Use 64-bit because we want to track things at the nanosecond scale. +// This can lead to very large numbers. +typedef int64_t fpbuf_t; +typedef struct +{ + fpbuf_t val; +} fp_t; + +#define FP_SHIFT 10 +#define ROUND_BIT (FP_SHIFT - 1) +#define ONE FP(1) + +#define _fp(x) ((fp_t) {x}) + +static const fp_t LITMUS_FP_ZERO = {.val = 0}; +static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)}; + +static inline fp_t FP(fpbuf_t x) +{ + return _fp(((fpbuf_t) x) << FP_SHIFT); +} + +/* divide two integers to obtain a fixed point value */ +static inline fp_t _frac(fpbuf_t a, fpbuf_t b) +{ + return _fp(FP(a).val / (b)); +} + +#ifdef __KERNEL__ + +static inline fpbuf_t _point(fp_t x) +{ + return (x.val % (1 << FP_SHIFT)); + +} + +#define fp2str(x) x.val +/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */ +#define _FP_ "%ld/1024" + +static inline fpbuf_t _floor(fp_t x) +{ + return x.val >> FP_SHIFT; +} + +/* FIXME: negative rounding */ +static inline fpbuf_t _round(fp_t x) +{ + return _floor(x) + ((x.val >> ROUND_BIT) & 1); +} + +/* multiply two fixed point values */ +static inline fp_t _mul(fp_t a, fp_t b) +{ + return _fp((a.val * b.val) >> FP_SHIFT); +} + +static inline fp_t _div(fp_t a, fp_t b) +{ + /* try not to overflow */ + if (unlikely( a.val > (2l << (BITS_PER_LONG - FP_SHIFT)) )) + return _fp((a.val / b.val) << FP_SHIFT); + else + return _fp((a.val << FP_SHIFT) / b.val); +} + +static inline fp_t _add(fp_t a, fp_t b) +{ + return _fp(a.val + b.val); +} + +static inline fp_t _sub(fp_t a, fp_t b) +{ + return _fp(a.val - b.val); +} + +static inline fp_t _neg(fp_t x) +{ + return _fp(-x.val); +} + +static inline fp_t _abs(fp_t x) +{ + return _fp(abs(x.val)); +} + +/* works the same as casting float/double to integer */ +static inline fpbuf_t _fp_to_integer(fp_t x) +{ + return _floor(_abs(x)) * ((x.val > 0) ? 
1 : -1); +} + +static inline fp_t _integer_to_fp(fpbuf_t x) +{ + return _frac(x,1); +} + +static inline int _leq(fp_t a, fp_t b) +{ + return a.val <= b.val; +} + +static inline int _geq(fp_t a, fp_t b) +{ + return a.val >= b.val; +} + +static inline int _lt(fp_t a, fp_t b) +{ + return a.val < b.val; +} + +static inline int _gt(fp_t a, fp_t b) +{ + return a.val > b.val; +} + +static inline int _eq(fp_t a, fp_t b) +{ + return a.val == b.val; +} + +static inline fp_t _max(fp_t a, fp_t b) +{ + if (a.val < b.val) + return b; + else + return a; +} +#endif +#endif diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h new file mode 100644 index 000000000000..c29ff3de997c --- /dev/null +++ b/include/litmus/gpu_affinity.h @@ -0,0 +1,40 @@ +#ifndef LITMUS_GPU_AFFINITY_H +#define LITMUS_GPU_AFFINITY_H + +#include +#include +#include + +void update_gpu_estimate(struct task_struct* t, lt_t observed); +gpu_migration_dist_t gpu_migration_distance(int a, int b); + +static inline void reset_gpu_tracker(struct task_struct* t) +{ + t->rt_param.accum_gpu_time = 0; +} + +static inline void start_gpu_tracker(struct task_struct* t) +{ + t->rt_param.gpu_time_stamp = litmus_clock(); +} + +static inline void stop_gpu_tracker(struct task_struct* t) +{ + lt_t now = litmus_clock(); + t->rt_param.accum_gpu_time += (now - t->rt_param.gpu_time_stamp); +} + +static inline lt_t get_gpu_time(struct task_struct* t) +{ + return t->rt_param.accum_gpu_time; +} + +static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) +{ + lt_t val = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); + + // minimum value is 1. + return ((val > 0) ? val : 1); +} + +#endif \ No newline at end of file diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h index 49156a9ba4ea..614cccad5307 100644 --- a/include/litmus/kfmlp_lock.h +++ b/include/litmus/kfmlp_lock.h @@ -4,6 +4,10 @@ #include #include +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +#include +#endif + /* struct for semaphore with priority inheritance */ struct kfmlp_queue { @@ -23,6 +27,10 @@ struct kfmlp_semaphore struct kfmlp_queue *queues; /* array */ struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + struct kfmlp_affinity *aff_obs; +#endif }; static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) @@ -36,4 +44,50 @@ int kfmlp_close(struct litmus_lock* l); void kfmlp_free(struct litmus_lock* l); struct litmus_lock* kfmlp_new(struct litmus_lock_ops*, void* __user arg); -#endif \ No newline at end of file +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +struct kfmlp_queue_info +{ + struct kfmlp_queue* q; + lt_t estimated_len; +}; + +struct kfmlp_affinity; + +struct kfmlp_affinity_ops +{ + struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t); + struct task_struct* (*advise_steal)(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from); + void (*notify_enqueue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); +}; + +struct kfmlp_affinity +{ + struct affinity_observer obs; + struct kfmlp_affinity_ops *ops; + struct 
kfmlp_queue_info *q_info; + struct kfmlp_queue_info *shortest_queue; + int offset; +}; + +static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) +{ + return container_of(aff_obs, struct kfmlp_affinity, obs); +} + +int kfmlp_aff_obs_close(struct affinity_observer*); +void kfmlp_aff_obs_free(struct affinity_observer*); +struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); +struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); + + +#endif + +#endif + + diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index cc638e9c55d1..ad46ab4c64cc 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -5,6 +5,8 @@ #ifndef _LINUX_RT_PARAM_H_ #define _LINUX_RT_PARAM_H_ +#include + /* Litmus time type. */ typedef unsigned long long lt_t; @@ -57,6 +59,12 @@ struct affinity_observer_args int lock_od; }; +struct kfmlp_gpu_affinity_observer_args +{ + struct affinity_observer_args obs; + int replica_to_gpu_offset; +}; + /* The definition of the data that is shared between the kernel and real-time * tasks via a shared page (see litmus/ctrldev.c). * @@ -116,6 +124,21 @@ enum klitirqd_sem_status HELD }; +typedef enum gpu_migration_dist +{ + MIG_LOCAL = 0, + MIG_NEAR = 1, + MIG_MED = 2, + MIG_FAR = 3, + + MIG_LAST = MIG_FAR +} gpu_migration_dist_t; + +typedef struct feedback_est{ + fp_t est; + fp_t accum_err; +} feedback_est_t; + /* RT task parameters for scheduling extensions * These parameters are inherited during clone and therefore must * be explicitly set up before the task set is launched. @@ -160,6 +183,20 @@ struct rt_param { /* number of top-half interrupts handled on behalf of current job */ atomic_t nv_int_count; long unsigned int held_gpus; // bitmap of held GPUs. + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + fp_t gpu_fb_param_a; + fp_t gpu_fb_param_b; + + gpu_migration_dist_t gpu_migration; + int last_gpu; + feedback_est_t gpu_migration_est[MIG_LAST]; // local, near, med, far + + lt_t accum_gpu_time; + lt_t gpu_time_stamp; + + unsigned int suspend_gpu_tracker_on_block:1; +#endif #endif #ifdef CONFIG_LITMUS_LOCKING diff --git a/litmus/Makefile b/litmus/Makefile index 1698afb75ec4..080cbf694a41 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -35,4 +35,4 @@ obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o -obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o +obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o gpu_affinity.o diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c new file mode 100644 index 000000000000..43171390bed7 --- /dev/null +++ b/litmus/gpu_affinity.c @@ -0,0 +1,72 @@ + +#ifdef CONFIG_LITMUS_NVIDIA + +#include +#include +#include + +static void update_estimate(feedback_est_t* fb, fp_t* a, fp_t* b, lt_t observed) +{ + fp_t err, new; + fp_t actual = _frac(observed, 1); // observed is in ns, so beware of overflow! 
+ + err = _sub(actual, fb->est); + new = _add(_mul(*a, err), + _mul(*b, fb->accum_err)); + + fb->est = new; + fb->accum_err = _add(fb->accum_err, err); +} + +void update_gpu_estimate(struct task_struct *t, lt_t observed) +{ + feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); + + TRACE_TASK(t, "GPU est update before (dist = %d): %d.%d\n", + tsk_rt(t)->gpu_migration, + _fp_to_integer(fb->est), + _point(fb->est)); + + update_estimate(fb, + &tsk_rt(t)->gpu_fb_param_a, + &tsk_rt(t)->gpu_fb_param_b, + observed); + + TRACE_TASK(t, "GPU est update after (dist = %d): %d.%d\n", + tsk_rt(t)->gpu_migration, + _fp_to_integer(fb->est), + _point(fb->est)); +} + +gpu_migration_dist_t gpu_migration_distance(int a, int b) +{ + // GPUs organized in a binary hierarchy, no more than 2^MIG_LAST GPUs + int i; + int level; + int max_level; + + if(unlikely(a < 0 || b < 0)) { + return MIG_LAST; + } + + if(a == b) { + return MIG_LOCAL; + } + + for(i = 1, level = 2, max_level = 1< #include +#include + #include -//#include +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#include +#endif static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, struct kfmlp_queue* queue) @@ -67,74 +72,177 @@ static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* se return(shortest); } -static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) + +// TODO: BREAK THIS UP INTO TWO STEPS: +// 1) task to steal (and from what queue) +// 2) update queues +static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) { - /* must hold sem->lock */ - - struct kfmlp_queue *my_queue = NULL; - struct task_struct *max_hp = NULL; - + /* must hold sem->lock */ - struct list_head *pos; - struct task_struct *queued; int i; + *to_steal = NULL; + *to_steal_from = NULL; + for(i = 0; i < sem->num_resources; ++i) { if( (sem->queues[i].count > 1) && - ((my_queue == NULL) || + ((*to_steal_from == NULL) || //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) - (litmus->compare(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) + (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) ) { - my_queue = &sem->queues[i]; + *to_steal_from = &sem->queues[i]; } } - if(my_queue) - { - max_hp = my_queue->hp_waiter; - - BUG_ON(!max_hp); - - TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", - kfmlp_get_idx(sem, my_queue), - max_hp->comm, max_hp->pid, - kfmlp_get_idx(sem, my_queue)); - - my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); - - if(tsk_rt(my_queue->owner)->inh_task == max_hp) - { - litmus->decrease_prio(my_queue->owner, my_queue->hp_waiter); - } - - list_for_each(pos, &my_queue->wait.task_list) + if(*to_steal_from) + { + struct list_head *pos; + list_for_each(pos, &(*to_steal_from)->wait.task_list) { - queued = (struct task_struct*) list_entry(pos, wait_queue_t, - task_list)->private; + wait_queue_t *node = list_entry(pos, wait_queue_t, task_list); + struct task_struct *queued = (struct task_struct*) node->private; /* Compare task prios, find high prio task. */ - if (queued == max_hp) + if (queued == (*to_steal_from)->hp_waiter) { - /* - TRACE_CUR("queue %d: found entry in wait queue. 
REMOVING!\n", - kfmlp_get_idx(sem, my_queue)); - */ - __remove_wait_queue(&my_queue->wait, - list_entry(pos, wait_queue_t, task_list)); - break; + *to_steal = node; + + TRACE_CUR("steal: selected %s/%d from queue %d\n", + queued->comm, queued->pid, + kfmlp_get_idx(sem, *to_steal_from)); + + return queued; } } - --(my_queue->count); } - return(max_hp); + return NULL; +} + +static void kfmlp_steal_node(struct kfmlp_semaphore *sem, + struct kfmlp_queue *dst, + wait_queue_t *wait, + struct kfmlp_queue *src) +{ + struct task_struct* t = (struct task_struct*) wait->private; + + __remove_wait_queue(&src->wait, wait); + --(src->count); + + if(t == src->hp_waiter) { + src->hp_waiter = kfmlp_find_hp_waiter(src, NULL); + + if(src->owner && tsk_rt(src->owner)->inh_task == t) { + litmus->decrease_prio(src->owner, src->hp_waiter); + } + } + + if(sem->shortest_queue->count > src->count) { + sem->shortest_queue = src; + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t); + } +#endif + + init_waitqueue_entry(wait, t); + __add_wait_queue_tail_exclusive(&dst->wait, wait); + ++(dst->count); + + if(litmus->compare(t, dst->hp_waiter)) { + dst->hp_waiter = t; + + if(dst->owner && litmus->compare(t, dst->owner)) + { + litmus->increase_prio(dst->owner, t); + } + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t); + } +#endif } +//// TODO: BREAK THIS UP INTO TWO STEPS: +//// 1) task to steal (and from what queue) +//// 2) update queues +//static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) +//{ +// /* must hold sem->lock */ +// +// struct kfmlp_queue *my_queue = NULL; +// struct task_struct *max_hp = NULL; +// +// struct list_head *pos; +// struct task_struct *queued; +// int i; +// +// for(i = 0; i < sem->num_resources; ++i) +// { +// if( (sem->queues[i].count > 1) && +// ((my_queue == NULL) || +// //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) +// (litmus->compare(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) +// { +// my_queue = &sem->queues[i]; +// } +// } +// +// if(my_queue) +// { +// max_hp = my_queue->hp_waiter; +// +// BUG_ON(!max_hp); +// +// TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", +// kfmlp_get_idx(sem, my_queue), +// max_hp->comm, max_hp->pid, +// kfmlp_get_idx(sem, my_queue)); +// +// my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); +// +// if(tsk_rt(my_queue->owner)->inh_task == max_hp) +// { +// litmus->decrease_prio(my_queue->owner, my_queue->hp_waiter); +// } +// +// list_for_each(pos, &my_queue->wait.task_list) +// { +// queued = (struct task_struct*) list_entry(pos, wait_queue_t, +// task_list)->private; +// /* Compare task prios, find high prio task. */ +// if (queued == max_hp) +// { +// /* +// TRACE_CUR("queue %d: found entry in wait queue. 
REMOVING!\n", +// kfmlp_get_idx(sem, my_queue)); +// */ +// __remove_wait_queue(&my_queue->wait, +// list_entry(pos, wait_queue_t, task_list)); +// break; +// } +// } +// --(my_queue->count); +// +//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +// if(sem->aff_obs) { +// sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, max_hp); +// } +//#endif +// } +// +// return(max_hp); +//} int kfmlp_lock(struct litmus_lock* l) { struct task_struct* t = current; struct kfmlp_semaphore *sem = kfmlp_from_lock(l); - struct kfmlp_queue* my_queue; + struct kfmlp_queue* my_queue = NULL; wait_queue_t wait; unsigned long flags; @@ -143,7 +251,16 @@ int kfmlp_lock(struct litmus_lock* l) spin_lock_irqsave(&sem->lock, flags); +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t); + } + if(!my_queue) { + my_queue = sem->shortest_queue; + } +#else my_queue = sem->shortest_queue; +#endif if (my_queue->owner) { /* resource is not free => must suspend and wait */ @@ -170,7 +287,17 @@ int kfmlp_lock(struct litmus_lock* l) } ++(my_queue->count); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(my_queue == sem->shortest_queue) { + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + } + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t); + } +#else sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); +#endif /* release lock before sleeping */ spin_unlock_irqrestore(&sem->lock, flags); @@ -206,7 +333,18 @@ int kfmlp_lock(struct litmus_lock* l) my_queue->owner = t; ++(my_queue->count); - sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(my_queue == sem->shortest_queue) { + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + } + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t); + sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t); + } +#else + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); +#endif spin_unlock_irqrestore(&sem->lock, flags); } @@ -219,7 +357,7 @@ int kfmlp_unlock(struct litmus_lock* l) { struct task_struct *t = current, *next; struct kfmlp_semaphore *sem = kfmlp_from_lock(l); - struct kfmlp_queue *my_queue; + struct kfmlp_queue *my_queue, *to_steal_from; unsigned long flags; int err = 0; @@ -227,29 +365,43 @@ int kfmlp_unlock(struct litmus_lock* l) my_queue = kfmlp_get_queue(sem, t); - if (!my_queue) { + if (!my_queue || my_queue->owner != t) { err = -EINVAL; goto out; } + my_queue->owner = NULL; // clear ownership + --(my_queue->count); + + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t); + sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t); + } +#endif + + /* we lose the benefit of priority inheritance (if any) */ + if (tsk_rt(t)->inh_task) + litmus->decrease_prio(t, NULL); + + /* check if there are jobs waiting for this resource */ +RETRY: next = __waitqueue_remove_first(&my_queue->wait); if (next) { - /* - TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", - kfmlp_get_idx(sem, my_queue), - next->comm, next->pid); - */ /* next becomes the resouce holder */ my_queue->owner = next; - --(my_queue->count); - // the '=' of '<=' is a dumb method to attempt to build - // affinity until tasks can tell us where they ran last... 
- if(my_queue->count <= sem->shortest_queue->count) - { - sem->shortest_queue = my_queue; - } +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next); + } +#endif TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", kfmlp_get_idx(sem, my_queue), next->comm, next->pid); @@ -257,10 +409,6 @@ int kfmlp_unlock(struct litmus_lock* l) /* determine new hp_waiter if necessary */ if (next == my_queue->hp_waiter) { TRACE_TASK(next, "was highest-prio waiter\n"); - /* next has the highest priority --- it doesn't need to - * inherit. However, we need to make sure that the - * next-highest priority in the queue is reflected in - * hp_waiter. */ my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); if (my_queue->hp_waiter) TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); @@ -278,46 +426,34 @@ int kfmlp_unlock(struct litmus_lock* l) } else { - TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); - - next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ + // TODO: put this stealing logic before we attempt to release + // our resource. (simplifies code and gets rid of ugly goto RETRY. + wait_queue_t *wait; - /* - if(next) - TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", - kfmlp_get_idx(sem, my_queue), - next->comm, next->pid); - */ - - my_queue->owner = next; + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + next = (sem->aff_obs) ? + sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) : + kfmlp_select_hp_steal(sem, &wait, &to_steal_from); +#else + next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from); +#endif - if(next) - { - TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", - kfmlp_get_idx(sem, my_queue), - next->comm, next->pid); + if(next) { + kfmlp_steal_node(sem, my_queue, wait, to_steal_from); - /* wake up next */ - wake_up_process(next); + TRACE_CUR("queued %d: stole %s/%d from queue %d\n", + next->comm, next->pid, + kfmlp_get_idx(sem, to_steal_from)); + + goto RETRY; // will succeed this time. } - else - { + else { TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); - - --(my_queue->count); - // the '=' of '<=' is a dumb method to attempt to build - // affinity until tasks can tell us where they ran last... 
- if(my_queue->count <= sem->shortest_queue->count) - { - sem->shortest_queue = my_queue; - } } } - /* we lose the benefit of priority inheritance (if any) */ - if (tsk_rt(t)->inh_task) - litmus->decrease_prio(t, NULL); - out: spin_unlock_irqrestore(&sem->lock, flags); @@ -403,3 +539,337 @@ struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) return &sem->litmus_lock; } + + + + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +int kfmlp_aff_obs_close(struct affinity_observer* obs) +{ + return 0; +} + +void kfmlp_aff_obs_free(struct affinity_observer* obs) +{ + struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs); + kfree(kfmlp_aff->q_info); + kfree(kfmlp_aff); +} + +static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops, + struct kfmlp_affinity_ops* kfmlp_ops, + void* __user args) +{ + struct kfmlp_affinity* kfmlp_aff; + struct kfmlp_gpu_affinity_observer_args aff_args; + struct kfmlp_semaphore* sem; + int i; + unsigned long flags; + + if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) + { + return(NULL); + } + if(__copy_from_user(&aff_args, args, sizeof(aff_args))) + { + return(NULL); + } + + sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); + + if(sem->litmus_lock.type != KFMLP_SEM) + { + TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); + return(NULL); + } + + kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); + if(!kfmlp_aff) + { + return(NULL); + } + + kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL); + if(!kfmlp_aff->q_info) + { + kfree(kfmlp_aff); + return(NULL); + } + + kfmlp_aff->obs.ops = ops; + kfmlp_aff->ops = kfmlp_ops; + kfmlp_aff->offset = aff_args.replica_to_gpu_offset; + + for(i = 0; i < sem->num_resources; ++i) + { + kfmlp_aff->q_info[i].q = &sem->queues[i]; + kfmlp_aff->q_info[i].estimated_len = 0; + } + + spin_lock_irqsave(&sem->lock, flags); + sem->aff_obs = kfmlp_aff; + kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)]; + spin_unlock_irqrestore(&sem->lock, flags); + + return &kfmlp_aff->obs; +} + + + + +// Smart KFMLP Affinity + +static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + struct kfmlp_queue_info *shortest = &aff->q_info[0]; + int i; + + for(i = 1; i < sem->num_resources; ++i) { + if(aff->q_info[i].estimated_len < shortest->estimated_len) { + shortest = &aff->q_info[i]; + } + } + + return(shortest); +} + +struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + lt_t min_len; + struct kfmlp_queue_info *shortest; + struct kfmlp_queue *to_enqueue; + int i; + + // simply pick the shortest queue if, we have no affinity, or we have + // affinity with the shortest + if((tsk_rt(t)->last_gpu < 0) || + ((kfmlp_get_idx(sem, aff->shortest_queue->q) + aff->offset) == tsk_rt(t)->last_gpu)) { + // we have affinity with the shorest queue. pick it. 
+ to_enqueue = aff->shortest_queue->q; + + TRACE_CUR("special case: no affinity or have affinity with shortest\n"); + + goto out; + } + + // enqueue where we will have the shortest time to completion + + shortest = &aff->q_info[0]; + min_len = shortest->estimated_len + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, 0 + aff->offset)); + + for(i = 1; i < sem->num_resources; ++i) { + lt_t est_len = + aff->q_info[i].estimated_len + + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, i + aff->offset)); + + if(est_len < min_len) { + shortest = &aff->q_info[i]; + min_len = est_len; + } + } + to_enqueue = shortest->q; + +out: + TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", + kfmlp_get_idx(sem, to_enqueue), + kfmlp_get_idx(sem, sem->shortest_queue)); + + return to_enqueue; +} + +struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + + // For now, just steal from the shortest (by number) queue. + // TODO: Implement affinity-aware stealing. + + return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); +} + + +void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = aff->offset + replica; + struct kfmlp_queue_info *info = &aff->q_info[replica]; + lt_t est_time; + + if(current == t) { + tsk_rt(t)->suspend_gpu_tracker_on_block = 1; + } + + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + info->estimated_len += est_time; + + TRACE_CUR("fq %d est len is now %llu\n", + kfmlp_get_idx(sem, aff->shortest_queue->q), + aff->shortest_queue->estimated_len); + + if(aff->shortest_queue == info) { + // we may no longer be the shortest + aff->shortest_queue = kfmlp_aff_find_shortest(aff); + + TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", + kfmlp_get_idx(sem, aff->shortest_queue->q), + aff->shortest_queue->q->count, + aff->shortest_queue->estimated_len); + } +} + +void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = aff->offset + replica; + struct kfmlp_queue_info *info = &aff->q_info[replica]; + lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + + if(est_time > info->estimated_len) { + WARN_ON(1); + info->estimated_len = 0; + } + else { + info->estimated_len -= est_time; + } + + TRACE_CUR("fq %d est len is now %llu\n", + kfmlp_get_idx(sem, info->q), + info->estimated_len); + + // check to see if we're the shortest queue now. + if((aff->shortest_queue != info) && + (aff->shortest_queue->estimated_len > info->estimated_len)) { + + aff->shortest_queue = info; + + TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", + kfmlp_get_idx(sem, info->q), + info->q->count, + info->estimated_len); + } +} + +void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int gpu = kfmlp_get_idx(sem, fq) + aff->offset; + + tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration + + TRACE_CUR("%s/%d acquired gpu %d. 
migration type = %d\n", + t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); + + reg_nv_device(gpu, 1); // register + + tsk_rt(t)->suspend_gpu_tracker_on_block = 0; + reset_gpu_tracker(t); + start_gpu_tracker(t); +} + +void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int gpu = kfmlp_get_idx(sem, fq) + aff->offset; + lt_t est_time; + + stop_gpu_tracker(t); // stop the tracker before we do anything else. + + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + + tsk_rt(t)->last_gpu = gpu; + reg_nv_device(gpu, 0); // unregister + + // update estimates + update_gpu_estimate(t, get_gpu_time(t)); + + TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n", + t->comm, t->pid, gpu, + get_gpu_time(t), + est_time, + (long long)get_gpu_time(t) - (long long)est_time); +} + +struct kfmlp_affinity_ops gpu_kfmlp_affinity = +{ + .advise_enqueue = gpu_kfmlp_advise_enqueue, + .advise_steal = gpu_kfmlp_advise_steal, + .notify_enqueue = gpu_kfmlp_notify_enqueue, + .notify_dequeue = gpu_kfmlp_notify_dequeue, + .notify_acquired = gpu_kfmlp_notify_acquired, + .notify_freed = gpu_kfmlp_notify_freed +}; + +struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops, + void* __user args) +{ + return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args); +} + + + + + + + + +// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration) + +struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + return sem->shortest_queue; +} + +struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); +} + +void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ +} + +void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ +} + +void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int gpu = kfmlp_get_idx(sem, fq) + aff->offset; + + reg_nv_device(gpu, 1); // register +} + +void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int gpu = kfmlp_get_idx(sem, fq) + aff->offset; + + reg_nv_device(gpu, 0); // unregister +} + +struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity = +{ + .advise_enqueue = simple_gpu_kfmlp_advise_enqueue, + .advise_steal = simple_gpu_kfmlp_advise_steal, + .notify_enqueue = simple_gpu_kfmlp_notify_enqueue, + .notify_dequeue = simple_gpu_kfmlp_notify_dequeue, + .notify_acquired = simple_gpu_kfmlp_notify_acquired, + .notify_freed = simple_gpu_kfmlp_notify_freed +}; + +struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, + void* __user args) +{ + return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args); +} + +#endif + diff --git a/litmus/litmus.c b/litmus/litmus.c index 2f9079421ec7..dd8b72e1af08 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -387,6 +387,13 @@ static void 
reinit_litmus_state(struct task_struct* p, int restore) p->rt_param.ctrl_page = ctrl_page; } +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) + p->rt_param.gpu_fb_param_a = _frac(14008, 1000); + p->rt_param.gpu_fb_param_b = _frac(16024, 1000); + p->rt_param.gpu_migration = MIG_LAST; + p->rt_param.last_gpu = -1; +#endif + #ifdef CONFIG_LITMUS_NESTED_LOCKING INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order); raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock); diff --git a/litmus/locking.c b/litmus/locking.c index 6d28efe97c91..ef13062913ce 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -10,6 +10,10 @@ #include #endif +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif + static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); static int open_generic_lock(struct od_table_entry* entry, void* __user arg); static int close_generic_lock(struct od_table_entry* entry); @@ -50,6 +54,7 @@ static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user ar INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node); WARN_ON(!(lock->nest.hp_waiter_ptr)); #endif + lock->type = type; lock->ident = atomic_inc_return(&lock_id_gen); *obj_ref = lock; } @@ -292,6 +297,14 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", dgl_wait->nr_remaining); + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + // KLUDGE: don't count this suspension as time in the critical gpu + // critical section + if(tsk_rt(dgl_wait->task)->held_gpus) { + tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1; + } +#endif // note reverse order. see comments in select_next_lock for reason. for(i = dgl_wait->size - 1; i >= 0; --i) { diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c index aaca93c1e5d1..0a851cd430a7 100644 --- a/litmus/rsm_lock.c +++ b/litmus/rsm_lock.c @@ -7,6 +7,10 @@ //#include +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif + /* caller is responsible for locking */ static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex, @@ -202,7 +206,15 @@ int rsm_mutex_lock(struct litmus_lock* l) if (mutex->owner) { TRACE_TASK(t, "Blocking on lock %d.\n", l->ident); - + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + // KLUDGE: don't count this suspension as time in the critical gpu + // critical section + if(tsk_rt(t)->held_gpus) { + tsk_rt(t)->suspend_gpu_tracker_on_block = 1; + } +#endif + /* resource is not free => must suspend and wait */ owner = mutex->owner; diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 1440372227c6..b4ab2361e37a 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -61,6 +61,9 @@ #include #endif +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif /* Overview of GSN-EDF operations. * @@ -813,6 +816,14 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) */ if (blocks) unlink(entry->scheduled); + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) + if(tsk_rt(entry->scheduled)->held_gpus) { + if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { + stop_gpu_tracker(entry->scheduled); + } + } +#endif /* Request a sys_exit_np() call if we would like to preempt but cannot. 
* We need to make sure to update the link structure anyway in case @@ -862,7 +873,7 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) if (exists) next = prev; } - + sched_state_task_picked(); raw_spin_unlock(&gsnedf_lock); @@ -1429,9 +1440,6 @@ static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg) return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg); } - - - /* ******************** FMLP support ********************** */ /* struct for semaphore with priority inheritance */ @@ -1676,7 +1684,57 @@ UNSUPPORTED_LOCK: return err; } +#endif // CONFIG_LITMUS_LOCKING + + + + + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +static struct affinity_observer_ops gsnedf_kfmlp_affinity_ops = { + .close = kfmlp_aff_obs_close, + .deallocate = kfmlp_aff_obs_free, +}; + +static long gsnedf_allocate_affinity_observer( + struct affinity_observer **aff_obs, + int type, + void* __user args) +{ + int err; + + /* GSN-EDF currently only supports the FMLP for global resources. */ + switch (type) { + + case KFMLP_SIMPLE_GPU_AFF_OBS: + *aff_obs = kfmlp_simple_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args); + break; + case KFMLP_GPU_AFF_OBS: + *aff_obs = kfmlp_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args); + break; +#ifdef CONFIG_LITMUS_NESTED_LOCKING +// case IKGLP_GPU_AFF_OBS: +// *aff_obs = gsnedf_new_ikglp_aff(arg); +// break; #endif + default: + err = -ENXIO; + goto UNSUPPORTED_AFF_OBS; + }; + + if (*aff_obs) + err = 0; + else + err = -ENOMEM; + +UNSUPPORTED_AFF_OBS: + return err; +} +#endif + + + + static long gsnedf_activate_plugin(void) { @@ -1746,6 +1804,9 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { #ifdef CONFIG_LITMUS_DGL_SUPPORT .get_dgl_spinlock = gsnedf_get_dgl_spinlock, #endif +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + .allocate_aff_obs = gsnedf_allocate_affinity_observer, +#endif #ifdef CONFIG_LITMUS_SOFTIRQD .increase_prio_klitirqd = increase_priority_inheritance_klitirqd, .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd, -- cgit v1.2.2