From a463f9a9e04385f0729f7435a0a6dff7d89b25de Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sat, 26 May 2012 17:29:58 -0400 Subject: GPUSync patch for Litmus 2012.1. --- arch/x86/kernel/irq.c | 4 + arch/x86/kernel/syscall_table_32.S | 1 + include/linux/completion.h | 1 + include/linux/interrupt.h | 10 +- include/linux/mutex.h | 10 + include/linux/semaphore.h | 9 + include/linux/workqueue.h | 18 + include/litmus/binheap.h | 207 +++ include/litmus/edf_common.h | 12 + include/litmus/fdso.h | 14 +- include/litmus/fpmath.h | 145 ++ include/litmus/gpu_affinity.h | 49 + include/litmus/ikglp_lock.h | 160 ++ include/litmus/kexclu_affinity.h | 35 + include/litmus/kfmlp_lock.h | 97 ++ include/litmus/litmus.h | 9 +- include/litmus/litmus_softirq.h | 199 +++ include/litmus/locking.h | 142 +- include/litmus/nvidia_info.h | 46 + include/litmus/preempt.h | 2 +- include/litmus/rsm_lock.h | 54 + include/litmus/rt_param.h | 100 +- include/litmus/sched_plugin.h | 76 +- include/litmus/sched_trace.h | 218 ++- include/litmus/sched_trace_external.h | 78 + include/litmus/trace.h | 34 +- include/litmus/unistd_32.h | 5 +- include/litmus/unistd_64.h | 9 +- kernel/lockdep.c | 7 +- kernel/mutex.c | 125 ++ kernel/sched.c | 27 + kernel/semaphore.c | 13 +- kernel/softirq.c | 322 +++- kernel/workqueue.c | 71 +- litmus/Kconfig | 148 +- litmus/Makefile | 11 +- litmus/affinity.c | 2 +- litmus/binheap.c | 443 +++++ litmus/edf_common.c | 147 +- litmus/fdso.c | 13 + litmus/gpu_affinity.c | 113 ++ litmus/ikglp_lock.c | 2838 +++++++++++++++++++++++++++++++++ litmus/jobs.c | 17 +- litmus/kexclu_affinity.c | 92 ++ litmus/kfmlp_lock.c | 1002 ++++++++++++ litmus/litmus.c | 126 +- litmus/litmus_pai_softirq.c | 64 + litmus/litmus_proc.c | 17 + litmus/litmus_softirq.c | 1582 ++++++++++++++++++ litmus/locking.c | 393 ++++- litmus/nvidia_info.c | 597 +++++++ litmus/preempt.c | 5 + litmus/rsm_lock.c | 796 +++++++++ litmus/sched_cedf.c | 1062 +++++++++++- litmus/sched_gsn_edf.c | 1032 ++++++++++-- litmus/sched_litmus.c | 2 + litmus/sched_plugin.c | 135 +- litmus/sched_task_trace.c | 282 +++- litmus/sched_trace_external.c | 64 + 59 files changed, 13012 insertions(+), 280 deletions(-) create mode 100644 include/litmus/binheap.h create mode 100644 include/litmus/fpmath.h create mode 100644 include/litmus/gpu_affinity.h create mode 100644 include/litmus/ikglp_lock.h create mode 100644 include/litmus/kexclu_affinity.h create mode 100644 include/litmus/kfmlp_lock.h create mode 100644 include/litmus/litmus_softirq.h create mode 100644 include/litmus/nvidia_info.h create mode 100644 include/litmus/rsm_lock.h create mode 100644 include/litmus/sched_trace_external.h create mode 100644 litmus/binheap.c create mode 100644 litmus/gpu_affinity.c create mode 100644 litmus/ikglp_lock.c create mode 100644 litmus/kexclu_affinity.c create mode 100644 litmus/kfmlp_lock.c create mode 100644 litmus/litmus_pai_softirq.c create mode 100644 litmus/litmus_softirq.c create mode 100644 litmus/nvidia_info.c create mode 100644 litmus/rsm_lock.c create mode 100644 litmus/sched_trace_external.c diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 6c0802eb2f7f..680a5cb4b585 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -10,6 +10,10 @@ #include #include +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + #include #include #include diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d0126222b394..0cb4373698e7 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ 
-358,3 +358,4 @@ ENTRY(sys_call_table) .long sys_wait_for_ts_release .long sys_release_ts /* +10 */ .long sys_null_call + .long sys_register_nv_device diff --git a/include/linux/completion.h b/include/linux/completion.h index 9d727271c9fe..cff405c4dd3a 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -76,6 +76,7 @@ static inline void init_completion(struct completion *x) init_waitqueue_head(&x->wait); } +extern void __wait_for_completion_locked(struct completion *); extern void wait_for_completion(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f6efed0039ed..57a7bc8807be 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -445,6 +445,7 @@ static inline void __raise_softirq_irqoff(unsigned int nr) extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); +extern void wakeup_softirqd(void); /* This is the worklist that queues up per-cpu softirq work. * @@ -500,6 +501,10 @@ struct tasklet_struct atomic_t count; void (*func)(unsigned long); unsigned long data; + +#if defined(CONFIG_LITMUS_SOFTIRQD) || defined(CONFIG_LITMUS_PAI_SOFTIRQD) + struct task_struct *owner; +#endif }; #define DECLARE_TASKLET(name, func, data) \ @@ -537,6 +542,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t) #define tasklet_unlock(t) do { } while (0) #endif +extern void ___tasklet_schedule(struct tasklet_struct *t); extern void __tasklet_schedule(struct tasklet_struct *t); static inline void tasklet_schedule(struct tasklet_struct *t) @@ -545,6 +551,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t) __tasklet_schedule(t); } +extern void ___tasklet_hi_schedule(struct tasklet_struct *t); extern void __tasklet_hi_schedule(struct tasklet_struct *t); static inline void tasklet_hi_schedule(struct tasklet_struct *t) @@ -553,6 +560,7 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t) __tasklet_hi_schedule(t); } +extern void ___tasklet_hi_schedule_first(struct tasklet_struct *t); extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); /* @@ -582,7 +590,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) } static inline void tasklet_enable(struct tasklet_struct *t) -{ +{ smp_mb__before_atomic_dec(); atomic_dec(&t->count); } diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a940fe435aca..cb47debbf24d 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -126,6 +126,15 @@ static inline int mutex_is_locked(struct mutex *lock) return atomic_read(&lock->count) != 1; } +/* return non-zero to abort. only pre-side-effects may abort */ +typedef int (*side_effect_t)(unsigned long); +extern void mutex_lock_sfx(struct mutex *lock, + side_effect_t pre, unsigned long pre_arg, + side_effect_t post, unsigned long post_arg); +extern void mutex_unlock_sfx(struct mutex *lock, + side_effect_t pre, unsigned long pre_arg, + side_effect_t post, unsigned long post_arg); + /* * See kernel/mutex.c for detailed documentation of these APIs. * Also see Documentation/mutex-design.txt. 
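The mutex_lock_sfx()/mutex_unlock_sfx() entry points added above let a caller attach side effects that run together with acquiring or releasing the mutex, with a non-zero return from the pre-side-effect aborting the operation. The following is a minimal, hypothetical sketch of a caller; struct my_dev, my_pre, my_post, and my_dev_get are invented for illustration (the actual users in this patch are the klitirqd semaphore-status helpers in litmus/litmus_softirq.c).

#include <linux/mutex.h>

struct my_dev {
	struct mutex lock;
	int users;
};

/* pre-side-effect: a non-zero return aborts the acquisition
 * (per the comment on side_effect_t above) */
static int my_pre(unsigned long arg)
{
	struct my_dev *d = (struct my_dev *)arg;
	return (d->users < 0);	/* abort if the device is being torn down */
}

/* post-side-effect: runs once the mutex has been acquired */
static int my_post(unsigned long arg)
{
	struct my_dev *d = (struct my_dev *)arg;
	d->users++;
	return 0;
}

static void my_dev_get(struct my_dev *d)
{
	mutex_lock_sfx(&d->lock, my_pre, (unsigned long)d,
		       my_post, (unsigned long)d);
}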
@@ -153,6 +162,7 @@ extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); extern int __must_check mutex_lock_killable(struct mutex *lock); + # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 39fa04966aa8..c83fc2b65f01 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -43,4 +43,13 @@ extern int __must_check down_trylock(struct semaphore *sem); extern int __must_check down_timeout(struct semaphore *sem, long jiffies); extern void up(struct semaphore *sem); +extern void __down(struct semaphore *sem); +extern void __up(struct semaphore *sem); + +struct semaphore_waiter { + struct list_head list; + struct task_struct *task; + int up; +}; + #endif /* __LINUX_SEMAPHORE_H */ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f584aba78ca9..1ec2ec7d4e3b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -83,6 +83,9 @@ struct work_struct { #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif +#ifdef CONFIG_LITMUS_SOFTIRQD + struct task_struct *owner; +#endif }; #define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU) @@ -115,11 +118,25 @@ struct execute_work { #define __WORK_INIT_LOCKDEP_MAP(n, k) #endif +#ifdef CONFIG_LITMUS_SOFTIRQD +#define __WORK_INIT_OWNER() \ + .owner = NULL, + +#define PREPARE_OWNER(_work, _owner) \ + do { \ + (_work)->owner = (_owner); \ + } while(0) +#else +#define __WORK_INIT_OWNER() +#define PREPARE_OWNER(_work, _owner) +#endif + #define __WORK_INITIALIZER(n, f) { \ .data = WORK_DATA_STATIC_INIT(), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ + __WORK_INIT_OWNER() \ } #define __DELAYED_WORK_INITIALIZER(n, f) { \ @@ -357,6 +374,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void flush_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); +extern int __schedule_work(struct work_struct *work); extern int schedule_work(struct work_struct *work); extern int schedule_work_on(int cpu, struct work_struct *work); extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay); diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h new file mode 100644 index 000000000000..9e966e3886cb --- /dev/null +++ b/include/litmus/binheap.h @@ -0,0 +1,207 @@ +#ifndef LITMUS_BINARY_HEAP_H +#define LITMUS_BINARY_HEAP_H + +#include + +/** + * Simple binary heap with add, arbitrary delete, delete_root, and top + * operations. + * + * Style meant to conform with list.h. + * + * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial + * heap may be overkill (and perhaps not general enough) for some applications. + * + * Note: In order to make node swaps fast, a node inserted with a data pointer + * may not always hold said data pointer. This is similar to the binomial heap + * implementation. This does make node deletion tricky since we have to + * (1) locate the node that holds the data pointer to delete, and (2) the + * node that was originally inserted with said data pointer. These have to be + * coalesced into a single node before removal (see usage of + * __binheap_safe_swap()). We have to track node references to accomplish this. 
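To make the node/data-pointer scheme concrete, here is a minimal usage sketch of the binheap API declared below; struct my_job and its deadline field are invented for illustration and mirror how the plugins embed binheap_node in structures such as struct nested_info.

#include <litmus/binheap.h>

struct my_job {
	unsigned long long deadline;
	struct binheap_node heap_node;
};

/* 'less-than' comparator => min-heap ordered by deadline */
static int job_order(struct binheap_node *a, struct binheap_node *b)
{
	struct my_job *ja = binheap_entry(a, struct my_job, heap_node);
	struct my_job *jb = binheap_entry(b, struct my_job, heap_node);
	return ja->deadline < jb->deadline;
}

static struct binheap_handle my_heap;

static void my_heap_init(void)
{
	INIT_BINHEAP_HANDLE(&my_heap, job_order);
}

static void my_heap_add(struct my_job *j)
{
	INIT_BINHEAP_NODE(&j->heap_node);
	binheap_add(&j->heap_node, &my_heap, struct my_job, heap_node);
}

static struct my_job* my_heap_pop(void)
{
	struct my_job *j = NULL;
	if (!binheap_empty(&my_heap)) {
		j = binheap_top_entry(&my_heap, struct my_job, heap_node);
		binheap_delete_root(&my_heap, struct my_job, heap_node);
	}
	return j;
}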
+ */ + +struct binheap_node { + void *data; + struct binheap_node *parent; + struct binheap_node *left; + struct binheap_node *right; + + /* pointer to binheap_node that holds *data for which this binheap_node + * was originally inserted. (*data "owns" this node) + */ + struct binheap_node *ref; + struct binheap_node **ref_ptr; +}; + +/** + * Signature of compator function. Assumed 'less-than' (min-heap). + * Pass in 'greater-than' for max-heap. + * + * TODO: Consider macro-based implementation that allows comparator to be + * inlined (similar to Linux red/black tree) for greater efficiency. + */ +typedef int (*binheap_order_t)(struct binheap_node *a, + struct binheap_node *b); + + +struct binheap_handle { + struct binheap_node *root; + + /* pointer to node to take next inserted child */ + struct binheap_node *next; + + /* pointer to last node in complete binary tree */ + struct binheap_node *last; + + /* comparator function pointer */ + binheap_order_t compare; +}; + + +#define BINHEAP_POISON ((void*)(0xdeadbeef)) + + +/** + * binheap_entry - get the struct for this heap node. + * Only valid when called upon heap nodes other than the root handle. + * @ptr: the heap node. + * @type: the type of struct pointed to by binheap_node::data. + * @member: unused. + */ +#define binheap_entry(ptr, type, member) \ +((type *)((ptr)->data)) + +/** + * binheap_node_container - get the struct that contains this node. + * Only valid when called upon heap nodes other than the root handle. + * @ptr: the heap node. + * @type: the type of struct the node is embedded in. + * @member: the name of the binheap_struct within the (type) struct. + */ +#define binheap_node_container(ptr, type, member) \ +container_of((ptr), type, member) + +/** + * binheap_top_entry - get the struct for the node at the top of the heap. + * Only valid when called upon the heap handle node. + * @ptr: the special heap-handle node. + * @type: the type of the struct the head is embedded in. + * @member: the name of the binheap_struct within the (type) struct. + */ +#define binheap_top_entry(ptr, type, member) \ +binheap_entry((ptr)->root, type, member) + +/** + * binheap_delete_root - remove the root element from the heap. + * @handle: handle to the heap. + * @type: the type of the struct the head is embedded in. + * @member: the name of the binheap_struct within the (type) struct. + */ +#define binheap_delete_root(handle, type, member) \ +__binheap_delete_root((handle), &((type *)((handle)->root->data))->member) + +/** + * binheap_delete - remove an arbitrary element from the heap. + * @to_delete: pointer to node to be removed. + * @handle: handle to the heap. + */ +#define binheap_delete(to_delete, handle) \ +__binheap_delete((to_delete), (handle)) + +/** + * binheap_add - insert an element to the heap + * new_node: node to add. + * @handle: handle to the heap. + * @type: the type of the struct the head is embedded in. + * @member: the name of the binheap_struct within the (type) struct. + */ +#define binheap_add(new_node, handle, type, member) \ +__binheap_add((new_node), (handle), container_of((new_node), type, member)) + +/** + * binheap_decrease - re-eval the position of a node (based upon its + * original data pointer). + * @handle: handle to the heap. + * @orig_node: node that was associated with the data pointer + * (whose value has changed) when said pointer was + * added to the heap. 
+ */ +#define binheap_decrease(orig_node, handle) \ +__binheap_decrease((orig_node), (handle)) + +#define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL , NULL, NULL} + +#define BINHEAP_NODE(name) \ + struct binheap_node name = BINHEAP_NODE_INIT() + + +static inline void INIT_BINHEAP_NODE(struct binheap_node *n) +{ + n->data = NULL; + n->parent = BINHEAP_POISON; + n->left = NULL; + n->right = NULL; + n->ref = NULL; + n->ref_ptr = NULL; +} + +static inline void INIT_BINHEAP_HANDLE( + struct binheap_handle *handle, + binheap_order_t compare) +{ + handle->root = NULL; + handle->next = NULL; + handle->last = NULL; + handle->compare = compare; +} + +/* Returns true (1) if binheap is empty. */ +static inline int binheap_empty(struct binheap_handle *handle) +{ + return(handle->root == NULL); +} + +/* Returns true (1) if binheap node is in a heap. */ +static inline int binheap_is_in_heap(struct binheap_node *node) +{ + return (node->parent != BINHEAP_POISON); +} + + +int binheap_is_in_this_heap(struct binheap_node *node, struct binheap_handle* heap); + + + +void __binheap_add(struct binheap_node *new_node, + struct binheap_handle *handle, + void *data); + + +/** + * Removes the root node from the heap. The node is removed after coalescing + * the binheap_node with its original data pointer at the root of the tree. + * + * The 'last' node in the tree is then swapped up to the root and bubbled + * down. + */ +void __binheap_delete_root(struct binheap_handle *handle, + struct binheap_node *container); + +/** + * Delete an arbitrary node. Bubble node to delete up to the root, + * and then delete to root. + */ +void __binheap_delete( + struct binheap_node *node_to_delete, + struct binheap_handle *handle); + +/** + * Bubble up a node whose pointer has decreased in value. 
+ */ +void __binheap_decrease(struct binheap_node *orig_node, + struct binheap_handle *handle); + + +#endif + diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h index bbaf22ea7f12..63dff7efe8fb 100644 --- a/include/litmus/edf_common.h +++ b/include/litmus/edf_common.h @@ -20,6 +20,18 @@ int edf_higher_prio(struct task_struct* first, int edf_ready_order(struct bheap_node* a, struct bheap_node* b); +#ifdef CONFIG_LITMUS_NESTED_LOCKING +/* binheap_nodes must be embedded within 'struct litmus_lock' */ +int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b); +int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b); +int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b); +int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b); + +int __edf_higher_prio(struct task_struct* first, comparison_mode_t first_mode, + struct task_struct* second, comparison_mode_t second_mode); + +#endif + int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t); #endif diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h index caf2a1e6918c..1f5d3bd1a1db 100644 --- a/include/litmus/fdso.h +++ b/include/litmus/fdso.h @@ -20,7 +20,16 @@ typedef enum { FMLP_SEM = 0, SRP_SEM = 1, - MAX_OBJ_TYPE = 1 + RSM_MUTEX = 2, + IKGLP_SEM = 3, + KFMLP_SEM = 4, + + IKGLP_SIMPLE_GPU_AFF_OBS = 5, + IKGLP_GPU_AFF_OBS = 6, + KFMLP_SIMPLE_GPU_AFF_OBS = 7, + KFMLP_GPU_AFF_OBS = 8, + + MAX_OBJ_TYPE = 8 } obj_type_t; struct inode_obj_id { @@ -64,8 +73,11 @@ static inline void* od_lookup(int od, obj_type_t type) } #define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM)) +#define lookup_kfmlp_sem(od)((struct pi_semaphore*) od_lookup(od, KFMLP_SEM)) #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM)) #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID)) +#define lookup_rsm_mutex(od)((struct litmus_lock*) od_lookup(od, FMLP_SEM)) + #endif diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h new file mode 100644 index 000000000000..04d4bcaeae96 --- /dev/null +++ b/include/litmus/fpmath.h @@ -0,0 +1,145 @@ +#ifndef __FP_MATH_H__ +#define __FP_MATH_H__ + +#ifndef __KERNEL__ +#include +#define abs(x) (((x) < 0) ? -(x) : x) +#endif + +// Use 64-bit because we want to track things at the nanosecond scale. +// This can lead to very large numbers. 
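A brief sketch of the fixed-point helpers defined below (Q10 format, so a raw value of 1024 represents 1.0); the observed/estimate values are made up for illustration and only show the round trip between integers and fp_t.

#include <litmus/fpmath.h>

static void fpmath_example(void)
{
	fp_t ratio, scaled;
	fpbuf_t observed = 1500000;   /* 1.5 ms observed, in ns */
	fpbuf_t estimate = 1000000;   /* 1.0 ms prior estimate, in ns */

	/* observed/estimate as a fixed-point value: 1536/1024 = 1.5 */
	ratio = _frac(observed, estimate);

	/* scale the estimate by that ratio (fixed-point multiply) */
	scaled = _mul(ratio, _integer_to_fp(estimate));

	/* back to an integer number of nanoseconds (truncates toward zero) */
	observed = _fp_to_integer(scaled);
}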
+typedef int64_t fpbuf_t; +typedef struct +{ + fpbuf_t val; +} fp_t; + +#define FP_SHIFT 10 +#define ROUND_BIT (FP_SHIFT - 1) + +#define _fp(x) ((fp_t) {x}) + +#ifdef __KERNEL__ +static const fp_t LITMUS_FP_ZERO = {.val = 0}; +static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)}; +#endif + +static inline fp_t FP(fpbuf_t x) +{ + return _fp(((fpbuf_t) x) << FP_SHIFT); +} + +/* divide two integers to obtain a fixed point value */ +static inline fp_t _frac(fpbuf_t a, fpbuf_t b) +{ + return _fp(FP(a).val / (b)); +} + +static inline fpbuf_t _point(fp_t x) +{ + return (x.val % (1 << FP_SHIFT)); + +} + +#define fp2str(x) x.val +/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */ +#define _FP_ "%ld/1024" + +static inline fpbuf_t _floor(fp_t x) +{ + return x.val >> FP_SHIFT; +} + +/* FIXME: negative rounding */ +static inline fpbuf_t _round(fp_t x) +{ + return _floor(x) + ((x.val >> ROUND_BIT) & 1); +} + +/* multiply two fixed point values */ +static inline fp_t _mul(fp_t a, fp_t b) +{ + return _fp((a.val * b.val) >> FP_SHIFT); +} + +static inline fp_t _div(fp_t a, fp_t b) +{ +#if !defined(__KERNEL__) && !defined(unlikely) +#define unlikely(x) (x) +#define DO_UNDEF_UNLIKELY +#endif + /* try not to overflow */ + if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) )) + return _fp((a.val / b.val) << FP_SHIFT); + else + return _fp((a.val << FP_SHIFT) / b.val); +#ifdef DO_UNDEF_UNLIKELY +#undef unlikely +#undef DO_UNDEF_UNLIKELY +#endif +} + +static inline fp_t _add(fp_t a, fp_t b) +{ + return _fp(a.val + b.val); +} + +static inline fp_t _sub(fp_t a, fp_t b) +{ + return _fp(a.val - b.val); +} + +static inline fp_t _neg(fp_t x) +{ + return _fp(-x.val); +} + +static inline fp_t _abs(fp_t x) +{ + return _fp(abs(x.val)); +} + +/* works the same as casting float/double to integer */ +static inline fpbuf_t _fp_to_integer(fp_t x) +{ + return _floor(_abs(x)) * ((x.val > 0) ? 
1 : -1); +} + +static inline fp_t _integer_to_fp(fpbuf_t x) +{ + return _frac(x,1); +} + +static inline int _leq(fp_t a, fp_t b) +{ + return a.val <= b.val; +} + +static inline int _geq(fp_t a, fp_t b) +{ + return a.val >= b.val; +} + +static inline int _lt(fp_t a, fp_t b) +{ + return a.val < b.val; +} + +static inline int _gt(fp_t a, fp_t b) +{ + return a.val > b.val; +} + +static inline int _eq(fp_t a, fp_t b) +{ + return a.val == b.val; +} + +static inline fp_t _max(fp_t a, fp_t b) +{ + if (a.val < b.val) + return b; + else + return a; +} +#endif diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h new file mode 100644 index 000000000000..6b3fb8b28745 --- /dev/null +++ b/include/litmus/gpu_affinity.h @@ -0,0 +1,49 @@ +#ifndef LITMUS_GPU_AFFINITY_H +#define LITMUS_GPU_AFFINITY_H + +#include +#include +#include + +void update_gpu_estimate(struct task_struct* t, lt_t observed); +gpu_migration_dist_t gpu_migration_distance(int a, int b); + +static inline void reset_gpu_tracker(struct task_struct* t) +{ + t->rt_param.accum_gpu_time = 0; +} + +static inline void start_gpu_tracker(struct task_struct* t) +{ + t->rt_param.gpu_time_stamp = litmus_clock(); +} + +static inline void stop_gpu_tracker(struct task_struct* t) +{ + lt_t now = litmus_clock(); + t->rt_param.accum_gpu_time += (now - t->rt_param.gpu_time_stamp); +} + +static inline lt_t get_gpu_time(struct task_struct* t) +{ + return t->rt_param.accum_gpu_time; +} + +static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) +{ + int i; + fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); + lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates... + + WARN_ON(temp < 0); + + // lower-bound a distant migration to be at least equal to the level + // below it. + for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) { + val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); + } + + return ((val > 0) ? 
val : dist+1); +} + +#endif diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h new file mode 100644 index 000000000000..af6f15178cb1 --- /dev/null +++ b/include/litmus/ikglp_lock.h @@ -0,0 +1,160 @@ +#ifndef LITMUS_IKGLP_H +#define LITMUS_IKGLP_H + +#include +#include +#include + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +#include + +struct ikglp_affinity; +#endif + +typedef struct ikglp_heap_node +{ + struct task_struct *task; + struct binheap_node node; +} ikglp_heap_node_t; + +struct fifo_queue; +struct ikglp_wait_state; + +typedef struct ikglp_donee_heap_node +{ + struct task_struct *task; + struct fifo_queue *fq; + struct ikglp_wait_state *donor_info; // cross-linked with ikglp_wait_state_t of donor + + struct binheap_node node; +} ikglp_donee_heap_node_t; + +// Maintains the state of a request as it goes through the IKGLP +typedef struct ikglp_wait_state { + struct task_struct *task; // pointer back to the requesting task + + // Data for while waiting in FIFO Queue + wait_queue_t fq_node; + ikglp_heap_node_t global_heap_node; + ikglp_donee_heap_node_t donee_heap_node; + + // Data for while waiting in PQ + ikglp_heap_node_t pq_node; + + // Data for while waiting as a donor + ikglp_donee_heap_node_t *donee_info; // cross-linked with donee's ikglp_donee_heap_node_t + struct nested_info prio_donation; + struct binheap_node node; +} ikglp_wait_state_t; + +/* struct for semaphore with priority inheritance */ +struct fifo_queue +{ + wait_queue_head_t wait; + struct task_struct* owner; + + // used for bookkeepping + ikglp_heap_node_t global_heap_node; + ikglp_donee_heap_node_t donee_heap_node; + + struct task_struct* hp_waiter; + int count; /* number of waiters + holder */ + + struct nested_info nest; +}; + +struct ikglp_semaphore +{ + struct litmus_lock litmus_lock; + + raw_spinlock_t lock; + raw_spinlock_t real_lock; + + int nr_replicas; // AKA k + int m; + + int max_fifo_len; // max len of a fifo queue + int nr_in_fifos; + + struct binheap_handle top_m; // min heap, base prio + int top_m_size; // number of nodes in top_m + + struct binheap_handle not_top_m; // max heap, base prio + + struct binheap_handle donees; // min-heap, base prio + struct fifo_queue *shortest_fifo_queue; // pointer to shortest fifo queue + + /* data structures for holding requests */ + struct fifo_queue *fifo_queues; // array nr_replicas in length + struct binheap_handle priority_queue; // max-heap, base prio + struct binheap_handle donors; // max-heap, base prio + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + struct ikglp_affinity *aff_obs; +#endif +}; + +static inline struct ikglp_semaphore* ikglp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct ikglp_semaphore, litmus_lock); +} + +int ikglp_lock(struct litmus_lock* l); +int ikglp_unlock(struct litmus_lock* l); +int ikglp_close(struct litmus_lock* l); +void ikglp_free(struct litmus_lock* l); +struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg); + + + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +struct ikglp_queue_info +{ + struct fifo_queue* q; + lt_t estimated_len; + int *nr_cur_users; +}; + +struct ikglp_affinity_ops +{ + struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO + ikglp_wait_state_t* (*advise_steal)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select steal from FIFO + ikglp_donee_heap_node_t* (*advise_donee_selection)(struct ikglp_affinity* aff, struct task_struct* t); // select a donee + 
ikglp_wait_state_t* (*advise_donor_to_fq)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select a donor to move to PQ + + void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue + void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue + void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica acquired + void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica freed + int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq); // convert a replica # to a GPU (includes offsets and simult user folding) +}; + +struct ikglp_affinity +{ + struct affinity_observer obs; + struct ikglp_affinity_ops *ops; + struct ikglp_queue_info *q_info; + int *nr_cur_users_on_rsrc; + int offset; + int nr_simult; + int nr_rsrc; + int relax_max_fifo_len; +}; + +static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) +{ + return container_of(aff_obs, struct ikglp_affinity, obs); +} + +int ikglp_aff_obs_close(struct affinity_observer*); +void ikglp_aff_obs_free(struct affinity_observer*); +struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); +struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); +#endif + + + +#endif diff --git a/include/litmus/kexclu_affinity.h b/include/litmus/kexclu_affinity.h new file mode 100644 index 000000000000..f6355de49074 --- /dev/null +++ b/include/litmus/kexclu_affinity.h @@ -0,0 +1,35 @@ +#ifndef LITMUS_AFF_OBS_H +#define LITMUS_AFF_OBS_H + +#include + +struct affinity_observer_ops; + +struct affinity_observer +{ + struct affinity_observer_ops* ops; + int type; + int ident; + + struct litmus_lock* lock; // the lock under observation +}; + +typedef int (*aff_obs_open_t)(struct affinity_observer* aff_obs, + void* __user arg); +typedef int (*aff_obs_close_t)(struct affinity_observer* aff_obs); +typedef void (*aff_obs_free_t)(struct affinity_observer* aff_obs); + +struct affinity_observer_ops +{ + aff_obs_open_t open; + aff_obs_close_t close; + aff_obs_free_t deallocate; +}; + +struct litmus_lock* get_lock_from_od(int od); + +void affinity_observer_new(struct affinity_observer* aff, + struct affinity_observer_ops* ops, + struct affinity_observer_args* args); + +#endif diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h new file mode 100644 index 000000000000..5f0aae6e6f42 --- /dev/null +++ b/include/litmus/kfmlp_lock.h @@ -0,0 +1,97 @@ +#ifndef LITMUS_KFMLP_H +#define LITMUS_KFMLP_H + +#include +#include + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +#include + +struct kfmlp_affinity; +#endif + +/* struct for semaphore with priority inheritance */ +struct kfmlp_queue +{ + wait_queue_head_t wait; + struct task_struct* owner; + struct task_struct* hp_waiter; + int count; /* number of waiters + holder */ +}; + +struct kfmlp_semaphore +{ + struct litmus_lock litmus_lock; + + spinlock_t lock; + + int num_resources; /* aka k */ + + struct kfmlp_queue *queues; /* array */ + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + struct kfmlp_affinity *aff_obs; +#endif +}; + +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct kfmlp_semaphore, litmus_lock); +} + +int 
kfmlp_lock(struct litmus_lock* l); +int kfmlp_unlock(struct litmus_lock* l); +int kfmlp_close(struct litmus_lock* l); +void kfmlp_free(struct litmus_lock* l); +struct litmus_lock* kfmlp_new(struct litmus_lock_ops*, void* __user arg); + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +struct kfmlp_queue_info +{ + struct kfmlp_queue* q; + lt_t estimated_len; + int *nr_cur_users; +}; + +struct kfmlp_affinity_ops +{ + struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t); + struct task_struct* (*advise_steal)(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from); + void (*notify_enqueue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + int (*replica_to_resource)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq); +}; + +struct kfmlp_affinity +{ + struct affinity_observer obs; + struct kfmlp_affinity_ops *ops; + struct kfmlp_queue_info *q_info; + int *nr_cur_users_on_rsrc; + int offset; + int nr_simult; + int nr_rsrc; +}; + +static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) +{ + return container_of(aff_obs, struct kfmlp_affinity, obs); +} + +int kfmlp_aff_obs_close(struct affinity_observer*); +void kfmlp_aff_obs_free(struct affinity_observer*); +struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); +struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); + + +#endif + +#endif + + diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index 0b071fd359f9..71df378236f5 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -26,6 +26,7 @@ static inline int in_list(struct list_head* list) ); } + struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq); #define NO_CPU 0xffffffff @@ -53,12 +54,16 @@ void litmus_exit_task(struct task_struct *tsk); #define get_rt_phase(t) (tsk_rt(t)->task_params.phase) #define get_partition(t) (tsk_rt(t)->task_params.cpu) #define get_deadline(t) (tsk_rt(t)->job_params.deadline) +#define get_period(t) (tsk_rt(t)->task_params.period) #define get_release(t) (tsk_rt(t)->job_params.release) #define get_class(t) (tsk_rt(t)->task_params.cls) #define is_priority_boosted(t) (tsk_rt(t)->priority_boosted) #define get_boost_start(t) (tsk_rt(t)->boost_start_time) +#define effective_priority(t) ((!(tsk_rt(t)->inh_task)) ? 
t : tsk_rt(t)->inh_task) +#define base_priority(t) (t) + inline static int budget_exhausted(struct task_struct* t) { return get_exec_time(t) >= get_exec_cost(t); @@ -114,10 +119,12 @@ static inline lt_t litmus_clock(void) #define earlier_deadline(a, b) (lt_before(\ (a)->rt_param.job_params.deadline,\ (b)->rt_param.job_params.deadline)) +#define shorter_period(a, b) (lt_before(\ + (a)->rt_param.task_params.period,\ + (b)->rt_param.task_params.period)) #define earlier_release(a, b) (lt_before(\ (a)->rt_param.job_params.release,\ (b)->rt_param.job_params.release)) - void preempt_if_preemptable(struct task_struct* t, int on_cpu); #ifdef CONFIG_LITMUS_LOCKING diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h new file mode 100644 index 000000000000..1eb5ea1a6c4b --- /dev/null +++ b/include/litmus/litmus_softirq.h @@ -0,0 +1,199 @@ +#ifndef __LITMUS_SOFTIRQ_H +#define __LITMUS_SOFTIRQ_H + +#include +#include + +/* + Threaded tasklet handling for Litmus. Tasklets + are scheduled with the priority of the tasklet's + owner---that is, the RT task on behalf the tasklet + runs. + + Tasklets are current scheduled in FIFO order with + NO priority inheritance for "blocked" tasklets. + + klitirqd assumes the priority of the owner of the + tasklet when the tasklet is next to execute. + + Currently, hi-tasklets are scheduled before + low-tasklets, regardless of priority of low-tasklets. + And likewise, low-tasklets are scheduled before work + queue objects. This priority inversion probably needs + to be fixed, though it is not an issue if our work with + GPUs as GPUs are owned (and associated klitirqds) for + exclusive time periods, thus no inversions can + occur. + */ + + + +#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD + +/* Spawns NR_LITMUS_SOFTIRQD klitirqd daemons. + Actual launch of threads is deffered to kworker's + workqueue, so daemons will likely not be immediately + running when this function returns, though the required + data will be initialized. + + @affinity_set: an array expressing the processor affinity + for each of the NR_LITMUS_SOFTIRQD daemons. May be set + to NULL for global scheduling. + + - Examples - + 8-CPU system with two CPU clusters: + affinity[] = {0, 0, 0, 0, 3, 3, 3, 3} + NOTE: Daemons not actually bound to specified CPU, but rather + cluster in which the CPU resides. + + 8-CPU system, partitioned: + affinity[] = {0, 1, 2, 3, 4, 5, 6, 7} + + FIXME: change array to a CPU topology or array of cpumasks + + */ +void spawn_klitirqd(int* affinity); + + +/* Raises a flag to tell klitirqds to terminate. + Termination is async, so some threads may be running + after function return. */ +void kill_klitirqd(void); + + +/* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqs are ready + to handle tasklets. 0, otherwise.*/ +int klitirqd_is_ready(void); + +/* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqs are ready + to handle tasklets. 0, otherwise.*/ +int klitirqd_is_dead(void); + +/* Flushes all pending work out to the OS for regular + * tasklet/work processing of the specified 'owner' + * + * PRECOND: klitirqd_thread must have a clear entry + * in the GPU registry, otherwise this call will become + * a no-op as work will loop back to the klitirqd_thread. + * + * Pass NULL for owner to flush ALL pending items. 
+ */ +void flush_pending(struct task_struct* klitirqd_thread, + struct task_struct* owner); + +struct task_struct* get_klitirqd(unsigned int k_id); + + +extern int __litmus_tasklet_schedule( + struct tasklet_struct *t, + unsigned int k_id); + +/* schedule a tasklet on klitirqd #k_id */ +static inline int litmus_tasklet_schedule( + struct tasklet_struct *t, + unsigned int k_id) +{ + int ret = 0; + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + ret = __litmus_tasklet_schedule(t, k_id); + return(ret); +} + +/* for use by __tasklet_schedule() */ +static inline int _litmus_tasklet_schedule( + struct tasklet_struct *t, + unsigned int k_id) +{ + return(__litmus_tasklet_schedule(t, k_id)); +} + + + + +extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, + unsigned int k_id); + +/* schedule a hi tasklet on klitirqd #k_id */ +static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t, + unsigned int k_id) +{ + int ret = 0; + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + ret = __litmus_tasklet_hi_schedule(t, k_id); + return(ret); +} + +/* for use by __tasklet_hi_schedule() */ +static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t, + unsigned int k_id) +{ + return(__litmus_tasklet_hi_schedule(t, k_id)); +} + + + + + +extern int __litmus_tasklet_hi_schedule_first( + struct tasklet_struct *t, + unsigned int k_id); + +/* schedule a hi tasklet on klitirqd #k_id on next go-around */ +/* PRECONDITION: Interrupts must be disabled. */ +static inline int litmus_tasklet_hi_schedule_first( + struct tasklet_struct *t, + unsigned int k_id) +{ + int ret = 0; + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + ret = __litmus_tasklet_hi_schedule_first(t, k_id); + return(ret); +} + +/* for use by __tasklet_hi_schedule_first() */ +static inline int _litmus_tasklet_hi_schedule_first( + struct tasklet_struct *t, + unsigned int k_id) +{ + return(__litmus_tasklet_hi_schedule_first(t, k_id)); +} + + + +////////////// + +extern int __litmus_schedule_work( + struct work_struct* w, + unsigned int k_id); + +static inline int litmus_schedule_work( + struct work_struct* w, + unsigned int k_id) +{ + return(__litmus_schedule_work(w, k_id)); +} + + + +///////////// mutex operations for client threads. 
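As a concrete (hypothetical) illustration of the klitirqd API declared above, the sketch below spawns the daemons with the clustered affinity layout described in the spawn_klitirqd() comment and then hands a tasklet, tagged with its owning real-time task, to klitirqd #0. Everything other than the litmus_softirq.h/interrupt.h symbols is invented, and the array initializer assumes CONFIG_NR_LITMUS_SOFTIRQD == 8.

#include <linux/interrupt.h>
#include <litmus/litmus_softirq.h>

static struct tasklet_struct my_tasklet;

static void my_func(unsigned long data)
{
	/* bottom-half work, executed by a klitirqd thread at the
	 * priority of my_tasklet.owner */
}

static void my_setup(struct task_struct *rt_owner)
{
	/* 8-CPU system, two clusters: daemons 0-3 near CPU 0, 4-7 near CPU 3 */
	int affinity[NR_LITMUS_SOFTIRQD] = {0, 0, 0, 0, 3, 3, 3, 3};

	spawn_klitirqd(affinity);

	tasklet_init(&my_tasklet, my_func, 0);
	my_tasklet.owner = rt_owner;	/* priority inherited from the owner */
}

static void my_raise(void)
{
	if (klitirqd_is_ready())
		litmus_tasklet_schedule(&my_tasklet, 0); /* hand to klitirqd #0 */
	else
		tasklet_schedule(&my_tasklet);	/* fall back to regular softirq */
}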
+ +void down_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem); + +void __down_and_reset_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_reset, + enum klitirqd_sem_status to_set, + struct mutex* sem); + +void up_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem); + + + +void release_klitirqd_lock(struct task_struct* t); + +int reacquire_klitirqd_lock(struct task_struct* t); + +#endif diff --git a/include/litmus/locking.h b/include/litmus/locking.h index 4d7b870cb443..36647fee03e4 100644 --- a/include/litmus/locking.h +++ b/include/litmus/locking.h @@ -1,28 +1,160 @@ #ifndef LITMUS_LOCKING_H #define LITMUS_LOCKING_H +#include + struct litmus_lock_ops; +#ifdef CONFIG_LITMUS_NESTED_LOCKING +struct nested_info +{ + struct litmus_lock *lock; + struct task_struct *hp_waiter_eff_prio; + struct task_struct **hp_waiter_ptr; + struct binheap_node hp_binheap_node; +}; + +static inline struct task_struct* top_priority(struct binheap_handle* handle) { + if(!binheap_empty(handle)) { + return (struct task_struct*)(binheap_top_entry(handle, struct nested_info, hp_binheap_node)->hp_waiter_eff_prio); + } + return NULL; +} + +void print_hp_waiters(struct binheap_node* n, int depth); +#endif + + /* Generic base struct for LITMUS^RT userspace semaphores. * This structure should be embedded in protocol-specific semaphores. */ struct litmus_lock { struct litmus_lock_ops *ops; int type; + + int ident; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + struct nested_info nest; +//#ifdef CONFIG_DEBUG_SPINLOCK + char cheat_lockdep[2]; + struct lock_class_key key; +//#endif +#endif }; +#ifdef CONFIG_LITMUS_DGL_SUPPORT + +#define MAX_DGL_SIZE CONFIG_LITMUS_MAX_DGL_SIZE + +typedef struct dgl_wait_state { + struct task_struct *task; /* task waiting on DGL */ + struct litmus_lock *locks[MAX_DGL_SIZE]; /* requested locks in DGL */ + int size; /* size of the DGL */ + int nr_remaining; /* nr locks remainging before DGL is complete */ + int last_primary; /* index lock in locks[] that has active priority */ + wait_queue_t wq_nodes[MAX_DGL_SIZE]; +} dgl_wait_state_t; + +void wake_or_wait_on_next_lock(dgl_wait_state_t *dgl_wait); +void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/); + +void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait); +int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key); +void __waitqueue_dgl_remove_first(wait_queue_head_t *wq, dgl_wait_state_t** dgl_wait, struct task_struct **task); +#endif + +typedef int (*lock_op_t)(struct litmus_lock *l); +typedef lock_op_t lock_close_t; +typedef lock_op_t lock_lock_t; +typedef lock_op_t lock_unlock_t; + +typedef int (*lock_open_t)(struct litmus_lock *l, void* __user arg); +typedef void (*lock_free_t)(struct litmus_lock *l); + struct litmus_lock_ops { /* Current task tries to obtain / drop a reference to a lock. * Optional methods, allowed by default. */ - int (*open)(struct litmus_lock*, void* __user); - int (*close)(struct litmus_lock*); + lock_open_t open; + lock_close_t close; /* Current tries to lock/unlock this lock (mandatory methods). */ - int (*lock)(struct litmus_lock*); - int (*unlock)(struct litmus_lock*); + lock_lock_t lock; + lock_unlock_t unlock; /* The lock is no longer being referenced (mandatory method). 
*/ - void (*deallocate)(struct litmus_lock*); + lock_free_t deallocate; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + void (*propagate_increase_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags); + void (*propagate_decrease_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags); +#endif + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t* (*get_dgl_spin_lock)(struct litmus_lock *l); + int (*dgl_lock)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node); + int (*is_owner)(struct litmus_lock *l, struct task_struct *t); + void (*enable_priority)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait); +#endif }; + +/* + Nested inheritance can be achieved with fine-grain locking when there is + no need for DGL support, presuming locks are acquired in a partial order + (no cycles!). However, DGLs allow locks to be acquired in any order. This + makes nested inheritance very difficult (we don't yet know a solution) to + realize with fine-grain locks, so we use a big lock instead. + + Code contains both fine-grain and coarse-grain methods together, side-by-side. + Each lock operation *IS NOT* surrounded by ifdef/endif to help make code more + readable. However, this leads to the odd situation where both code paths + appear together in code as if they were both active together. + + THIS IS NOT REALLY THE CASE! ONLY ONE CODE PATH IS ACTUALLY ACTIVE! + + Example: + lock_global_irqsave(coarseLock, flags); + lock_fine_irqsave(fineLock, flags); + + Reality (coarse): + lock_global_irqsave(coarseLock, flags); + //lock_fine_irqsave(fineLock, flags); + + Reality (fine): + //lock_global_irqsave(coarseLock, flags); + lock_fine_irqsave(fineLock, flags); + + Be careful when you read code involving nested inheritance. + */ +#if defined(CONFIG_LITMUS_DGL_SUPPORT) +/* DGL requires a big lock to implement nested inheritance */ +#define lock_global_irqsave(lock, flags) raw_spin_lock_irqsave((lock), (flags)) +#define lock_global(lock) raw_spin_lock((lock)) +#define unlock_global_irqrestore(lock, flags) raw_spin_unlock_irqrestore((lock), (flags)) +#define unlock_global(lock) raw_spin_unlock((lock)) + +/* fine-grain locking are no-ops with DGL support */ +#define lock_fine_irqsave(lock, flags) +#define lock_fine(lock) +#define unlock_fine_irqrestore(lock, flags) +#define unlock_fine(lock) + +#elif defined(CONFIG_LITMUS_NESTED_LOCKING) +/* Use fine-grain locking when DGLs are disabled. 
*/ +/* global locking are no-ops without DGL support */ +#define lock_global_irqsave(lock, flags) +#define lock_global(lock) +#define unlock_global_irqrestore(lock, flags) +#define unlock_global(lock) + +#define lock_fine_irqsave(lock, flags) raw_spin_lock_irqsave((lock), (flags)) +#define lock_fine(lock) raw_spin_lock((lock)) +#define unlock_fine_irqrestore(lock, flags) raw_spin_unlock_irqrestore((lock), (flags)) +#define unlock_fine(lock) raw_spin_unlock((lock)) + #endif + + +#endif + diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h new file mode 100644 index 000000000000..97c9577141db --- /dev/null +++ b/include/litmus/nvidia_info.h @@ -0,0 +1,46 @@ +#ifndef __LITMUS_NVIDIA_H +#define __LITMUS_NVIDIA_H + +#include + + +#include + + +//#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD +#define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM +#define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS + +int init_nvidia_info(void); +void shutdown_nvidia_info(void); + +int is_nvidia_func(void* func_addr); + +void dump_nvidia_info(const struct tasklet_struct *t); + + +// Returns the Nvidia device # associated with provided tasklet and work_struct. +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t); +u32 get_work_nv_device_num(const struct work_struct *t); + + +int init_nv_device_reg(void); +//int get_nv_device_id(struct task_struct* owner); + + +int reg_nv_device(int reg_device_id, int register_device, struct task_struct *t); + +struct task_struct* get_nv_max_device_owner(u32 target_device_id); +//int is_nv_device_owner(u32 target_device_id); + +void lock_nv_registry(u32 reg_device_id, unsigned long* flags); +void unlock_nv_registry(u32 reg_device_id, unsigned long* flags); + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +void pai_check_priority_increase(struct task_struct *t, int reg_device_id); +void pai_check_priority_decrease(struct task_struct *t, int reg_device_id); +#endif + +//void increment_nv_int_count(u32 device); + +#endif diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h index 380b886d78ff..8f3a9ca2d4e3 100644 --- a/include/litmus/preempt.h +++ b/include/litmus/preempt.h @@ -26,12 +26,12 @@ const char* sched_state_name(int s); (x), #x, __FUNCTION__); \ } while (0); +//#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) /* ignore */ #define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \ TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \ cpu, (x), sched_state_name(x), \ (y), sched_state_name(y)) - typedef enum scheduling_state { TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that * should be scheduled, and the processor does not diff --git a/include/litmus/rsm_lock.h b/include/litmus/rsm_lock.h new file mode 100644 index 000000000000..a15189683de4 --- /dev/null +++ b/include/litmus/rsm_lock.h @@ -0,0 +1,54 @@ +#ifndef LITMUS_RSM_H +#define LITMUS_RSM_H + +#include +#include +#include + +/* struct for semaphore with priority inheritance */ +struct rsm_mutex { + struct litmus_lock litmus_lock; + + /* current resource holder */ + struct task_struct *owner; + + /* highest-priority waiter */ + struct task_struct *hp_waiter; + + /* FIFO queue of waiting tasks -- for now. time stamp in the future. */ + wait_queue_head_t wait; + + /* we do some nesting within spinlocks, so we can't use the normal + sleeplocks found in wait_queue_head_t. 
*/ + raw_spinlock_t lock; +}; + +static inline struct rsm_mutex* rsm_mutex_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct rsm_mutex, litmus_lock); +} + +#ifdef CONFIG_LITMUS_DGL_SUPPORT +int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t); +int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node); +void rsm_mutex_enable_priority(struct litmus_lock *l, dgl_wait_state_t* dgl_wait); +#endif + +void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l, + struct task_struct* t, + raw_spinlock_t* to_unlock, + unsigned long irqflags); + +void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l, + struct task_struct* t, + raw_spinlock_t* to_unlock, + unsigned long irqflags); + +int rsm_mutex_lock(struct litmus_lock* l); +int rsm_mutex_unlock(struct litmus_lock* l); +int rsm_mutex_close(struct litmus_lock* l); +void rsm_mutex_free(struct litmus_lock* l); +struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops*); + + +#endif \ No newline at end of file diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index d6d799174160..0198884eab86 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -5,6 +5,8 @@ #ifndef _LINUX_RT_PARAM_H_ #define _LINUX_RT_PARAM_H_ +#include + /* Litmus time type. */ typedef unsigned long long lt_t; @@ -24,6 +26,7 @@ static inline int lt_after_eq(lt_t a, lt_t b) typedef enum { RT_CLASS_HARD, RT_CLASS_SOFT, + RT_CLASS_SOFT_W_SLIP, RT_CLASS_BEST_EFFORT } task_class_t; @@ -52,6 +55,19 @@ union np_flag { } np; }; +struct affinity_observer_args +{ + int lock_od; +}; + +struct gpu_affinity_observer_args +{ + struct affinity_observer_args obs; + int replica_to_gpu_offset; + int nr_simult_users; + int relaxed_rules; +}; + /* The definition of the data that is shared between the kernel and real-time * tasks via a shared page (see litmus/ctrldev.c). * @@ -75,6 +91,9 @@ struct control_page { /* don't export internal data structures to user space (liblitmus) */ #ifdef __KERNEL__ +#include +#include + struct _rt_domain; struct bheap_node; struct release_heap; @@ -100,6 +119,31 @@ struct rt_job { struct pfair_param; +enum klitirqd_sem_status +{ + NEED_TO_REACQUIRE, + REACQUIRING, + NOT_HELD, + HELD +}; + +typedef enum gpu_migration_dist +{ + // TODO: Make this variable against NR_NVIDIA_GPUS + MIG_LOCAL = 0, + MIG_NEAR = 1, + MIG_MED = 2, + MIG_FAR = 3, // 8 GPUs in a binary tree hierarchy + MIG_NONE = 4, + + MIG_LAST = MIG_NONE +} gpu_migration_dist_t; + +typedef struct feedback_est{ + fp_t est; + fp_t accum_err; +} feedback_est_t; + /* RT task parameters for scheduling extensions * These parameters are inherited during clone and therefore must * be explicitly set up before the task set is launched. @@ -114,6 +158,52 @@ struct rt_param { /* is the task present? (true if it can be scheduled) */ unsigned int present:1; +#ifdef CONFIG_LITMUS_SOFTIRQD + /* proxy threads have minimum priority by default */ + unsigned int is_proxy_thread:1; + + /* pointer to klitirqd currently working on this + task_struct's behalf. only set by the task pointed + to by klitirqd. + + ptr only valid if is_proxy_thread == 0 + */ + struct task_struct* cur_klitirqd; + + /* Used to implement mutual execution exclusion between + * job and klitirqd execution. Job must always hold + * it's klitirqd_sem to execute. klitirqd instance + * must hold the semaphore before executing on behalf + * of a job. 
+ */ + struct mutex klitirqd_sem; + + /* status of held klitirqd_sem, even if the held klitirqd_sem is from + another task (only proxy threads do this though). + */ + atomic_t klitirqd_sem_stat; +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + /* number of top-half interrupts handled on behalf of current job */ + atomic_t nv_int_count; + long unsigned int held_gpus; // bitmap of held GPUs. + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + fp_t gpu_fb_param_a[MIG_LAST+1]; + fp_t gpu_fb_param_b[MIG_LAST+1]; + + gpu_migration_dist_t gpu_migration; + int last_gpu; + feedback_est_t gpu_migration_est[MIG_LAST+1]; // local, near, med, far + + lt_t accum_gpu_time; + lt_t gpu_time_stamp; + + unsigned int suspend_gpu_tracker_on_block:1; +#endif +#endif + #ifdef CONFIG_LITMUS_LOCKING /* Is the task being priority-boosted by a locking protocol? */ unsigned int priority_boosted:1; @@ -133,7 +223,15 @@ struct rt_param { * could point to self if PI does not result in * an increased task priority. */ - struct task_struct* inh_task; + struct task_struct* inh_task; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + raw_spinlock_t hp_blocked_tasks_lock; + struct binheap_handle hp_blocked_tasks; + + /* pointer to lock upon which is currently blocked */ + struct litmus_lock* blocked_lock; +#endif #ifdef CONFIG_NP_SECTION /* For the FMLP under PSN-EDF, it is required to make the task diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index 6e7cabdddae8..24a6858b4b0b 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -11,6 +11,12 @@ #include #endif +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +#include +#endif + +#include + /************************ setup/tear down ********************/ typedef long (*activate_plugin_t) (void); @@ -29,7 +35,6 @@ typedef struct task_struct* (*schedule_t)(struct task_struct * prev); */ typedef void (*finish_switch_t)(struct task_struct *prev); - /********************* task state changes ********************/ /* Called to setup a new real-time task. 
@@ -58,6 +63,47 @@ typedef void (*task_exit_t) (struct task_struct *); typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type, void* __user config); +struct affinity_observer; +typedef long (*allocate_affinity_observer_t) ( + struct affinity_observer **aff_obs, int type, + void* __user config); + +typedef void (*increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh); +typedef void (*decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh); +typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, unsigned long irqflags); +typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, unsigned long irqflags); + +typedef void (*increase_prio_klitirq_t)(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner); +typedef void (*decrease_prio_klitirqd_t)(struct task_struct* klitirqd, + struct task_struct* old_owner); + + +typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet); +typedef void (*change_prio_pai_tasklet_t)(struct task_struct *old_prio, + struct task_struct *new_prio); +typedef void (*run_tasklets_t)(struct task_struct* next); + +typedef raw_spinlock_t* (*get_dgl_spinlock_t) (struct task_struct *t); + + +typedef int (*higher_prio_t)(struct task_struct* a, struct task_struct* b); + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + +typedef enum +{ + BASE, + EFFECTIVE +} comparison_mode_t; + +typedef int (*__higher_prio_t)(struct task_struct* a, comparison_mode_t a_mod, + struct task_struct* b, comparison_mode_t b_mod); +#endif + /********************* sys call backends ********************/ /* This function causes the caller to sleep until the next release */ @@ -88,14 +134,40 @@ struct sched_plugin { /* task state changes */ admit_task_t admit_task; - task_new_t task_new; + task_new_t task_new; task_wake_up_t task_wake_up; task_block_t task_block; task_exit_t task_exit; + higher_prio_t compare; + #ifdef CONFIG_LITMUS_LOCKING /* locking protocols */ allocate_lock_t allocate_lock; + increase_prio_t increase_prio; + decrease_prio_t decrease_prio; +#endif +#ifdef CONFIG_LITMUS_NESTED_LOCKING + nested_increase_prio_t nested_increase_prio; + nested_decrease_prio_t nested_decrease_prio; + __higher_prio_t __compare; +#endif +#ifdef CONFIG_LITMUS_DGL_SUPPORT + get_dgl_spinlock_t get_dgl_spinlock; +#endif + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + allocate_affinity_observer_t allocate_aff_obs; +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD + increase_prio_klitirq_t increase_prio_klitirqd; + decrease_prio_klitirqd_t decrease_prio_klitirqd; +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + enqueue_pai_tasklet_t enqueue_pai_tasklet; + change_prio_pai_tasklet_t change_prio_pai_tasklet; + run_tasklets_t run_tasklets; #endif } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h index 7ca34cb13881..b1b71f6c5f0c 100644 --- a/include/litmus/sched_trace.h +++ b/include/litmus/sched_trace.h @@ -10,13 +10,14 @@ struct st_trace_header { u8 type; /* Of what type is this record? */ u8 cpu; /* On which CPU was it recorded? */ u16 pid; /* PID of the task. */ - u32 job; /* The job sequence number. */ -}; + u32 job:24; /* The job sequence number. */ + u8 extra; +} __attribute__((packed)); #define ST_NAME_LEN 16 struct st_name_data { char cmd[ST_NAME_LEN];/* The name of the executable of this process. 
*/ -}; +} __attribute__((packed)); struct st_param_data { /* regular params */ u32 wcet; @@ -25,30 +26,29 @@ struct st_param_data { /* regular params */ u8 partition; u8 class; u8 __unused[2]; -}; +} __attribute__((packed)); struct st_release_data { /* A job is was/is going to be released. */ u64 release; /* What's the release time? */ u64 deadline; /* By when must it finish? */ -}; +} __attribute__((packed)); struct st_assigned_data { /* A job was asigned to a CPU. */ u64 when; u8 target; /* Where should it execute? */ u8 __unused[7]; -}; +} __attribute__((packed)); struct st_switch_to_data { /* A process was switched to on a given CPU. */ u64 when; /* When did this occur? */ u32 exec_time; /* Time the current job has executed. */ u8 __unused[4]; - -}; +} __attribute__((packed)); struct st_switch_away_data { /* A process was switched away from on a given CPU. */ u64 when; u64 exec_time; -}; +} __attribute__((packed)); struct st_completion_data { /* A job completed. */ u64 when; @@ -56,35 +56,108 @@ struct st_completion_data { /* A job completed. */ * next task automatically; set to 0 otherwise. */ u8 __uflags:7; - u8 __unused[7]; -}; + u16 nv_int_count; + u8 __unused[5]; +} __attribute__((packed)); struct st_block_data { /* A task blocks. */ u64 when; u64 __unused; -}; +} __attribute__((packed)); struct st_resume_data { /* A task resumes. */ u64 when; u64 __unused; -}; +} __attribute__((packed)); struct st_action_data { u64 when; u8 action; u8 __unused[7]; -}; +} __attribute__((packed)); struct st_sys_release_data { u64 when; u64 release; -}; +} __attribute__((packed)); + + +struct st_tasklet_release_data { + u64 when; + u64 __unused; +} __attribute__((packed)); + +struct st_tasklet_begin_data { + u64 when; + u16 exe_pid; + u8 __unused[6]; +} __attribute__((packed)); + +struct st_tasklet_end_data { + u64 when; + u16 exe_pid; + u8 flushed; + u8 __unused[5]; +} __attribute__((packed)); + + +struct st_work_release_data { + u64 when; + u64 __unused; +} __attribute__((packed)); + +struct st_work_begin_data { + u64 when; + u16 exe_pid; + u8 __unused[6]; +} __attribute__((packed)); + +struct st_work_end_data { + u64 when; + u16 exe_pid; + u8 flushed; + u8 __unused[5]; +} __attribute__((packed)); + +struct st_effective_priority_change_data { + u64 when; + u16 inh_pid; + u8 __unused[6]; +} __attribute__((packed)); + +struct st_nv_interrupt_begin_data { + u64 when; + u32 device; + u32 serialNumber; +} __attribute__((packed)); + +struct st_nv_interrupt_end_data { + u64 when; + u32 device; + u32 serialNumber; +} __attribute__((packed)); + +struct st_prediction_err_data { + u64 distance; + u64 rel_err; +} __attribute__((packed)); + +struct st_migration_data { + u64 observed; + u64 estimated; +} __attribute__((packed)); + +struct migration_info { + u64 observed; + u64 estimated; + u8 distance; +} __attribute__((packed)); #define DATA(x) struct st_ ## x ## _data x; typedef enum { - ST_NAME = 1, /* Start at one, so that we can spot - * uninitialized records. */ + ST_NAME = 1, /* Start at one, so that we can spot + * uninitialized records. 
*/ ST_PARAM, ST_RELEASE, ST_ASSIGNED, @@ -94,7 +167,19 @@ typedef enum { ST_BLOCK, ST_RESUME, ST_ACTION, - ST_SYS_RELEASE + ST_SYS_RELEASE, + ST_TASKLET_RELEASE, + ST_TASKLET_BEGIN, + ST_TASKLET_END, + ST_WORK_RELEASE, + ST_WORK_BEGIN, + ST_WORK_END, + ST_EFF_PRIO_CHANGE, + ST_NV_INTERRUPT_BEGIN, + ST_NV_INTERRUPT_END, + + ST_PREDICTION_ERR, + ST_MIGRATION, } st_event_record_type_t; struct st_event_record { @@ -113,8 +198,20 @@ struct st_event_record { DATA(resume); DATA(action); DATA(sys_release); + DATA(tasklet_release); + DATA(tasklet_begin); + DATA(tasklet_end); + DATA(work_release); + DATA(work_begin); + DATA(work_end); + DATA(effective_priority_change); + DATA(nv_interrupt_begin); + DATA(nv_interrupt_end); + + DATA(prediction_err); + DATA(migration); } data; -}; +} __attribute__((packed)); #undef DATA @@ -129,6 +226,8 @@ struct st_event_record { ft_event1(id, callback, task) #define SCHED_TRACE2(id, callback, task, xtra) \ ft_event2(id, callback, task, xtra) +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) \ + ft_event3(id, callback, task, xtra1, xtra2) /* provide prototypes; needed on sparc64 */ #ifndef NO_TASK_TRACE_DECLS @@ -155,12 +254,58 @@ feather_callback void do_sched_trace_action(unsigned long id, feather_callback void do_sched_trace_sys_release(unsigned long id, lt_t* start); + +feather_callback void do_sched_trace_tasklet_release(unsigned long id, + struct task_struct* owner); +feather_callback void do_sched_trace_tasklet_begin(unsigned long id, + struct task_struct* owner); +feather_callback void do_sched_trace_tasklet_end(unsigned long id, + struct task_struct* owner, + unsigned long flushed); + +feather_callback void do_sched_trace_work_release(unsigned long id, + struct task_struct* owner); +feather_callback void do_sched_trace_work_begin(unsigned long id, + struct task_struct* owner, + struct task_struct* exe); +feather_callback void do_sched_trace_work_end(unsigned long id, + struct task_struct* owner, + struct task_struct* exe, + unsigned long flushed); + +feather_callback void do_sched_trace_eff_prio_change(unsigned long id, + struct task_struct* task, + struct task_struct* inh); + +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, + u32 device); +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, + unsigned long unused); + +feather_callback void do_sched_trace_prediction_err(unsigned long id, + struct task_struct* task, + gpu_migration_dist_t* distance, + fp_t* rel_err); + + + + + +feather_callback void do_sched_trace_migration(unsigned long id, + struct task_struct* task, + struct migration_info* mig_info); + + +/* returns true if we're tracing an interrupt on current CPU */ +/* int is_interrupt_tracing_active(void); */ + #endif #else #define SCHED_TRACE(id, callback, task) /* no tracing */ #define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) #endif @@ -193,6 +338,41 @@ feather_callback void do_sched_trace_sys_release(unsigned long id, SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when) +#define sched_trace_tasklet_release(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 11, do_sched_trace_tasklet_release, t) + +#define sched_trace_tasklet_begin(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 12, do_sched_trace_tasklet_begin, t) + +#define sched_trace_tasklet_end(t, flushed) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 13, do_sched_trace_tasklet_end, t, flushed) + + +#define sched_trace_work_release(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 14, 
do_sched_trace_work_release, t) + +#define sched_trace_work_begin(t, e) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 15, do_sched_trace_work_begin, t, e) + +#define sched_trace_work_end(t, e, flushed) \ + SCHED_TRACE3(SCHED_TRACE_BASE_ID + 16, do_sched_trace_work_end, t, e, flushed) + + +#define sched_trace_eff_prio_change(t, inh) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 17, do_sched_trace_eff_prio_change, t, inh) + + +#define sched_trace_nv_interrupt_begin(d) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d) +#define sched_trace_nv_interrupt_end(d) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d) + +#define sched_trace_prediction_err(t, dist, rel_err) \ + SCHED_TRACE3(SCHED_TRACE_BASE_ID + 20, do_sched_trace_prediction_err, t, dist, rel_err) + +#define sched_trace_migration(t, mig_info) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 21, do_sched_trace_migration, t, mig_info) + #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ #endif /* __KERNEL__ */ diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h new file mode 100644 index 000000000000..e70e45e4cf51 --- /dev/null +++ b/include/litmus/sched_trace_external.h @@ -0,0 +1,78 @@ +/* + * sched_trace.h -- record scheduler events to a byte stream for offline analysis. + */ +#ifndef _LINUX_SCHED_TRACE_EXTERNAL_H_ +#define _LINUX_SCHED_TRACE_EXTERNAL_H_ + + +#ifdef CONFIG_SCHED_TASK_TRACE +extern void __sched_trace_tasklet_begin_external(struct task_struct* t); +static inline void sched_trace_tasklet_begin_external(struct task_struct* t) +{ + __sched_trace_tasklet_begin_external(t); +} + +extern void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed); +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed) +{ + __sched_trace_tasklet_end_external(t, flushed); +} + +extern void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e); +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e) +{ + __sched_trace_work_begin_external(t, e); +} + +extern void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f); +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f) +{ + __sched_trace_work_end_external(t, e, f); +} + +#ifdef CONFIG_LITMUS_NVIDIA +extern void __sched_trace_nv_interrupt_begin_external(u32 device); +static inline void sched_trace_nv_interrupt_begin_external(u32 device) +{ + __sched_trace_nv_interrupt_begin_external(device); +} + +extern void __sched_trace_nv_interrupt_end_external(u32 device); +static inline void sched_trace_nv_interrupt_end_external(u32 device) +{ + __sched_trace_nv_interrupt_end_external(device); +} +#endif + +#else + +// no tracing. 
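// (With CONFIG_SCHED_TASK_TRACE disabled, the stubs below are empty static
//  inlines, so external callers can invoke these hooks unconditionally and
//  the calls compile away.)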
+static inline void sched_trace_tasklet_begin_external(struct task_struct* t){} +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed){} +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e){} +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f){} + +#ifdef CONFIG_LITMUS_NVIDIA +static inline void sched_trace_nv_interrupt_begin_external(u32 device){} +static inline void sched_trace_nv_interrupt_end_external(u32 device){} +#endif + +#endif + + +#ifdef CONFIG_LITMUS_NVIDIA + +#define EX_TS(evt) \ +extern void __##evt(void); \ +static inline void EX_##evt(void) { __##evt(); } + +EX_TS(TS_NV_TOPISR_START) +EX_TS(TS_NV_TOPISR_END) +EX_TS(TS_NV_BOTISR_START) +EX_TS(TS_NV_BOTISR_END) +EX_TS(TS_NV_RELEASE_BOTISR_START) +EX_TS(TS_NV_RELEASE_BOTISR_END) + +#endif + +#endif diff --git a/include/litmus/trace.h b/include/litmus/trace.h index e809376d6487..e078aee4234d 100644 --- a/include/litmus/trace.h +++ b/include/litmus/trace.h @@ -103,14 +103,46 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_ #define TS_LOCK_START TIMESTAMP(170) #define TS_LOCK_SUSPEND TIMESTAMP(171) #define TS_LOCK_RESUME TIMESTAMP(172) -#define TS_LOCK_END TIMESTAMP(173) +#define TS_LOCK_END TIMESTAMP(173) + +#ifdef CONFIG_LITMUS_DGL_SUPPORT +#define TS_DGL_LOCK_START TIMESTAMP(175) +#define TS_DGL_LOCK_SUSPEND TIMESTAMP(176) +#define TS_DGL_LOCK_RESUME TIMESTAMP(177) +#define TS_DGL_LOCK_END TIMESTAMP(178) +#endif #define TS_UNLOCK_START TIMESTAMP(180) #define TS_UNLOCK_END TIMESTAMP(181) +#ifdef CONFIG_LITMUS_DGL_SUPPORT +#define TS_DGL_UNLOCK_START TIMESTAMP(185) +#define TS_DGL_UNLOCK_END TIMESTAMP(186) +#endif + #define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) #define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) #define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when)) + +#ifdef CONFIG_LITMUS_NVIDIA + +#define TS_NV_TOPISR_START TIMESTAMP(200) +#define TS_NV_TOPISR_END TIMESTAMP(201) + +#define TS_NV_BOTISR_START TIMESTAMP(202) +#define TS_NV_BOTISR_END TIMESTAMP(203) + +#define TS_NV_RELEASE_BOTISR_START TIMESTAMP(204) +#define TS_NV_RELEASE_BOTISR_END TIMESTAMP(205) + +#endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#define TS_NV_SCHED_BOTISR_START TIMESTAMP(206) +#define TS_NV_SCHED_BOTISR_END TIMESTAMP(207) +#endif + + #endif /* !_SYS_TRACE_H_ */ diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index 94264c27d9ac..4fa514c89605 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h @@ -17,5 +17,8 @@ #define __NR_wait_for_ts_release __LSC(9) #define __NR_release_ts __LSC(10) #define __NR_null_call __LSC(11) +#define __NR_litmus_dgl_lock __LSC(12) +#define __NR_litmus_dgl_unlock __LSC(13) +#define __NR_register_nv_device __LSC(14) -#define NR_litmus_syscalls 12 +#define NR_litmus_syscalls 15 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index d5ced0d2642c..f80dc45dc185 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -29,5 +29,12 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release) __SYSCALL(__NR_release_ts, sys_release_ts) #define __NR_null_call __LSC(11) __SYSCALL(__NR_null_call, sys_null_call) +#define __NR_litmus_dgl_lock __LSC(12) +__SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock) +#define __NR_litmus_dgl_unlock __LSC(13) +__SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock) +#define __NR_register_nv_device __LSC(14) 
+__SYSCALL(__NR_register_nv_device, sys_register_nv_device) -#define NR_litmus_syscalls 12 + +#define NR_litmus_syscalls 15 diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 298c9276dfdb..2bdcdc3691e5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -542,7 +542,7 @@ static void print_lock(struct held_lock *hlock) print_ip_sym(hlock->acquire_ip); } -static void lockdep_print_held_locks(struct task_struct *curr) +void lockdep_print_held_locks(struct task_struct *curr) { int i, depth = curr->lockdep_depth; @@ -558,6 +558,7 @@ static void lockdep_print_held_locks(struct task_struct *curr) print_lock(curr->held_locks + i); } } +EXPORT_SYMBOL(lockdep_print_held_locks); static void print_kernel_version(void) { @@ -583,6 +584,10 @@ static int static_obj(void *obj) end = (unsigned long) &_end, addr = (unsigned long) obj; + // GLENN + return 1; + + /* * static variable? */ diff --git a/kernel/mutex.c b/kernel/mutex.c index d607ed5dd441..2f363b9bfc1f 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -498,3 +498,128 @@ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) return 1; } EXPORT_SYMBOL(atomic_dec_and_mutex_lock); + + + + +void mutex_lock_sfx(struct mutex *lock, + side_effect_t pre, unsigned long pre_arg, + side_effect_t post, unsigned long post_arg) +{ + long state = TASK_UNINTERRUPTIBLE; + + struct task_struct *task = current; + struct mutex_waiter waiter; + unsigned long flags; + + preempt_disable(); + mutex_acquire(&lock->dep_map, subclass, 0, ip); + + spin_lock_mutex(&lock->wait_lock, flags); + + if(pre) + { + if(unlikely(pre(pre_arg))) + { + // this will fuck with lockdep's CONFIG_PROVE_LOCKING... + spin_unlock_mutex(&lock->wait_lock, flags); + preempt_enable(); + return; + } + } + + debug_mutex_lock_common(lock, &waiter); + debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); + + /* add waiting tasks to the end of the waitqueue (FIFO): */ + list_add_tail(&waiter.list, &lock->wait_list); + waiter.task = task; + + if (atomic_xchg(&lock->count, -1) == 1) + goto done; + + lock_contended(&lock->dep_map, ip); + + for (;;) { + /* + * Lets try to take the lock again - this is needed even if + * we get here for the first time (shortly after failing to + * acquire the lock), to make sure that we get a wakeup once + * it's unlocked. Later on, if we sleep, this is the + * operation that gives us the lock. We xchg it to -1, so + * that when we release the lock, we properly wake up the + * other waiters: + */ + if (atomic_xchg(&lock->count, -1) == 1) + break; + + __set_task_state(task, state); + + /* didnt get the lock, go to sleep: */ + spin_unlock_mutex(&lock->wait_lock, flags); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + spin_lock_mutex(&lock->wait_lock, flags); + } + +done: + lock_acquired(&lock->dep_map, ip); + /* got the lock - rejoice! 
*/ + mutex_remove_waiter(lock, &waiter, current_thread_info()); + mutex_set_owner(lock); + + /* set it to 0 if there are no waiters left: */ + if (likely(list_empty(&lock->wait_list))) + atomic_set(&lock->count, 0); + + if(post) + post(post_arg); + + spin_unlock_mutex(&lock->wait_lock, flags); + + debug_mutex_free_waiter(&waiter); + preempt_enable(); +} +EXPORT_SYMBOL(mutex_lock_sfx); + +void mutex_unlock_sfx(struct mutex *lock, + side_effect_t pre, unsigned long pre_arg, + side_effect_t post, unsigned long post_arg) +{ + unsigned long flags; + + spin_lock_mutex(&lock->wait_lock, flags); + + if(pre) + pre(pre_arg); + + //mutex_release(&lock->dep_map, nested, _RET_IP_); + mutex_release(&lock->dep_map, 1, _RET_IP_); + debug_mutex_unlock(lock); + + /* + * some architectures leave the lock unlocked in the fastpath failure + * case, others need to leave it locked. In the later case we have to + * unlock it here + */ + if (__mutex_slowpath_needs_to_unlock()) + atomic_set(&lock->count, 1); + + if (!list_empty(&lock->wait_list)) { + /* get the first entry from the wait-list: */ + struct mutex_waiter *waiter = + list_entry(lock->wait_list.next, + struct mutex_waiter, list); + + debug_mutex_wake_waiter(lock, waiter); + + wake_up_process(waiter->task); + } + + if(post) + post(post_arg); + + spin_unlock_mutex(&lock->wait_lock, flags); +} +EXPORT_SYMBOL(mutex_unlock_sfx); diff --git a/kernel/sched.c b/kernel/sched.c index baaca61bc3a3..f3d9a69a3777 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -83,6 +83,10 @@ #include #include +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + static void litmus_tick(struct rq*, struct task_struct*); #define CREATE_TRACE_POINTS @@ -4305,6 +4309,7 @@ pick_next_task(struct rq *rq) BUG(); /* the idle class will always have a runnable task */ } + /* * schedule() is the main scheduler function. */ @@ -4323,6 +4328,10 @@ need_resched: rcu_note_context_switch(cpu); prev = rq->curr; +#ifdef CONFIG_LITMUS_SOFTIRQD + release_klitirqd_lock(prev); +#endif + /* LITMUS^RT: quickly re-evaluate the scheduling decision * if the previous one is no longer valid after CTX. */ @@ -4411,13 +4420,24 @@ litmus_need_resched_nonpreemptible: goto litmus_need_resched_nonpreemptible; preempt_enable_no_resched(); + if (need_resched()) goto need_resched; +#ifdef LITMUS_SOFTIRQD + reacquire_klitirqd_lock(prev); +#endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + litmus->run_tasklets(prev); +#endif + srp_ceiling_block(); } EXPORT_SYMBOL(schedule); + + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER static inline bool owner_running(struct mutex *lock, struct task_struct *owner) @@ -4561,6 +4581,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, } } + /** * __wake_up - wake up threads blocked on a waitqueue. 
* @q: the waitqueue @@ -4747,6 +4768,12 @@ void __sched wait_for_completion(struct completion *x) } EXPORT_SYMBOL(wait_for_completion); +void __sched __wait_for_completion_locked(struct completion *x) +{ + do_wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(__wait_for_completion_locked); + /** * wait_for_completion_timeout: - waits for completion of a task (w/timeout) * @x: holds the state of this particular completion diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 94a62c0d4ade..c947a046a6d7 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -33,11 +33,11 @@ #include #include -static noinline void __down(struct semaphore *sem); +noinline void __down(struct semaphore *sem); static noinline int __down_interruptible(struct semaphore *sem); static noinline int __down_killable(struct semaphore *sem); static noinline int __down_timeout(struct semaphore *sem, long jiffies); -static noinline void __up(struct semaphore *sem); +noinline void __up(struct semaphore *sem); /** * down - acquire the semaphore @@ -190,11 +190,13 @@ EXPORT_SYMBOL(up); /* Functions for the contended case */ +/* struct semaphore_waiter { struct list_head list; struct task_struct *task; int up; }; + */ /* * Because this function is inlined, the 'state' parameter will be @@ -233,10 +235,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state, return -EINTR; } -static noinline void __sched __down(struct semaphore *sem) +noinline void __sched __down(struct semaphore *sem) { __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } +EXPORT_SYMBOL(__down); + static noinline int __sched __down_interruptible(struct semaphore *sem) { @@ -253,7 +257,7 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); } -static noinline void __sched __up(struct semaphore *sem) +noinline void __sched __up(struct semaphore *sem) { struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, struct semaphore_waiter, list); @@ -261,3 +265,4 @@ static noinline void __sched __up(struct semaphore *sem) waiter->up = 1; wake_up_process(waiter->task); } +EXPORT_SYMBOL(__up); \ No newline at end of file diff --git a/kernel/softirq.c b/kernel/softirq.c index fca82c32042b..5ce271675662 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -29,6 +29,15 @@ #include #include + +#include +#include + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#include +#endif + /* - No shared variables, all the data are CPU local. - If a softirq needs serialization, let it serialize itself @@ -67,7 +76,7 @@ char *softirq_to_name[NR_SOFTIRQS] = { * to the pending events, so lets the scheduler to balance * the softirq load for us. */ -static void wakeup_softirqd(void) +void wakeup_softirqd(void) { /* Interrupts are disabled: no need to stop preemption */ struct task_struct *tsk = __this_cpu_read(ksoftirqd); @@ -193,6 +202,7 @@ void local_bh_enable_ip(unsigned long ip) } EXPORT_SYMBOL(local_bh_enable_ip); + /* * We restart softirq processing MAX_SOFTIRQ_RESTART times, * and we fall back to softirqd after that. 
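/*
 * Illustrative sketch (not part of the patch): __wait_for_completion_locked(),
 * added to kernel/sched.c above, is the "caller already holds x->wait.lock"
 * variant of wait_for_completion().  do_wait_for_common() releases and
 * re-acquires x->wait.lock around each sleep, so a caller is presumably
 * expected to look roughly like the hypothetical helper below.
 */
#include <linux/completion.h>
#include <linux/spinlock.h>

static void example_wait_locked(struct completion *done)
{
	spin_lock_irq(&done->wait.lock);
	/* ... publish state the waker will act on, under the wait lock ... */
	__wait_for_completion_locked(done); /* sleeps; wait.lock is dropped and re-taken internally */
	/* wait.lock is held again at this point */
	spin_unlock_irq(&done->wait.lock);
}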
@@ -206,65 +216,65 @@ EXPORT_SYMBOL(local_bh_enable_ip); asmlinkage void __do_softirq(void) { - struct softirq_action *h; - __u32 pending; - int max_restart = MAX_SOFTIRQ_RESTART; - int cpu; + struct softirq_action *h; + __u32 pending; + int max_restart = MAX_SOFTIRQ_RESTART; + int cpu; - pending = local_softirq_pending(); - account_system_vtime(current); + pending = local_softirq_pending(); + account_system_vtime(current); - __local_bh_disable((unsigned long)__builtin_return_address(0), - SOFTIRQ_OFFSET); - lockdep_softirq_enter(); + __local_bh_disable((unsigned long)__builtin_return_address(0), + SOFTIRQ_OFFSET); + lockdep_softirq_enter(); - cpu = smp_processor_id(); + cpu = smp_processor_id(); restart: - /* Reset the pending bitmask before enabling irqs */ - set_softirq_pending(0); + /* Reset the pending bitmask before enabling irqs */ + set_softirq_pending(0); - local_irq_enable(); + local_irq_enable(); - h = softirq_vec; - - do { - if (pending & 1) { - unsigned int vec_nr = h - softirq_vec; - int prev_count = preempt_count(); - - kstat_incr_softirqs_this_cpu(vec_nr); - - trace_softirq_entry(vec_nr); - h->action(h); - trace_softirq_exit(vec_nr); - if (unlikely(prev_count != preempt_count())) { - printk(KERN_ERR "huh, entered softirq %u %s %p" - "with preempt_count %08x," - " exited with %08x?\n", vec_nr, - softirq_to_name[vec_nr], h->action, - prev_count, preempt_count()); - preempt_count() = prev_count; - } + h = softirq_vec; - rcu_bh_qs(cpu); - } - h++; - pending >>= 1; - } while (pending); + do { + if (pending & 1) { + unsigned int vec_nr = h - softirq_vec; + int prev_count = preempt_count(); - local_irq_disable(); + kstat_incr_softirqs_this_cpu(vec_nr); - pending = local_softirq_pending(); - if (pending && --max_restart) - goto restart; + trace_softirq_entry(vec_nr); + h->action(h); + trace_softirq_exit(vec_nr); + if (unlikely(prev_count != preempt_count())) { + printk(KERN_ERR "huh, entered softirq %u %s %p" + "with preempt_count %08x," + " exited with %08x?\n", vec_nr, + softirq_to_name[vec_nr], h->action, + prev_count, preempt_count()); + preempt_count() = prev_count; + } - if (pending) - wakeup_softirqd(); + rcu_bh_qs(cpu); + } + h++; + pending >>= 1; + } while (pending); - lockdep_softirq_exit(); + local_irq_disable(); - account_system_vtime(current); - __local_bh_enable(SOFTIRQ_OFFSET); + pending = local_softirq_pending(); + if (pending && --max_restart) + goto restart; + + if (pending) + wakeup_softirqd(); + + lockdep_softirq_exit(); + + account_system_vtime(current); + __local_bh_enable(SOFTIRQ_OFFSET); } #ifndef __ARCH_HAS_DO_SOFTIRQ @@ -402,7 +412,98 @@ struct tasklet_head static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); +#ifdef CONFIG_LITMUS_NVIDIA +static int __do_nv_now(struct tasklet_struct* tasklet) +{ + int success = 1; + + if(tasklet_trylock(tasklet)) { + if (!atomic_read(&tasklet->count)) { + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) { + BUG(); + } + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + } + else { + success = 0; + } + + tasklet_unlock(tasklet); + } + else { + success = 0; + } + + return success; +} +#endif + + void __tasklet_schedule(struct tasklet_struct *t) +{ +#ifdef CONFIG_LITMUS_NVIDIA + if(is_nvidia_func(t->func)) + { +#if 0 + // do nvidia tasklets right away and return + if(__do_nv_now(t)) + return; +#else + u32 nvidia_device = get_tasklet_nv_device_num(t); + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", + // __FUNCTION__, 
nvidia_device,litmus_clock()); + + unsigned long flags; + struct task_struct* device_owner; + + lock_nv_registry(nvidia_device, &flags); + + device_owner = get_nv_max_device_owner(nvidia_device); + + if(device_owner==NULL) + { + t->owner = NULL; + } + else + { + if(is_realtime(device_owner)) + { + TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", + __FUNCTION__, nvidia_device,litmus_clock()); + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", + __FUNCTION__,device_owner->pid,nvidia_device); + + t->owner = device_owner; + sched_trace_tasklet_release(t->owner); + + if(likely(_litmus_tasklet_schedule(t,nvidia_device))) + { + unlock_nv_registry(nvidia_device, &flags); + return; + } + else + { + t->owner = NULL; /* fall through to normal scheduling */ + } + } + else + { + t->owner = NULL; + } + } + unlock_nv_registry(nvidia_device, &flags); +#endif + } + +#endif + + ___tasklet_schedule(t); +} +EXPORT_SYMBOL(__tasklet_schedule); + + +void ___tasklet_schedule(struct tasklet_struct *t) { unsigned long flags; @@ -413,10 +514,64 @@ void __tasklet_schedule(struct tasklet_struct *t) raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_restore(flags); } +EXPORT_SYMBOL(___tasklet_schedule); -EXPORT_SYMBOL(__tasklet_schedule); void __tasklet_hi_schedule(struct tasklet_struct *t) +{ +#ifdef CONFIG_LITMUS_NVIDIA + if(is_nvidia_func(t->func)) + { + u32 nvidia_device = get_tasklet_nv_device_num(t); + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", + // __FUNCTION__, nvidia_device,litmus_clock()); + + unsigned long flags; + struct task_struct* device_owner; + + lock_nv_registry(nvidia_device, &flags); + + device_owner = get_nv_max_device_owner(nvidia_device); + + if(device_owner==NULL) + { + t->owner = NULL; + } + else + { + if( is_realtime(device_owner)) + { + TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n", + __FUNCTION__, nvidia_device,litmus_clock()); + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", + __FUNCTION__,device_owner->pid,nvidia_device); + + t->owner = device_owner; + sched_trace_tasklet_release(t->owner); + if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device))) + { + unlock_nv_registry(nvidia_device, &flags); + return; + } + else + { + t->owner = NULL; /* fall through to normal scheduling */ + } + } + else + { + t->owner = NULL; + } + } + unlock_nv_registry(nvidia_device, &flags); + } +#endif + + ___tasklet_hi_schedule(t); +} +EXPORT_SYMBOL(__tasklet_hi_schedule); + +void ___tasklet_hi_schedule(struct tasklet_struct* t) { unsigned long flags; @@ -427,10 +582,64 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) raise_softirq_irqoff(HI_SOFTIRQ); local_irq_restore(flags); } - -EXPORT_SYMBOL(__tasklet_hi_schedule); +EXPORT_SYMBOL(___tasklet_hi_schedule); void __tasklet_hi_schedule_first(struct tasklet_struct *t) +{ + BUG_ON(!irqs_disabled()); +#ifdef CONFIG_LITMUS_NVIDIA + if(is_nvidia_func(t->func)) + { + u32 nvidia_device = get_tasklet_nv_device_num(t); + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", + // __FUNCTION__, nvidia_device,litmus_clock()); + unsigned long flags; + struct task_struct* device_owner; + + lock_nv_registry(nvidia_device, &flags); + + device_owner = get_nv_max_device_owner(nvidia_device); + + if(device_owner==NULL) + { + t->owner = NULL; + } + else + { + if(is_realtime(device_owner)) + { + TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", + __FUNCTION__, nvidia_device,litmus_clock()); + + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", + 
__FUNCTION__,device_owner->pid,nvidia_device); + + t->owner = device_owner; + sched_trace_tasklet_release(t->owner); + if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device))) + { + unlock_nv_registry(nvidia_device, &flags); + return; + } + else + { + t->owner = NULL; /* fall through to normal scheduling */ + } + } + else + { + t->owner = NULL; + } + } + unlock_nv_registry(nvidia_device, &flags); + } +#endif + + ___tasklet_hi_schedule_first(t); +} +EXPORT_SYMBOL(__tasklet_hi_schedule_first); + +void ___tasklet_hi_schedule_first(struct tasklet_struct* t) { BUG_ON(!irqs_disabled()); @@ -438,8 +647,7 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) __this_cpu_write(tasklet_hi_vec.head, t); __raise_softirq_irqoff(HI_SOFTIRQ); } - -EXPORT_SYMBOL(__tasklet_hi_schedule_first); +EXPORT_SYMBOL(___tasklet_hi_schedule_first); static void tasklet_action(struct softirq_action *a) { @@ -495,6 +703,7 @@ static void tasklet_hi_action(struct softirq_action *a) if (!atomic_read(&t->count)) { if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) BUG(); + t->func(t->data); tasklet_unlock(t); continue; @@ -518,8 +727,13 @@ void tasklet_init(struct tasklet_struct *t, t->next = NULL; t->state = 0; atomic_set(&t->count, 0); + t->func = func; t->data = data; + +#ifdef CONFIG_LITMUS_SOFTIRQD + t->owner = NULL; +#endif } EXPORT_SYMBOL(tasklet_init); @@ -534,6 +748,7 @@ void tasklet_kill(struct tasklet_struct *t) yield(); } while (test_bit(TASKLET_STATE_SCHED, &t->state)); } + tasklet_unlock_wait(t); clear_bit(TASKLET_STATE_SCHED, &t->state); } @@ -808,6 +1023,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu) for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) { if (*i == t) { *i = t->next; + /* If this was the tail element, move the tail ptr */ if (*i == NULL) per_cpu(tasklet_vec, cpu).tail = i; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0400553f0d04..6b59d59ce3cf 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -44,6 +44,13 @@ #include "workqueue_sched.h" +#ifdef CONFIG_LITMUS_NVIDIA +#include +#include +#include +#endif + + enum { /* global_cwq flags */ GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ @@ -1047,9 +1054,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, work_flags |= WORK_STRUCT_DELAYED; worklist = &cwq->delayed_works; } - insert_work(cwq, work, worklist, work_flags); - spin_unlock_irqrestore(&gcwq->lock, flags); } @@ -2687,10 +2692,70 @@ EXPORT_SYMBOL(cancel_delayed_work_sync); */ int schedule_work(struct work_struct *work) { - return queue_work(system_wq, work); +#if 0 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + if(is_nvidia_func(work->func)) + { + u32 nvidiaDevice = get_work_nv_device_num(work); + + //1) Ask Litmus which task owns GPU . (API to be defined.) + unsigned long flags; + struct task_struct* device_owner; + + lock_nv_registry(nvidiaDevice, &flags); + + device_owner = get_nv_max_device_owner(nvidiaDevice); + + //2) If there is an owner, set work->owner to the owner's task struct. 
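		// (Descriptive note: this entire NVIDIA redirection path is disabled
		//  by the enclosing "#if 0", so schedule_work() currently always
		//  falls through to __schedule_work() below.)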
+ if(device_owner==NULL) + { + work->owner = NULL; + //TRACE("%s: the owner task of NVIDIA Device %u is NULL\n",__FUNCTION__,nvidiaDevice); + } + else + { + if( is_realtime(device_owner)) + { + TRACE("%s: Handling NVIDIA work for device\t%u\tat\t%llu\n", + __FUNCTION__, nvidiaDevice,litmus_clock()); + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", + __FUNCTION__, + device_owner->pid, + nvidiaDevice); + + //3) Call litmus_schedule_work() and return (don't execute the rest + // of schedule_schedule()). + work->owner = device_owner; + sched_trace_work_release(work->owner); + if(likely(litmus_schedule_work(work, nvidiaDevice))) + { + unlock_nv_registry(nvidiaDevice, &flags); + return 1; + } + else + { + work->owner = NULL; /* fall through to normal work scheduling */ + } + } + else + { + work->owner = NULL; + } + } + unlock_nv_registry(nvidiaDevice, &flags); + } +#endif +#endif + return(__schedule_work(work)); } EXPORT_SYMBOL(schedule_work); +int __schedule_work(struct work_struct* work) +{ + return queue_work(system_wq, work); +} +EXPORT_SYMBOL(__schedule_work); + /* * schedule_work_on - put work task on a specific cpu * @cpu: cpu to put the work task on diff --git a/litmus/Kconfig b/litmus/Kconfig index 94b48e199577..8c156e4da528 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -60,6 +60,42 @@ config LITMUS_LOCKING Say Yes if you want to include locking protocols such as the FMLP and Baker's SRP. +config LITMUS_AFFINITY_LOCKING + bool "Enable affinity infrastructure in k-exclusion locking protocols." + depends on LITMUS_LOCKING + default n + help + Enable affinity tracking infrastructure in k-exclusion locking protocols. + This only enabled the *infrastructure* not actual affinity algorithms. + + If unsure, say No. + +config LITMUS_NESTED_LOCKING + bool "Support for nested inheritance in locking protocols" + depends on LITMUS_LOCKING + default n + help + Enable nested priority inheritance. + +config LITMUS_DGL_SUPPORT + bool "Support for dynamic group locks" + depends on LITMUS_NESTED_LOCKING + default n + help + Enable dynamic group lock support. + +config LITMUS_MAX_DGL_SIZE + int "Maximum size of a dynamic group lock." + depends on LITMUS_DGL_SUPPORT + range 1 128 + default "10" + help + Dynamic group lock data structures are allocated on the process + stack when a group is requested. We set a maximum size of + locks in a dynamic group lock to avoid dynamic allocation. + + TODO: Batch DGL requests exceeding LITMUS_MAX_DGL_SIZE. + endmenu menu "Performance Enhancements" @@ -121,7 +157,7 @@ config SCHED_TASK_TRACE config SCHED_TASK_TRACE_SHIFT int "Buffer size for sched_trace_xxx() events" depends on SCHED_TASK_TRACE - range 8 13 + range 8 15 default 9 help @@ -215,4 +251,114 @@ config PREEMPT_STATE_TRACE endmenu +menu "Interrupt Handling" + +choice + prompt "Scheduling of interrupt bottom-halves in Litmus." + default LITMUS_SOFTIRQD_NONE + depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ + help + Schedule tasklets with known priorities in Litmus. + +config LITMUS_SOFTIRQD_NONE + bool "No tasklet scheduling in Litmus." + help + Don't schedule tasklets in Litmus. Default. + +config LITMUS_SOFTIRQD + bool "Spawn klitirqd interrupt handling threads." + help + Create klitirqd interrupt handling threads. Work must be + specifically dispatched to these workers. (Softirqs for + Litmus tasks are not magically redirected to klitirqd.) + + G-EDF/RM, C-EDF/RM ONLY for now! + + +config LITMUS_PAI_SOFTIRQD + bool "Defer tasklets to context switch points." 
+ help + Only execute scheduled tasklet bottom halves at + scheduling points. Trades context switch overhead + at the cost of non-preemptive durations of bottom half + processing. + + G-EDF/RM, C-EDF/RM ONLY for now! + +endchoice + + +config NR_LITMUS_SOFTIRQD + int "Number of klitirqd." + depends on LITMUS_SOFTIRQD + range 1 4096 + default "1" + help + Should be <= to the number of CPUs in your system. + +config LITMUS_NVIDIA + bool "Litmus handling of NVIDIA interrupts." + default n + help + Direct tasklets from NVIDIA devices to Litmus's klitirqd + or PAI interrupt handling routines. + + If unsure, say No. + +config LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT + bool "Enable affinity-aware heuristics to improve GPU assignment." + depends on LITMUS_NVIDIA && LITMUS_AFFINITY_LOCKING + default n + help + Enable several heuristics to improve the assignment + of GPUs to real-time tasks to reduce the overheads + of memory migrations. + + If unsure, say No. + +config NV_DEVICE_NUM + int "Number of NVIDIA GPUs." + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD + range 1 4096 + default "1" + help + Should be (<= to the number of CPUs) and + (<= to the number of GPUs) in your system. + +config NV_MAX_SIMULT_USERS + int "Maximum number of threads sharing a GPU simultanously" + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD + range 1 3 + default "2" + help + Should be equal to the #copy_engines + #execution_engines + of the GPUs in your system. + + Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?) + Consumer Fermi/Kepler GPUs = 2 (GTX-4xx thru -6xx) + Older = 1 (ex. GTX-2xx) + +choice + prompt "CUDA/Driver Version Support" + default CUDA_4_0 + depends on LITMUS_NVIDIA + help + Select the version of CUDA/driver to support. + +config CUDA_4_0 + bool "CUDA 4.0" + depends on LITMUS_NVIDIA + help + Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40) + +config CUDA_3_2 + bool "CUDA 3.2" + depends on LITMUS_NVIDIA + help + Support CUDA 3.2 (dev. driver version: x86_64-260.24) + +endchoice + +endmenu + endmenu diff --git a/litmus/Makefile b/litmus/Makefile index 7338180f196f..080cbf694a41 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -15,9 +15,11 @@ obj-y = sched_plugin.o litmus.o \ locking.o \ srp.o \ bheap.o \ + binheap.o \ ctrldev.o \ sched_gsn_edf.o \ - sched_psn_edf.o + sched_psn_edf.o \ + kfmlp_lock.o obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o @@ -27,3 +29,10 @@ obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o + +obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o +obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o +obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o +obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o + +obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o gpu_affinity.o diff --git a/litmus/affinity.c b/litmus/affinity.c index 3fa6dd789400..cd93249b5506 100644 --- a/litmus/affinity.c +++ b/litmus/affinity.c @@ -26,7 +26,7 @@ void init_topology(void) { cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]); } printk("CPU %d has %d neighbors at level %d. 
(mask = %lx)\n", - cpu, neigh_info[cpu].size[i], i, + cpu, neigh_info[cpu].size[i], i, *cpumask_bits(neigh_info[cpu].neighbors[i])); } diff --git a/litmus/binheap.c b/litmus/binheap.c new file mode 100644 index 000000000000..8d42403ad52c --- /dev/null +++ b/litmus/binheap.c @@ -0,0 +1,443 @@ +#include + +//extern void dump_node_data(struct binheap_node* parent, struct binheap_node* child); +//extern void dump_node_data2(struct binheap_handle *handle, struct binheap_node* bad_node); + +int binheap_is_in_this_heap(struct binheap_node *node, + struct binheap_handle* heap) +{ + if(!binheap_is_in_heap(node)) { + return 0; + } + + while(node->parent != NULL) { + node = node->parent; + } + + return (node == heap->root); +} + +/* Update the node reference pointers. Same logic as Litmus binomial heap. */ +static void __update_ref(struct binheap_node *parent, + struct binheap_node *child) +{ + *(parent->ref_ptr) = child; + *(child->ref_ptr) = parent; + + swap(parent->ref_ptr, child->ref_ptr); +} + +/* Swaps data between two nodes. */ +static void __binheap_swap(struct binheap_node *parent, + struct binheap_node *child) +{ +// if(parent == BINHEAP_POISON || child == BINHEAP_POISON) { +// dump_node_data(parent, child); +// BUG(); +// } + + swap(parent->data, child->data); + __update_ref(parent, child); +} + + +/* Swaps memory and data between two nodes. Actual nodes swap instead of + * just data. Needed when we delete nodes from the heap. + */ +static void __binheap_swap_safe(struct binheap_handle *handle, + struct binheap_node *a, + struct binheap_node *b) +{ + swap(a->data, b->data); + __update_ref(a, b); + + if((a->parent != NULL) && (a->parent == b->parent)) { + /* special case: shared parent */ + swap(a->parent->left, a->parent->right); + } + else { + /* Update pointers to swap parents. */ + + if(a->parent) { + if(a == a->parent->left) { + a->parent->left = b; + } + else { + a->parent->right = b; + } + } + + if(b->parent) { + if(b == b->parent->left) { + b->parent->left = a; + } + else { + b->parent->right = a; + } + } + + swap(a->parent, b->parent); + } + + /* swap children */ + + if(a->left) { + a->left->parent = b; + + if(a->right) { + a->right->parent = b; + } + } + + if(b->left) { + b->left->parent = a; + + if(b->right) { + b->right->parent = a; + } + } + + swap(a->left, b->left); + swap(a->right, b->right); + + + /* update next/last/root pointers */ + + if(a == handle->next) { + handle->next = b; + } + else if(b == handle->next) { + handle->next = a; + } + + if(a == handle->last) { + handle->last = b; + } + else if(b == handle->last) { + handle->last = a; + } + + if(a == handle->root) { + handle->root = b; + } + else if(b == handle->root) { + handle->root = a; + } +} + + +/** + * Update the pointer to the last node in the complete binary tree. + * Called internally after the root node has been deleted. + */ +static void __binheap_update_last(struct binheap_handle *handle) +{ + struct binheap_node *temp = handle->last; + + /* find a "bend" in the tree. */ + while(temp->parent && (temp == temp->parent->left)) { + temp = temp->parent; + } + + /* step over to sibling if we're not at root */ + if(temp->parent != NULL) { + temp = temp->parent->left; + } + + /* now travel right as far as possible. */ + while(temp->right != NULL) { + temp = temp->right; + } + + /* take one step to the left if we're not at the bottom-most level. 
*/ + if(temp->left != NULL) { + temp = temp->left; + } + + //BUG_ON(!(temp->left == NULL && temp->right == NULL)); + + handle->last = temp; +} + +/** + * Update the pointer to the node that will take the next inserted node. + * Called internally after a node has been inserted. + */ +static void __binheap_update_next(struct binheap_handle *handle) +{ + struct binheap_node *temp = handle->next; + + /* find a "bend" in the tree. */ + while(temp->parent && (temp == temp->parent->right)) { + temp = temp->parent; + } + + /* step over to sibling if we're not at root */ + if(temp->parent != NULL) { + temp = temp->parent->right; + } + + /* now travel left as far as possible. */ + while(temp->left != NULL) { + temp = temp->left; + } + + handle->next = temp; +} + + + +/* bubble node up towards root */ +static void __binheap_bubble_up( + struct binheap_handle *handle, + struct binheap_node *node) +{ + //BUG_ON(!binheap_is_in_heap(node)); +// if(!binheap_is_in_heap(node)) +// { +// dump_node_data2(handle, node); +// BUG(); +// } + + while((node->parent != NULL) && + ((node->data == BINHEAP_POISON) /* let BINHEAP_POISON data bubble to the top */ || + handle->compare(node, node->parent))) { + __binheap_swap(node->parent, node); + node = node->parent; + +// if(!binheap_is_in_heap(node)) +// { +// dump_node_data2(handle, node); +// BUG(); +// } + } +} + + +/* bubble node down, swapping with min-child */ +static void __binheap_bubble_down(struct binheap_handle *handle) +{ + struct binheap_node *node = handle->root; + + while(node->left != NULL) { + if(node->right && handle->compare(node->right, node->left)) { + if(handle->compare(node->right, node)) { + __binheap_swap(node, node->right); + node = node->right; + } + else { + break; + } + } + else { + if(handle->compare(node->left, node)) { + __binheap_swap(node, node->left); + node = node->left; + } + else { + break; + } + } + } +} + + + +void __binheap_add(struct binheap_node *new_node, + struct binheap_handle *handle, + void *data) +{ +// if(binheap_is_in_heap(new_node)) +// { +// dump_node_data2(handle, new_node); +// BUG(); +// } + + new_node->data = data; + new_node->ref = new_node; + new_node->ref_ptr = &(new_node->ref); + + if(!binheap_empty(handle)) { + /* insert left side first */ + if(handle->next->left == NULL) { + handle->next->left = new_node; + new_node->parent = handle->next; + new_node->left = NULL; + new_node->right = NULL; + + handle->last = new_node; + + __binheap_bubble_up(handle, new_node); + } + else { + /* left occupied. insert right. */ + handle->next->right = new_node; + new_node->parent = handle->next; + new_node->left = NULL; + new_node->right = NULL; + + handle->last = new_node; + + __binheap_update_next(handle); + __binheap_bubble_up(handle, new_node); + } + } + else { + /* first node in heap */ + + new_node->parent = NULL; + new_node->left = NULL; + new_node->right = NULL; + + handle->root = new_node; + handle->next = new_node; + handle->last = new_node; + } +} + + + +/** + * Removes the root node from the heap. The node is removed after coalescing + * the binheap_node with its original data pointer at the root of the tree. + * + * The 'last' node in the tree is then swapped up to the root and bubbled + * down. 
+ */ +void __binheap_delete_root(struct binheap_handle *handle, + struct binheap_node *container) +{ + struct binheap_node *root = handle->root; + +// if(!binheap_is_in_heap(container)) +// { +// dump_node_data2(handle, container); +// BUG(); +// } + + if(root != container) { + /* coalesce */ + __binheap_swap_safe(handle, root, container); + root = container; + } + + if(handle->last != root) { + /* swap 'last' node up to root and bubble it down. */ + + struct binheap_node *to_move = handle->last; + + if(to_move->parent != root) { + handle->next = to_move->parent; + + if(handle->next->right == to_move) { + /* disconnect from parent */ + to_move->parent->right = NULL; + handle->last = handle->next->left; + } + else { + /* find new 'last' before we disconnect */ + __binheap_update_last(handle); + + /* disconnect from parent */ + to_move->parent->left = NULL; + } + } + else { + /* 'last' is direct child of root */ + + handle->next = to_move; + + if(to_move == to_move->parent->right) { + to_move->parent->right = NULL; + handle->last = to_move->parent->left; + } + else { + to_move->parent->left = NULL; + handle->last = to_move; + } + } + to_move->parent = NULL; + + /* reconnect as root. We can't just swap data ptrs since root node + * may be freed after this function returns. + */ + to_move->left = root->left; + to_move->right = root->right; + if(to_move->left != NULL) { + to_move->left->parent = to_move; + } + if(to_move->right != NULL) { + to_move->right->parent = to_move; + } + + handle->root = to_move; + + /* bubble down */ + __binheap_bubble_down(handle); + } + else { + /* removing last node in tree */ + handle->root = NULL; + handle->next = NULL; + handle->last = NULL; + } + + /* mark as removed */ + container->parent = BINHEAP_POISON; +} + + +/** + * Delete an arbitrary node. Bubble node to delete up to the root, + * and then delete to root. + */ +void __binheap_delete(struct binheap_node *node_to_delete, + struct binheap_handle *handle) +{ + struct binheap_node *target = node_to_delete->ref; + void *temp_data = target->data; + +// if(!binheap_is_in_heap(node_to_delete)) +// { +// dump_node_data2(handle, node_to_delete); +// BUG(); +// } +// +// if(!binheap_is_in_heap(target)) +// { +// dump_node_data2(handle, target); +// BUG(); +// } + + /* temporarily set data to null to allow node to bubble up to the top. */ + target->data = BINHEAP_POISON; + + __binheap_bubble_up(handle, target); + __binheap_delete_root(handle, node_to_delete); + + node_to_delete->data = temp_data; /* restore node data pointer */ + //node_to_delete->parent = BINHEAP_POISON; /* poison the node */ +} + +/** + * Bubble up a node whose pointer has decreased in value. + */ +void __binheap_decrease(struct binheap_node *orig_node, + struct binheap_handle *handle) +{ + struct binheap_node *target = orig_node->ref; + +// if(!binheap_is_in_heap(orig_node)) +// { +// dump_node_data2(handle, orig_node); +// BUG(); +// } +// +// if(!binheap_is_in_heap(target)) +// { +// dump_node_data2(handle, target); +// BUG(); +// } +// + __binheap_bubble_up(handle, target); +} diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 9b44dc2d8d1e..b346bdd65b3b 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -12,40 +12,61 @@ #include #include +#ifdef CONFIG_LITMUS_NESTED_LOCKING +#include +#endif + #include + + /* edf_higher_prio - returns true if first has a higher EDF priority * than second. Deadline ties are broken by PID. 
* * both first and second may be NULL */ -int edf_higher_prio(struct task_struct* first, - struct task_struct* second) +#ifdef CONFIG_LITMUS_NESTED_LOCKING +int __edf_higher_prio( + struct task_struct* first, comparison_mode_t first_mode, + struct task_struct* second, comparison_mode_t second_mode) +#else +int edf_higher_prio(struct task_struct* first, struct task_struct* second) +#endif { struct task_struct *first_task = first; struct task_struct *second_task = second; /* There is no point in comparing a task to itself. */ if (first && first == second) { - TRACE_TASK(first, - "WARNING: pointless edf priority comparison.\n"); + TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid); + WARN_ON(1); return 0; } /* check for NULL tasks */ - if (!first || !second) + if (!first || !second) { return first && !second; + } #ifdef CONFIG_LITMUS_LOCKING - - /* Check for inherited priorities. Change task + /* Check for EFFECTIVE priorities. Change task * used for comparison in such a case. */ - if (unlikely(first->rt_param.inh_task)) + if (unlikely(first->rt_param.inh_task) +#ifdef CONFIG_LITMUS_NESTED_LOCKING + && (first_mode == EFFECTIVE) +#endif + ) { first_task = first->rt_param.inh_task; - if (unlikely(second->rt_param.inh_task)) + } + if (unlikely(second->rt_param.inh_task) +#ifdef CONFIG_LITMUS_NESTED_LOCKING + && (second_mode == EFFECTIVE) +#endif + ) { second_task = second->rt_param.inh_task; + } /* Check for priority boosting. Tie-break by start of boosting. */ @@ -53,37 +74,109 @@ int edf_higher_prio(struct task_struct* first, /* first_task is boosted, how about second_task? */ if (!is_priority_boosted(second_task) || lt_before(get_boost_start(first_task), - get_boost_start(second_task))) + get_boost_start(second_task))) { return 1; - else + } + else { return 0; - } else if (unlikely(is_priority_boosted(second_task))) + } + } + else if (unlikely(is_priority_boosted(second_task))) { /* second_task is boosted, first is not*/ return 0; + } #endif +// // rate-monotonic for testing +// if (!is_realtime(second_task)) { +// return true; +// } +// +// if (shorter_period(first_task, second_task)) { +// return true; +// } +// +// if (get_period(first_task) == get_period(second_task)) { +// if (first_task->pid < second_task->pid) { +// return true; +// } +// else if (first_task->pid == second_task->pid) { +// return !second->rt_param.inh_task; +// } +// } + + if (!is_realtime(second_task)) { + return true; + } + + if (earlier_deadline(first_task, second_task)) { + return true; + } + if (get_deadline(first_task) == get_deadline(second_task)) { + + if (shorter_period(first_task, second_task)) { + return true; + } + if (get_rt_period(first_task) == get_rt_period(second_task)) { + if (first_task->pid < second_task->pid) { + return true; + } + if (first_task->pid == second_task->pid) { +#ifdef CONFIG_LITMUS_SOFTIRQD + if (first_task->rt_param.is_proxy_thread < + second_task->rt_param.is_proxy_thread) { + return true; + } + if(first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) { + return !second->rt_param.inh_task; + } +#else + return !second->rt_param.inh_task; +#endif + } + + } + } + + return false; +} + + +#ifdef CONFIG_LITMUS_NESTED_LOCKING +int edf_higher_prio(struct task_struct* first, struct task_struct* second) +{ + return __edf_higher_prio(first, EFFECTIVE, second, EFFECTIVE); +} + +int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b) +{ + struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct 
nested_info, hp_binheap_node); + struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node); - return !is_realtime(second_task) || + return __edf_higher_prio(l_a->hp_waiter_eff_prio, EFFECTIVE, l_b->hp_waiter_eff_prio, EFFECTIVE); +} - /* is the deadline of the first task earlier? - * Then it has higher priority. - */ - earlier_deadline(first_task, second_task) || +int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b) +{ + return edf_max_heap_order(b, a); // swap comparison +} - /* Do we have a deadline tie? - * Then break by PID. - */ - (get_deadline(first_task) == get_deadline(second_task) && - (first_task->pid < second_task->pid || +int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b) +{ + struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node); + struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node); - /* If the PIDs are the same then the task with the inherited - * priority wins. - */ - (first_task->pid == second_task->pid && - !second->rt_param.inh_task))); + return __edf_higher_prio(l_a->hp_waiter_eff_prio, BASE, l_b->hp_waiter_eff_prio, BASE); } +int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b) +{ + return edf_max_heap_base_priority_order(b, a); // swap comparison +} +#endif + + int edf_ready_order(struct bheap_node* a, struct bheap_node* b) { return edf_higher_prio(bheap2task(a), bheap2task(b)); diff --git a/litmus/fdso.c b/litmus/fdso.c index aa7b384264e3..18fc61b6414a 100644 --- a/litmus/fdso.c +++ b/litmus/fdso.c @@ -20,9 +20,22 @@ extern struct fdso_ops generic_lock_ops; +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +extern struct fdso_ops generic_affinity_ops; +#endif + static const struct fdso_ops* fdso_ops[] = { &generic_lock_ops, /* FMLP_SEM */ &generic_lock_ops, /* SRP_SEM */ + &generic_lock_ops, /* RSM_MUTEX */ + &generic_lock_ops, /* IKGLP_SEM */ + &generic_lock_ops, /* KFMLP_SEM */ +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + &generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */ + &generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */ + &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */ + &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */ +#endif }; static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c new file mode 100644 index 000000000000..9762be1a085e --- /dev/null +++ b/litmus/gpu_affinity.c @@ -0,0 +1,113 @@ + +#ifdef CONFIG_LITMUS_NVIDIA + +#include +#include +#include + +#include + +#define OBSERVATION_CAP 2*1e9 + +static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) +{ + fp_t relative_err; + fp_t err, new; + fp_t actual = _integer_to_fp(observed); + + err = _sub(actual, fb->est); + new = _add(_mul(a, err), _mul(b, fb->accum_err)); + + relative_err = _div(err, actual); + + fb->est = new; + fb->accum_err = _add(fb->accum_err, err); + + return relative_err; +} + +void update_gpu_estimate(struct task_struct *t, lt_t observed) +{ + feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); + + BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST); + + if(unlikely(fb->est.val == 0)) { + // kludge-- cap observed values to prevent whacky estimations. + // whacky stuff happens during the first few jobs. 
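		// (Descriptive note: this branch seeds the estimator -- the first
		//  accepted observation becomes fb->est directly and accum_err is
		//  derived from it below, so an uncapped outlier here would skew
		//  the feedback filter in update_estimate() for later jobs.)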
+ if(unlikely(observed > OBSERVATION_CAP)) { + TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n", + observed, OBSERVATION_CAP); + observed = OBSERVATION_CAP; + } + + // take the first observation as our estimate + // (initial value of 0 was bogus anyhow) + fb->est = _integer_to_fp(observed); + fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. + } + else { + fp_t rel_err = update_estimate(fb, + tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration], + tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration], + observed); + + if(unlikely(_fp_to_integer(fb->est) <= 0)) { + TRACE_TASK(t, "Invalid estimate. Patching.\n"); + fb->est = _integer_to_fp(observed); + fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. + } + else { +// struct migration_info mig_info; + + sched_trace_prediction_err(t, + &(tsk_rt(t)->gpu_migration), + &rel_err); + +// mig_info.observed = observed; +// mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration); +// mig_info.distance = tsk_rt(t)->gpu_migration; +// +// sched_trace_migration(t, &mig_info); + } + } + + TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n", + tsk_rt(t)->gpu_migration, + observed, + _fp_to_integer(fb->est), + _point(fb->est)); +} + +gpu_migration_dist_t gpu_migration_distance(int a, int b) +{ + // GPUs organized in a binary hierarchy, no more than 2^MIG_FAR GPUs + int i; + int dist; + + if(likely(a >= 0 && b >= 0)) { + for(i = 0; i <= MIG_FAR; ++i) { + if(a>>i == b>>i) { + dist = i; + goto out; + } + } + dist = MIG_NONE; // hopefully never reached. + TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b); + } + else { + dist = MIG_NONE; + } + +out: + TRACE_CUR("Distance %d -> %d is %d\n", + a, b, dist); + + return dist; +} + + + + +#endif + diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c new file mode 100644 index 000000000000..83b708ab85cb --- /dev/null +++ b/litmus/ikglp_lock.c @@ -0,0 +1,2838 @@ +#include +#include + +#include +#include +#include + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#include +#endif + +#include + +// big signed value. 
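// (0x7FFFFFFF == INT_MAX; per the macro name, presumably an "invalid/unset
//  distance" sentinel used by the affinity-aware code.)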
+#define IKGLP_INVAL_DISTANCE 0x7FFFFFFF + +int ikglp_max_heap_base_priority_order(struct binheap_node *a, + struct binheap_node *b) +{ + ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); + ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); + + BUG_ON(!d_a); + BUG_ON(!d_b); + + return litmus->__compare(d_a->task, BASE, d_b->task, BASE); +} + +int ikglp_min_heap_base_priority_order(struct binheap_node *a, + struct binheap_node *b) +{ + ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); + ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); + + return litmus->__compare(d_b->task, BASE, d_a->task, BASE); +} + +int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a, + struct binheap_node *b) +{ + ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node); + ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node); + + return litmus->__compare(d_a->task, BASE, d_b->task, BASE); +} + + +int ikglp_min_heap_donee_order(struct binheap_node *a, + struct binheap_node *b) +{ + struct task_struct *prio_a, *prio_b; + + ikglp_donee_heap_node_t *d_a = + binheap_entry(a, ikglp_donee_heap_node_t, node); + ikglp_donee_heap_node_t *d_b = + binheap_entry(b, ikglp_donee_heap_node_t, node); + + if(!d_a->donor_info) { + prio_a = d_a->task; + } + else { + prio_a = d_a->donor_info->task; + BUG_ON(d_a->task != d_a->donor_info->donee_info->task); + } + + if(!d_b->donor_info) { + prio_b = d_b->task; + } + else { + prio_b = d_b->donor_info->task; + BUG_ON(d_b->task != d_b->donor_info->donee_info->task); + } + + // note reversed order + return litmus->__compare(prio_b, BASE, prio_a, BASE); +} + + + +static inline int ikglp_get_idx(struct ikglp_semaphore *sem, + struct fifo_queue *queue) +{ + return (queue - &sem->fifo_queues[0]); +} + +static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem, + struct task_struct *holder) +{ + int i; + for(i = 0; i < sem->nr_replicas; ++i) + if(sem->fifo_queues[i].owner == holder) + return(&sem->fifo_queues[i]); + return(NULL); +} + + + +static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue, + struct task_struct *skip) +{ + struct list_head *pos; + struct task_struct *queued, *found = NULL; + + list_for_each(pos, &kqueue->wait.task_list) { + queued = (struct task_struct*) list_entry(pos, + wait_queue_t, task_list)->private; + + /* Compare task prios, find high prio task. */ + if(queued != skip && litmus->compare(queued, found)) + found = queued; + } + return found; +} + +static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem, + struct fifo_queue *search_start) +{ + // we start our search at search_start instead of at the beginning of the + // queue list to load-balance across all resources. + struct fifo_queue* step = search_start; + struct fifo_queue* shortest = sem->shortest_fifo_queue; + + do { + step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ? 
+ step+1 : &sem->fifo_queues[0]; + + if(step->count < shortest->count) { + shortest = step; + if(step->count == 0) + break; /* can't get any shorter */ + } + + }while(step != search_start); + + return(shortest); +} + +static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem) +{ + return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task; +} + + + +#if 0 +static void print_global_list(struct binheap_node* n, int depth) +{ + ikglp_heap_node_t *global_heap_node; + char padding[81] = " "; + + if(n == NULL) { + TRACE_CUR("+-> %p\n", NULL); + return; + } + + global_heap_node = binheap_entry(n, ikglp_heap_node_t, node); + + if(depth*2 <= 80) + padding[depth*2] = '\0'; + + TRACE_CUR("%s+-> %s/%d\n", + padding, + global_heap_node->task->comm, + global_heap_node->task->pid); + + if(n->left) print_global_list(n->left, depth+1); + if(n->right) print_global_list(n->right, depth+1); +} + +static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth) +{ + ikglp_donee_heap_node_t *donee_node; + char padding[81] = " "; + struct task_struct* donor = NULL; + + if(n == NULL) { + TRACE_CUR("+-> %p\n", NULL); + return; + } + + donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node); + + if(depth*2 <= 80) + padding[depth*2] = '\0'; + + if(donee_node->donor_info) { + donor = donee_node->donor_info->task; + } + + TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n", + padding, + donee_node->task->comm, + donee_node->task->pid, + (donor) ? donor->comm : "nil", + (donor) ? donor->pid : -1, + ikglp_get_idx(sem, donee_node->fq)); + + if(n->left) print_donees(sem, n->left, depth+1); + if(n->right) print_donees(sem, n->right, depth+1); +} + +static void print_donors(struct binheap_node *n, int depth) +{ + ikglp_wait_state_t *donor_node; + char padding[81] = " "; + + if(n == NULL) { + TRACE_CUR("+-> %p\n", NULL); + return; + } + + donor_node = binheap_entry(n, ikglp_wait_state_t, node); + + if(depth*2 <= 80) + padding[depth*2] = '\0'; + + + TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n", + padding, + donor_node->task->comm, + donor_node->task->pid, + donor_node->donee_info->task->comm, + donor_node->donee_info->task->pid); + + if(n->left) print_donors(n->left, depth+1); + if(n->right) print_donors(n->right, depth+1); +} +#endif + +static void ikglp_add_global_list(struct ikglp_semaphore *sem, + struct task_struct *t, + ikglp_heap_node_t *node) +{ + + + node->task = t; + INIT_BINHEAP_NODE(&node->node); + + if(sem->top_m_size < sem->m) { + TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", + t->comm, t->pid); +// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); +// print_global_list(sem->top_m.root, 1); + + binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); + ++(sem->top_m_size); + +// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); +// print_global_list(sem->top_m.root, 1); + } + else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { + ikglp_heap_node_t *evicted = + binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node); + + TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n", + t->comm, t->pid, + evicted->task->comm, evicted->task->pid); + +// TRACE_CUR("Not-Top-M Before:\n"); +// print_global_list(sem->not_top_m.root, 1); +// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); +// print_global_list(sem->top_m.root, 1); + + + binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); + INIT_BINHEAP_NODE(&evicted->node); + binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); + + 
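+ // The evicted request now sits in not_top_m; insert the new, higher-priority
+ // request into top_m. top_m is a min-heap on base priority, so its root is
+ // always the mth-highest (weakest top-m) request; see ikglp_mth_highest().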
binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); + +// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); +// print_global_list(sem->top_m.root, 1); +// TRACE_CUR("Not-Top-M After:\n"); +// print_global_list(sem->not_top_m.root, 1); + } + else { + TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", + t->comm, t->pid); +// TRACE_CUR("Not-Top-M Before:\n"); +// print_global_list(sem->not_top_m.root, 1); + + binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); + +// TRACE_CUR("Not-Top-M After:\n"); +// print_global_list(sem->not_top_m.root, 1); + } +} + + +static void ikglp_del_global_list(struct ikglp_semaphore *sem, + struct task_struct *t, + ikglp_heap_node_t *node) +{ + BUG_ON(!binheap_is_in_heap(&node->node)); + + TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid); + + if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { + TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); + +// TRACE_CUR("Not-Top-M Before:\n"); +// print_global_list(sem->not_top_m.root, 1); +// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); +// print_global_list(sem->top_m.root, 1); + + + binheap_delete(&node->node, &sem->top_m); + + if(!binheap_empty(&sem->not_top_m)) { + ikglp_heap_node_t *promoted = + binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node); + + TRACE_CUR("Promoting %s/%d to top-m\n", + promoted->task->comm, promoted->task->pid); + + binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node); + INIT_BINHEAP_NODE(&promoted->node); + + binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node); + } + else { + TRACE_CUR("No one to promote to top-m.\n"); + --(sem->top_m_size); + } + +// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); +// print_global_list(sem->top_m.root, 1); +// TRACE_CUR("Not-Top-M After:\n"); +// print_global_list(sem->not_top_m.root, 1); + } + else { + TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); +// TRACE_CUR("Not-Top-M Before:\n"); +// print_global_list(sem->not_top_m.root, 1); + + binheap_delete(&node->node, &sem->not_top_m); + +// TRACE_CUR("Not-Top-M After:\n"); +// print_global_list(sem->not_top_m.root, 1); + } +} + + +static void ikglp_add_donees(struct ikglp_semaphore *sem, + struct fifo_queue *fq, + struct task_struct *t, + ikglp_donee_heap_node_t* node) +{ +// TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid); +// TRACE_CUR("donees Before:\n"); +// print_donees(sem, sem->donees.root, 1); + + node->task = t; + node->donor_info = NULL; + node->fq = fq; + INIT_BINHEAP_NODE(&node->node); + + binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node); + +// TRACE_CUR("donees After:\n"); +// print_donees(sem, sem->donees.root, 1); +} + + +static void ikglp_refresh_owners_prio_increase(struct task_struct *t, + struct fifo_queue *fq, + struct ikglp_semaphore *sem, + unsigned long flags) +{ + // priority of 't' has increased (note: 't' might already be hp_waiter). 
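+ // If 't' now out-prioritizes this queue's hp_waiter, update hp_waiter,
+ // reposition the queue's node in the owner's hp_blocked_tasks heap, and,
+ // if the owner's effective priority changed as a result, propagate the
+ // increase via nested_increase_prio() (which also releases sem->lock).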
+ if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) { + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + struct task_struct *new_prio = NULL; + struct task_struct *owner = fq->owner; + + if(fq->hp_waiter) + TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", + fq->hp_waiter->comm, fq->hp_waiter->pid); + else + TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); + + if(owner) + { + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + +// TRACE_TASK(owner, "Heap Before:\n"); +// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + fq->hp_waiter = t; + fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); + + binheap_decrease(&fq->nest.hp_binheap_node, + &tsk_rt(owner)->hp_blocked_tasks); + +// TRACE_TASK(owner, "Heap After:\n"); +// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0); + + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + if(new_max_eff_prio != old_max_eff_prio) { + TRACE_TASK(t, "is new hp_waiter.\n"); + + if ((effective_priority(owner) == old_max_eff_prio) || + (litmus->__compare(new_max_eff_prio, BASE, + owner, EFFECTIVE))){ + new_prio = new_max_eff_prio; + } + } + else { + TRACE_TASK(t, "no change in max_eff_prio of heap.\n"); + } + + if(new_prio) { + // set new inheritance and propagate + TRACE_TASK(t, "Effective priority changed for owner %s/%d to %s/%d\n", + owner->comm, owner->pid, + new_prio->comm, new_prio->pid); + litmus->nested_increase_prio(owner, new_prio, &sem->lock, + flags); // unlocks lock. + } + else { + TRACE_TASK(t, "No change in effective priority (is %s/%d). Propagation halted.\n", + new_max_eff_prio->comm, new_max_eff_prio->pid); + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&sem->lock, flags); + } + } + else { + fq->hp_waiter = t; + fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); + + TRACE_TASK(t, "no owner.\n"); + unlock_fine_irqrestore(&sem->lock, flags); + } + } + else { + TRACE_TASK(t, "hp_waiter is unaffected.\n"); + unlock_fine_irqrestore(&sem->lock, flags); + } +} + +// hp_waiter has decreased +static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq, + struct ikglp_semaphore *sem, + unsigned long flags) +{ + struct task_struct *owner = fq->owner; + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + + if(!owner) { + TRACE_CUR("No owner. Returning.\n"); + unlock_fine_irqrestore(&sem->lock, flags); + return; + } + + TRACE_CUR("ikglp_refresh_owners_prio_decrease\n"); + + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks); + fq->nest.hp_waiter_eff_prio = fq->hp_waiter; + binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + if((old_max_eff_prio != new_max_eff_prio) && + (effective_priority(owner) == old_max_eff_prio)) + { + // Need to set new effective_priority for owner + struct task_struct *decreased_prio; + + TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", + ikglp_get_idx(sem, fq)); + + if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { + TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n", + (new_max_eff_prio) ? 
new_max_eff_prio->comm : "nil", + (new_max_eff_prio) ? new_max_eff_prio->pid : -1, + owner->comm, + owner->pid, + ikglp_get_idx(sem, fq)); + + decreased_prio = new_max_eff_prio; + } + else { + TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of fq %d.\n", + (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", + (new_max_eff_prio) ? new_max_eff_prio->pid : -1, + owner->comm, + owner->pid, + ikglp_get_idx(sem, fq)); + + decreased_prio = NULL; + } + + // beware: recursion + litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock + } + else { + TRACE_TASK(owner, "No need to propagate priority decrease forward.\n"); + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&sem->lock, flags); + } +} + + +static void ikglp_remove_donation_from_owner(struct binheap_node *n, + struct fifo_queue *fq, + struct ikglp_semaphore *sem, + unsigned long flags) +{ + struct task_struct *owner = fq->owner; + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + + BUG_ON(!owner); + + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks); + + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + if((old_max_eff_prio != new_max_eff_prio) && + (effective_priority(owner) == old_max_eff_prio)) + { + // Need to set new effective_priority for owner + struct task_struct *decreased_prio; + + TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", + ikglp_get_idx(sem, fq)); + + if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { + TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n", + ikglp_get_idx(sem, fq)); + decreased_prio = new_max_eff_prio; + } + else { + TRACE_CUR("has lesser base priority than base priority of owner of fq %d.\n", + ikglp_get_idx(sem, fq)); + decreased_prio = NULL; + } + + // beware: recursion + litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock + } + else { + TRACE_TASK(owner, "No need to propagate priority decrease forward.\n"); + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&sem->lock, flags); + } +} + +static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t, + struct binheap_node *n) +{ + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); + + binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks); + + new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); + + if((old_max_eff_prio != new_max_eff_prio) && + (effective_priority(t) == old_max_eff_prio)) + { + // Need to set new effective_priority for owner + struct task_struct *decreased_prio; + + if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) { + decreased_prio = new_max_eff_prio; + } + else { + decreased_prio = NULL; + } + + tsk_rt(t)->inh_task = decreased_prio; + } + + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); +} + +static void ikglp_get_immediate(struct task_struct* t, + struct fifo_queue *fq, + struct ikglp_semaphore *sem, + unsigned long flags) +{ + // resource available now + TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq)); + + fq->owner = t; + + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); + binheap_add(&fq->nest.hp_binheap_node, 
&tsk_rt(t)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); + + ++(fq->count); + + ikglp_add_global_list(sem, t, &fq->global_heap_node); + ikglp_add_donees(sem, fq, t, &fq->donee_heap_node); + + sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); + sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t); + } +#endif + + unlock_fine_irqrestore(&sem->lock, flags); +} + + + + + +static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem, + struct fifo_queue* fq, + struct task_struct* t, + wait_queue_t *wait, + ikglp_heap_node_t *global_heap_node, + ikglp_donee_heap_node_t *donee_heap_node) +{ + /* resource is not free => must suspend and wait */ + TRACE_TASK(t, "Enqueuing on fq %d.\n", + ikglp_get_idx(sem, fq)); + + init_waitqueue_entry(wait, t); + + __add_wait_queue_tail_exclusive(&fq->wait, wait); + + ++(fq->count); + ++(sem->nr_in_fifos); + + // update global list. + if(likely(global_heap_node)) { + if(binheap_is_in_heap(&global_heap_node->node)) { + WARN_ON(1); + ikglp_del_global_list(sem, t, global_heap_node); + } + ikglp_add_global_list(sem, t, global_heap_node); + } + // update donor eligiblity list. + if(likely(donee_heap_node)) { +// if(binheap_is_in_heap(&donee_heap_node->node)) { +// WARN_ON(1); +// } + ikglp_add_donees(sem, fq, t, donee_heap_node); + } + + if(sem->shortest_fifo_queue == fq) { + sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq); + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); + } +#endif + + TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq)); +} + + +static void ikglp_enqueue_on_fq( + struct ikglp_semaphore *sem, + struct fifo_queue *fq, + ikglp_wait_state_t *wait, + unsigned long flags) +{ + /* resource is not free => must suspend and wait */ + TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend and wait.\n", + ikglp_get_idx(sem, fq)); + + INIT_BINHEAP_NODE(&wait->global_heap_node.node); + INIT_BINHEAP_NODE(&wait->donee_heap_node.node); + + __ikglp_enqueue_on_fq(sem, fq, wait->task, &wait->fq_node, + &wait->global_heap_node, &wait->donee_heap_node); + + ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags); // unlocks sem->lock +} + + +static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem, + ikglp_wait_state_t *wait) +{ + TRACE_TASK(wait->task, "goes to PQ.\n"); + + wait->pq_node.task = wait->task; // copy over task (little redundant...) 
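+ // The PQ holds blocked requests that are in neither a FIFO queue nor the
+ // donor heap; it is a max-heap on base priority (see
+ // ikglp_max_heap_base_priority_order), so the strongest PQ request is the
+ // first promoted to a FIFO queue when a slot frees up in ikglp_unlock().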
+ + binheap_add(&wait->pq_node.node, &sem->priority_queue, + ikglp_heap_node_t, node); +} + +static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem, + ikglp_wait_state_t *wait) +{ + INIT_BINHEAP_NODE(&wait->global_heap_node.node); + INIT_BINHEAP_NODE(&wait->donee_heap_node.node); + INIT_BINHEAP_NODE(&wait->pq_node.node); + + __ikglp_enqueue_on_pq(sem, wait); +} + +static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem, + ikglp_wait_state_t* wait, + unsigned long flags) +{ + struct task_struct *t = wait->task; + ikglp_donee_heap_node_t *donee_node = NULL; + struct task_struct *donee; + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + struct task_struct *new_prio = NULL; + + INIT_BINHEAP_NODE(&wait->global_heap_node.node); + INIT_BINHEAP_NODE(&wait->donee_heap_node.node); + INIT_BINHEAP_NODE(&wait->pq_node.node); + INIT_BINHEAP_NODE(&wait->node); + +// TRACE_CUR("Adding %s/%d as donor.\n", t->comm, t->pid); +// TRACE_CUR("donors Before:\n"); +// print_donors(sem->donors.root, 1); + + // Add donor to the global list. + ikglp_add_global_list(sem, t, &wait->global_heap_node); + + // Select a donee +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + donee_node = (sem->aff_obs) ? + sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) : + binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); +#else + donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); +#endif + + donee = donee_node->task; + + TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid); + + TRACE_CUR("Temporarily removing %s/%d to donee list.\n", + donee->comm, donee->pid); +// TRACE_CUR("donees Before:\n"); +// print_donees(sem, sem->donees.root, 1); + + //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly + binheap_delete(&donee_node->node, &sem->donees); + +// TRACE_CUR("donees After:\n"); +// print_donees(sem, sem->donees.root, 1); + + + wait->donee_info = donee_node; + + // Add t to donor heap. + binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node); + + // Now adjust the donee's priority. + + // Lock the donee's inheritance heap. + raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks); + + if(donee_node->donor_info) { + // Steal donation relation. Evict old donor to PQ. + + // Remove old donor from donor heap + ikglp_wait_state_t *old_wait = donee_node->donor_info; + struct task_struct *old_donor = old_wait->task; + + TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d. Moving old donor to PQ.\n", + donee->comm, donee->pid, old_donor->comm, old_donor->pid); + + binheap_delete(&old_wait->node, &sem->donors); + + // Remove donation from donee's inheritance heap. + binheap_delete(&old_wait->prio_donation.hp_binheap_node, + &tsk_rt(donee)->hp_blocked_tasks); + // WARNING: have not updated inh_prio! + + // Add old donor to PQ. + __ikglp_enqueue_on_pq(sem, old_wait); + + // Remove old donor from the global heap. 
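+ // (The old donor re-enters the global list later, via __ikglp_enqueue_on_fq(),
+ // if it is ever promoted from the PQ to a FIFO queue.)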
+ ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node); + } + + // Add back donee's node to the donees heap with increased prio + donee_node->donor_info = wait; + INIT_BINHEAP_NODE(&donee_node->node); + + + TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid); +// TRACE_CUR("donees Before:\n"); +// print_donees(sem, sem->donees.root, 1); + + binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node); + +// TRACE_CUR("donees After:\n"); +// print_donees(sem, sem->donees.root, 1); + + // Add an inheritance/donation to the donee's inheritance heap. + wait->prio_donation.lock = (struct litmus_lock*)sem; + wait->prio_donation.hp_waiter_eff_prio = t; + wait->prio_donation.hp_waiter_ptr = NULL; + INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node); + + binheap_add(&wait->prio_donation.hp_binheap_node, + &tsk_rt(donee)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + + new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks); + + if(new_max_eff_prio != old_max_eff_prio) { + if ((effective_priority(donee) == old_max_eff_prio) || + (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){ + TRACE_TASK(t, "Donation increases %s/%d's effective priority\n", + donee->comm, donee->pid); + new_prio = new_max_eff_prio; + } +// else { +// // should be bug. donor would not be in top-m. +// TRACE_TASK(t, "Donation is not greater than base prio of %s/%d?\n", donee->comm, donee->pid); +// WARN_ON(1); +// } +// } +// else { +// // should be bug. donor would not be in top-m. +// TRACE_TASK(t, "No change in %s/%d's inheritance heap?\n", donee->comm, donee->pid); +// WARN_ON(1); + } + + if(new_prio) { + struct fifo_queue *donee_fq = donee_node->fq; + + if(donee != donee_fq->owner) { + TRACE_TASK(t, "%s/%d is not the owner. Propagating priority to owner %s/%d.\n", + donee->comm, donee->pid, + donee_fq->owner->comm, donee_fq->owner->pid); + + raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock); + ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags); // unlocks sem->lock + } + else { + TRACE_TASK(t, "%s/%d is the owner. Progatating priority immediatly.\n", + donee->comm, donee->pid); + litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags); // unlocks sem->lock and donee's heap lock + } + } + else { + TRACE_TASK(t, "No change in effective priority (it is %d/%s). BUG?\n", + new_max_eff_prio->comm, new_max_eff_prio->pid); + raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&sem->lock, flags); + } + + +// TRACE_CUR("donors After:\n"); +// print_donors(sem->donors.root, 1); +} + +int ikglp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct ikglp_semaphore *sem = ikglp_from_lock(l); + unsigned long flags = 0, real_flags; + struct fifo_queue *fq = NULL; + int replica = -EINVAL; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t *dgl_lock; +#endif + + ikglp_wait_state_t wait; + + if (!is_realtime(t)) + return -EPERM; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + dgl_lock = litmus->get_dgl_spinlock(t); +#endif + + raw_spin_lock_irqsave(&sem->real_lock, real_flags); + + lock_global_irqsave(dgl_lock, flags); + lock_fine_irqsave(&sem->lock, flags); + + if(sem->nr_in_fifos < sem->m) { + // enqueue somwhere +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + fq = (sem->aff_obs) ? 
+ sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) : + sem->shortest_fifo_queue; +#else + fq = sem->shortest_fifo_queue; +#endif + if(fq->count == 0) { + // take available resource + replica = ikglp_get_idx(sem, fq); + + ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock + + unlock_global_irqrestore(dgl_lock, flags); + raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); + goto acquired; + } + else { + wait.task = t; // THIS IS CRITICALLY IMPORTANT!!! + + tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked + mb(); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock + } + } + else { + // donor! + wait.task = t; // THIS IS CRITICALLY IMPORTANT!!! + + tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked + mb(); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { + // enqueue on PQ + ikglp_enqueue_on_pq(sem, &wait); + unlock_fine_irqrestore(&sem->lock, flags); + } + else { + // enqueue as donor + ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock + } + } + + unlock_global_irqrestore(dgl_lock, flags); + raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); + + TS_LOCK_SUSPEND; + + schedule(); + + TS_LOCK_RESUME; + + fq = ikglp_get_queue(sem, t); + BUG_ON(!fq); + + replica = ikglp_get_idx(sem, fq); + +acquired: + TRACE_CUR("Acquired lock %d, queue %d\n", + l->ident, replica); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq); + } +#endif + + return replica; +} + +//int ikglp_lock(struct litmus_lock* l) +//{ +// struct task_struct* t = current; +// struct ikglp_semaphore *sem = ikglp_from_lock(l); +// unsigned long flags = 0, real_flags; +// struct fifo_queue *fq = NULL; +// int replica = -EINVAL; +// +//#ifdef CONFIG_LITMUS_DGL_SUPPORT +// raw_spinlock_t *dgl_lock; +//#endif +// +// ikglp_wait_state_t wait; +// +// if (!is_realtime(t)) +// return -EPERM; +// +//#ifdef CONFIG_LITMUS_DGL_SUPPORT +// dgl_lock = litmus->get_dgl_spinlock(t); +//#endif +// +// raw_spin_lock_irqsave(&sem->real_lock, real_flags); +// +// lock_global_irqsave(dgl_lock, flags); +// lock_fine_irqsave(&sem->lock, flags); +// +// +//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +// fq = (sem->aff_obs) ? +// sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) : +// sem->shortest_fifo_queue; +//#else +// fq = sem->shortest_fifo_queue; +//#endif +// +// if(fq->count == 0) { +// // take available resource +// replica = ikglp_get_idx(sem, fq); +// +// ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock +// +// unlock_global_irqrestore(dgl_lock, flags); +// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); +// } +// else +// { +// // we have to suspend. +// +// wait.task = t; // THIS IS CRITICALLY IMPORTANT!!! +// +// tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked +// mb(); +// +// /* FIXME: interruptible would be nice some day */ +// set_task_state(t, TASK_UNINTERRUPTIBLE); +// +// if(fq->count < sem->max_fifo_len) { +// // enqueue on fq +// ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock +// } +// else { +// +// TRACE_CUR("IKGLP fifo queues are full (at least they better be).\n"); +// +// // no room in fifos. Go to PQ or donors. 
+// +// if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { +// // enqueue on PQ +// ikglp_enqueue_on_pq(sem, &wait); +// unlock_fine_irqrestore(&sem->lock, flags); +// } +// else { +// // enqueue as donor +// ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock +// } +// } +// +// unlock_global_irqrestore(dgl_lock, flags); +// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); +// +// TS_LOCK_SUSPEND; +// +// schedule(); +// +// TS_LOCK_RESUME; +// +// fq = ikglp_get_queue(sem, t); +// BUG_ON(!fq); +// +// replica = ikglp_get_idx(sem, fq); +// } +// +// TRACE_CUR("Acquired lock %d, queue %d\n", +// l->ident, replica); +// +//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +// if(sem->aff_obs) { +// return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq); +// } +//#endif +// +// return replica; +//} + +static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem, + struct fifo_queue *fq, + ikglp_wait_state_t *donor_info) +{ + struct task_struct *t = donor_info->task; + + TRACE_CUR("Donor %s/%d being moved to fq %d\n", + t->comm, + t->pid, + ikglp_get_idx(sem, fq)); + + binheap_delete(&donor_info->node, &sem->donors); + + __ikglp_enqueue_on_fq(sem, fq, t, + &donor_info->fq_node, + NULL, // already in global_list, so pass null to prevent adding 2nd time. + &donor_info->donee_heap_node); + + // warning: + // ikglp_update_owners_prio(t, fq, sem, flags) has not been called. +} + +static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem, + struct fifo_queue *fq, + ikglp_wait_state_t *wait) +{ + struct task_struct *t = wait->task; + + TRACE_CUR("PQ request %s/%d being moved to fq %d\n", + t->comm, + t->pid, + ikglp_get_idx(sem, fq)); + + binheap_delete(&wait->pq_node.node, &sem->priority_queue); + + __ikglp_enqueue_on_fq(sem, fq, t, + &wait->fq_node, + &wait->global_heap_node, + &wait->donee_heap_node); + // warning: + // ikglp_update_owners_prio(t, fq, sem, flags) has not been called. +} + +static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal( + struct ikglp_semaphore* sem) +{ + /* must hold sem->lock */ + + struct fifo_queue *fq = NULL; + struct list_head *pos; + struct task_struct *queued; + int i; + + for(i = 0; i < sem->nr_replicas; ++i) { + if( (sem->fifo_queues[i].count > 1) && + (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) { + + TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n", + ikglp_get_idx(sem, &sem->fifo_queues[i]), + sem->fifo_queues[i].hp_waiter->comm, + sem->fifo_queues[i].hp_waiter->pid, + (fq) ? ikglp_get_idx(sem, fq) : -1, + (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "nil") : "nilXX", + (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : -1) : -2); + + fq = &sem->fifo_queues[i]; + + WARN_ON(!(fq->hp_waiter)); + } + } + + if(fq) { + struct task_struct *max_hp = fq->hp_waiter; + ikglp_wait_state_t* ret = NULL; + + TRACE_CUR("Searching for %s/%d on fq %d\n", + max_hp->comm, + max_hp->pid, + ikglp_get_idx(sem, fq)); + + BUG_ON(!max_hp); + + list_for_each(pos, &fq->wait.task_list) { + wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list); + + queued = (struct task_struct*) wait->private; + + TRACE_CUR("fq %d entry: %s/%d\n", + ikglp_get_idx(sem, fq), + queued->comm, + queued->pid); + + /* Compare task prios, find high prio task. 
*/ + if (queued == max_hp) { + TRACE_CUR("Found it!\n"); + ret = container_of(wait, ikglp_wait_state_t, fq_node); + } + } + + WARN_ON(!ret); + return ret; + } + + return(NULL); +} + +static void ikglp_steal_to_fq(struct ikglp_semaphore *sem, + struct fifo_queue *fq, + ikglp_wait_state_t *fq_wait) +{ + struct task_struct *t = fq_wait->task; + struct fifo_queue *fq_steal = fq_wait->donee_heap_node.fq; + + TRACE_CUR("FQ request %s/%d being moved to fq %d\n", + t->comm, + t->pid, + ikglp_get_idx(sem, fq)); + + fq_wait->donee_heap_node.fq = fq; // just to be safe + + + __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node); + --(fq_steal->count); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t); + } +#endif + + if(t == fq_steal->hp_waiter) { + fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL); + TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", + ikglp_get_idx(sem, fq_steal), + (fq_steal->hp_waiter) ? fq_steal->hp_waiter->comm : "nil", + (fq_steal->hp_waiter) ? fq_steal->hp_waiter->pid : -1); + } + + + // Update shortest. + if(fq_steal->count < sem->shortest_fifo_queue->count) { + sem->shortest_fifo_queue = fq_steal; + } + + __ikglp_enqueue_on_fq(sem, fq, t, + &fq_wait->fq_node, + NULL, + NULL); + + // warning: We have not checked the priority inheritance of fq's owner yet. +} + + +static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem, + struct fifo_queue *fq, + ikglp_wait_state_t *old_wait) +{ + struct task_struct *t = old_wait->task; + + BUG_ON(old_wait->donee_heap_node.fq != fq); + + TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n", + ikglp_get_idx(sem, fq)); + + // need to migrate global_heap_node and donee_heap_node off of the stack + // to the nodes allocated for the owner of this fq. + + // TODO: Enhance binheap() to perform this operation in place. + + ikglp_del_global_list(sem, t, &old_wait->global_heap_node); // remove + fq->global_heap_node = old_wait->global_heap_node; // copy + ikglp_add_global_list(sem, t, &fq->global_heap_node); // re-add + + binheap_delete(&old_wait->donee_heap_node.node, &sem->donees); // remove + fq->donee_heap_node = old_wait->donee_heap_node; // copy + + if(fq->donee_heap_node.donor_info) { + // let donor know that our location has changed + BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t); // validate cross-link + fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node; + } + INIT_BINHEAP_NODE(&fq->donee_heap_node.node); + binheap_add(&fq->donee_heap_node.node, &sem->donees, + ikglp_donee_heap_node_t, node); // re-add +} + +int ikglp_unlock(struct litmus_lock* l) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(l); + struct task_struct *t = current; + struct task_struct *donee = NULL; + struct task_struct *next = NULL; + struct task_struct *new_on_fq = NULL; + struct fifo_queue *fq_of_new_on_fq = NULL; + + ikglp_wait_state_t *other_donor_info = NULL; + struct fifo_queue *to_steal = NULL; + int need_steal_prio_reeval = 0; + struct fifo_queue *fq; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t *dgl_lock; +#endif + + unsigned long flags = 0, real_flags; + + int err = 0; + + fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner. 
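+ // Only the task currently holding a replica may release it; any other
+ // caller is rejected with -EINVAL below.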
+ + if (!fq) { + err = -EINVAL; + goto out; + } + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + dgl_lock = litmus->get_dgl_spinlock(t); +#endif + raw_spin_lock_irqsave(&sem->real_lock, real_flags); + + lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper + lock_fine_irqsave(&sem->lock, flags); + + TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq)); + + + // Remove 't' from the heaps, but data in nodes will still be good. + ikglp_del_global_list(sem, t, &fq->global_heap_node); + binheap_delete(&fq->donee_heap_node.node, &sem->donees); + + fq->owner = NULL; // no longer owned!! + --(fq->count); + if(fq->count < sem->shortest_fifo_queue->count) { + sem->shortest_fifo_queue = fq; + } + --(sem->nr_in_fifos); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t); + sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t); + } +#endif + + // Move the next request into the FQ and update heaps as needed. + // We defer re-evaluation of priorities to later in the function. + if(fq->donee_heap_node.donor_info) { // move my donor to FQ + ikglp_wait_state_t *donor_info = fq->donee_heap_node.donor_info; + + new_on_fq = donor_info->task; + + // donor moved to FQ + donee = t; + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { + fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); + if(fq_of_new_on_fq->count == 0) { + // ignore it? +// fq_of_new_on_fq = fq; + } + } + else { + fq_of_new_on_fq = fq; + } +#else + fq_of_new_on_fq = fq; +#endif + + TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d (non-aff wanted fq %d).\n", + new_on_fq->comm, new_on_fq->pid, + ikglp_get_idx(sem, fq_of_new_on_fq), + ikglp_get_idx(sem, fq)); + + + ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, donor_info); + } + else if(!binheap_empty(&sem->donors)) { // No donor, so move any donor to FQ + // move other donor to FQ + // Select a donor +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + other_donor_info = (sem->aff_obs) ? + sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) : + binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); +#else + other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); +#endif + + new_on_fq = other_donor_info->task; + donee = other_donor_info->donee_info->task; + + // update the donee's heap position. + other_donor_info->donee_info->donor_info = NULL; // clear the cross-link + binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { + fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); + if(fq_of_new_on_fq->count == 0) { + // ignore it? 
+// fq_of_new_on_fq = fq; + } + } + else { + fq_of_new_on_fq = fq; + } +#else + fq_of_new_on_fq = fq; +#endif + + TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d (non-aff wanted fq %d).\n", + new_on_fq->comm, new_on_fq->pid, + ikglp_get_idx(sem, fq_of_new_on_fq), + ikglp_get_idx(sem, fq)); + + ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, other_donor_info); + } + else if(!binheap_empty(&sem->priority_queue)) { // No donors, so move PQ + ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue, + ikglp_heap_node_t, node); + ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t, + pq_node); + + new_on_fq = pq_wait->task; + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { + fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); + if(fq_of_new_on_fq->count == 0) { + // ignore it? +// fq_of_new_on_fq = fq; + } + } + else { + fq_of_new_on_fq = fq; + } +#else + fq_of_new_on_fq = fq; +#endif + + TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d (non-aff wanted fq %d).\n", + new_on_fq->comm, new_on_fq->pid, + ikglp_get_idx(sem, fq_of_new_on_fq), + ikglp_get_idx(sem, fq)); + + ikglp_move_pq_to_fq(sem, fq_of_new_on_fq, pq_wait); + } + else if(fq->count == 0) { // No PQ and this queue is empty, so steal. + ikglp_wait_state_t *fq_wait; + + TRACE_TASK(t, "Looking to steal a request for fq %d...\n", + ikglp_get_idx(sem, fq)); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + fq_wait = (sem->aff_obs) ? + sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) : + ikglp_find_hp_waiter_to_steal(sem); +#else + fq_wait = ikglp_find_hp_waiter_to_steal(sem); +#endif + + if(fq_wait) { + to_steal = fq_wait->donee_heap_node.fq; + + new_on_fq = fq_wait->task; + fq_of_new_on_fq = fq; + need_steal_prio_reeval = (new_on_fq == to_steal->hp_waiter); + + TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n", + new_on_fq->comm, new_on_fq->pid, + ikglp_get_idx(sem, to_steal), + ikglp_get_idx(sem, fq)); + + ikglp_steal_to_fq(sem, fq, fq_wait); + } + else { + TRACE_TASK(t, "Found nothing to steal for fq %d.\n", + ikglp_get_idx(sem, fq)); + } + } + else { // move no one + } + + // 't' must drop all priority and clean up data structures before hand-off. + + // DROP ALL INHERITANCE. IKGLP MUST BE OUTER-MOST + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); + { + int count = 0; + while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) { + binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + ++count; + } + litmus->decrease_prio(t, NULL); + WARN_ON(count > 2); // should not be greater than 2. only local fq inh and donation can be possible. + } + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); + + + + // Now patch up other priorities. + // + // At most one of the following: + // if(donee && donee != t), decrease prio, propagate to owner, or onward + // if(to_steal), update owner's prio (hp_waiter has already been set) + // + + BUG_ON((other_donor_info != NULL) && (to_steal != NULL)); + + if(other_donor_info) { + struct fifo_queue *other_fq = other_donor_info->donee_info->fq; + + BUG_ON(!donee); + BUG_ON(donee == t); + + TRACE_TASK(t, "Terminating donation relation of donor %s/%d to donee %s/%d!\n", + other_donor_info->task->comm, other_donor_info->task->pid, + donee->comm, donee->pid); + + // need to terminate donation relation. 
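+ // Two cases: (a) the donee already holds a replica, so the donation is
+ // removed from that owner's inheritance heap (possibly propagating a
+ // priority decrease), or (b) the donee is still blocked in a FIFO queue,
+ // so the donation is stripped from the waiter and that queue's hp_waiter
+ // and owner inheritance are re-evaluated.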
+ if(donee == other_fq->owner) { + TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n", + donee->comm, donee->pid, + ikglp_get_idx(sem, other_fq)); + + ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, other_fq, sem, flags); + lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! + } + else { + TRACE_TASK(t, "Donee %s/%d is an blocked in of fq %d.\n", + donee->comm, donee->pid, + ikglp_get_idx(sem, other_fq)); + + ikglp_remove_donation_from_fq_waiter(donee, &other_donor_info->prio_donation.hp_binheap_node); + if(donee == other_fq->hp_waiter) { + TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. Rechecking hp_waiter.\n", + donee->comm, donee->pid, + ikglp_get_idx(sem, other_fq)); + + other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL); + TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", + ikglp_get_idx(sem, other_fq), + (other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "nil", + (other_fq->hp_waiter) ? other_fq->hp_waiter->pid : -1); + + ikglp_refresh_owners_prio_decrease(other_fq, sem, flags); // unlocks sem->lock. reacquire it. + lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! + } + } + } + else if(to_steal) { + TRACE_TASK(t, "Rechecking priority inheritance of fq %d, triggered by stealing.\n", + ikglp_get_idx(sem, to_steal)); + + if(need_steal_prio_reeval) { + ikglp_refresh_owners_prio_decrease(to_steal, sem, flags); // unlocks sem->lock. reacquire it. + lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! + } + } + + // check for new HP waiter. + if(new_on_fq) { + if(fq == fq_of_new_on_fq) { + // fq->owner is null, so just update the hp_waiter without locking. + if(new_on_fq == fq->hp_waiter) { + TRACE_TASK(t, "new_on_fq is already hp_waiter.\n", + fq->hp_waiter->comm, fq->hp_waiter->pid); + fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure... + } + else if(litmus->compare(new_on_fq, fq->hp_waiter)) { + if(fq->hp_waiter) + TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", + fq->hp_waiter->comm, fq->hp_waiter->pid); + else + TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); + + fq->hp_waiter = new_on_fq; + fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); + + TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", + ikglp_get_idx(sem, fq), + (fq->hp_waiter) ? fq->hp_waiter->comm : "nil", + (fq->hp_waiter) ? fq->hp_waiter->pid : -1); + } + } + else { + ikglp_refresh_owners_prio_increase(new_on_fq, fq_of_new_on_fq, sem, flags); // unlocks sem->lock. reacquire it. + lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! + } + } + +wake_kludge: + if(waitqueue_active(&fq->wait)) + { + wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list); + ikglp_wait_state_t *fq_wait = container_of(wait, ikglp_wait_state_t, fq_node); + next = (struct task_struct*) wait->private; + + __remove_wait_queue(&fq->wait, wait); + + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", + ikglp_get_idx(sem, fq), + next->comm, next->pid); + + // migrate wait-state to fifo-memory. 
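+ // (The wait-state was allocated on the waiter's stack in ikglp_lock();
+ // its global/donee heap nodes are copied into this fifo_queue so they
+ // remain valid after 'next' wakes up and its stack frame goes away.)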
+ ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait); + + /* next becomes the resouce holder */ + fq->owner = next; + tsk_rt(next)->blocked_lock = NULL; + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next); + } +#endif + + /* determine new hp_waiter if necessary */ + if (next == fq->hp_waiter) { + + TRACE_TASK(next, "was highest-prio waiter\n"); + /* next has the highest priority --- it doesn't need to + * inherit. However, we need to make sure that the + * next-highest priority in the queue is reflected in + * hp_waiter. */ + fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL); + TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n", + ikglp_get_idx(sem, fq), + (fq->hp_waiter) ? fq->hp_waiter->comm : "nil", + (fq->hp_waiter) ? fq->hp_waiter->pid : -1); + + fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ? + effective_priority(fq->hp_waiter) : NULL; + + if (fq->hp_waiter) + TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n"); + else + TRACE("no further waiters\n"); + + raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); + +// TRACE_TASK(next, "Heap Before:\n"); +// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0); + + binheap_add(&fq->nest.hp_binheap_node, + &tsk_rt(next)->hp_blocked_tasks, + struct nested_info, + hp_binheap_node); + +// TRACE_TASK(next, "Heap After:\n"); +// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0); + + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); + } + else { + /* Well, if 'next' is not the highest-priority waiter, + * then it (probably) ought to inherit the highest-priority + * waiter's priority. */ + TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n", + ikglp_get_idx(sem, fq), + (fq->hp_waiter) ? fq->hp_waiter->comm : "nil", + (fq->hp_waiter) ? fq->hp_waiter->pid : -1); + + raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); + + binheap_add(&fq->nest.hp_binheap_node, + &tsk_rt(next)->hp_blocked_tasks, + struct nested_info, + hp_binheap_node); + + /* It is possible that 'next' *should* be the hp_waiter, but isn't + * because that update hasn't yet executed (update operation is + * probably blocked on mutex->lock). So only inherit if the top of + * 'next's top heap node is indeed the effective prio. of hp_waiter. + * (We use fq->hp_waiter_eff_prio instead of effective_priority(hp_waiter) + * since the effective priority of hp_waiter can change (and the + * update has not made it to this lock).) + */ + if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) == + fq->nest.hp_waiter_eff_prio)) + { + if(fq->nest.hp_waiter_eff_prio) + litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio); + else + WARN_ON(1); + } + + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); + } + + + // wake up the new resource holder! + wake_up_process(next); + } + if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) { + // The guy we promoted when to an empty FQ. (Why didn't stealing pick this up?) + // Wake up the new guy too. 
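+ // Jump back through the hand-off path above with 'fq' pointed at that
+ // empty queue so its newly enqueued waiter becomes owner and is woken too.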
+ + BUG_ON(fq_of_new_on_fq->owner != NULL); + + fq = fq_of_new_on_fq; + fq_of_new_on_fq = NULL; + goto wake_kludge; + } + + unlock_fine_irqrestore(&sem->lock, flags); + unlock_global_irqrestore(dgl_lock, flags); + + raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); + +out: + return err; +} + + + +int ikglp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct ikglp_semaphore *sem = ikglp_from_lock(l); + unsigned long flags; + + int owner = 0; + int i; + + raw_spin_lock_irqsave(&sem->real_lock, flags); + + for(i = 0; i < sem->nr_replicas; ++i) { + if(sem->fifo_queues[i].owner == t) { + owner = 1; + break; + } + } + + raw_spin_unlock_irqrestore(&sem->real_lock, flags); + + if (owner) + ikglp_unlock(l); + + return 0; +} + +void ikglp_free(struct litmus_lock* l) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(l); + + kfree(sem->fifo_queues); + kfree(sem); +} + + + +struct litmus_lock* ikglp_new(int m, + struct litmus_lock_ops* ops, + void* __user arg) +{ + struct ikglp_semaphore* sem; + int nr_replicas = 0; + int i; + + if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas))) + { + return(NULL); + } + if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas))) + { + return(NULL); + } + if(nr_replicas < 1) + { + return(NULL); + } + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if(!sem) + { + return NULL; + } + + sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL); + if(!sem->fifo_queues) + { + kfree(sem); + return NULL; + } + + sem->litmus_lock.ops = ops; + +#ifdef CONFIG_DEBUG_SPINLOCK + { + __raw_spin_lock_init(&sem->lock, ((struct litmus_lock*)sem)->cheat_lockdep, &((struct litmus_lock*)sem)->key); + } +#else + raw_spin_lock_init(&sem->lock); +#endif + + raw_spin_lock_init(&sem->real_lock); + + sem->nr_replicas = nr_replicas; + sem->m = m; + sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0); + sem->nr_in_fifos = 0; + + TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n", + sem->m, + sem->nr_replicas, + sem->max_fifo_len); + + for(i = 0; i < nr_replicas; ++i) + { + struct fifo_queue* q = &(sem->fifo_queues[i]); + + q->owner = NULL; + q->hp_waiter = NULL; + init_waitqueue_head(&q->wait); + q->count = 0; + + q->global_heap_node.task = NULL; + INIT_BINHEAP_NODE(&q->global_heap_node.node); + + q->donee_heap_node.task = NULL; + q->donee_heap_node.donor_info = NULL; + q->donee_heap_node.fq = NULL; + INIT_BINHEAP_NODE(&q->donee_heap_node.node); + + q->nest.lock = (struct litmus_lock*)sem; + q->nest.hp_waiter_eff_prio = NULL; + q->nest.hp_waiter_ptr = &q->hp_waiter; + INIT_BINHEAP_NODE(&q->nest.hp_binheap_node); + } + + sem->shortest_fifo_queue = &sem->fifo_queues[0]; + + sem->top_m_size = 0; + + // init heaps + INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order); + INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order); + INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order); + INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order); + INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + sem->aff_obs = NULL; +#endif + + return &sem->litmus_lock; +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) +{ + int gpu = replica % aff->nr_rsrc; + return gpu; +} + +static inline int replica_to_gpu(struct ikglp_affinity* aff, int 
replica) +{ + int gpu = __replica_to_gpu(aff, replica) + aff->offset; + return gpu; +} + +static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu) +{ + int replica = gpu - aff->offset; + return replica; +} + + +int ikglp_aff_obs_close(struct affinity_observer* obs) +{ + return 0; +} + +void ikglp_aff_obs_free(struct affinity_observer* obs) +{ + struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs); + kfree(ikglp_aff->nr_cur_users_on_rsrc); + kfree(ikglp_aff->q_info); + kfree(ikglp_aff); +} + +static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops, + struct ikglp_affinity_ops* ikglp_ops, + void* __user args) +{ + struct ikglp_affinity* ikglp_aff; + struct gpu_affinity_observer_args aff_args; + struct ikglp_semaphore* sem; + int i; + unsigned long flags; + + if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { + return(NULL); + } + if(__copy_from_user(&aff_args, args, sizeof(aff_args))) { + return(NULL); + } + + sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); + + if(sem->litmus_lock.type != IKGLP_SEM) { + TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); + return(NULL); + } + + if((aff_args.nr_simult_users <= 0) || + (sem->nr_replicas%aff_args.nr_simult_users != 0)) { + TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " + "(%d) per replica. #replicas should be evenly divisible " + "by #simult_users.\n", + sem->litmus_lock.ident, + sem->nr_replicas, + aff_args.nr_simult_users); + return(NULL); + } + + if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { + TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", + NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); +// return(NULL); + } + + ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); + if(!ikglp_aff) { + return(NULL); + } + + ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL); + if(!ikglp_aff->q_info) { + kfree(ikglp_aff); + return(NULL); + } + + ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); + if(!ikglp_aff->nr_cur_users_on_rsrc) { + kfree(ikglp_aff->q_info); + kfree(ikglp_aff); + return(NULL); + } + + affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs); + + ikglp_aff->ops = ikglp_ops; + ikglp_aff->offset = aff_args.replica_to_gpu_offset; + ikglp_aff->nr_simult = aff_args.nr_simult_users; + ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; + ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 
1 : 0; + + TRACE_CUR("GPU affinity_observer: offset = %d, nr_simult = %d, " + "nr_rsrc = %d, relaxed_fifo_len = %d\n", + ikglp_aff->offset, ikglp_aff->nr_simult, ikglp_aff->nr_rsrc, + ikglp_aff->relax_max_fifo_len); + + memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); + + for(i = 0; i < sem->nr_replicas; ++i) { + ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; + ikglp_aff->q_info[i].estimated_len = 0; + + // multiple q_info's will point to the same resource (aka GPU) if + // aff_args.nr_simult_users > 1 + ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)]; + } + + // attach observer to the lock + raw_spin_lock_irqsave(&sem->real_lock, flags); + sem->aff_obs = ikglp_aff; + raw_spin_unlock_irqrestore(&sem->real_lock, flags); + + return &ikglp_aff->obs; +} + + + + +static int gpu_replica_to_resource(struct ikglp_affinity* aff, + struct fifo_queue* fq) { + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + return(replica_to_gpu(aff, ikglp_get_idx(sem, fq))); +} + + +// Smart IKGLP Affinity + +//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff) +//{ +// struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); +// struct ikglp_queue_info *shortest = &aff->q_info[0]; +// int i; +// +// for(i = 1; i < sem->nr_replicas; ++i) { +// if(aff->q_info[i].estimated_len < shortest->estimated_len) { +// shortest = &aff->q_info[i]; +// } +// } +// +// return(shortest); +//} + +struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) +{ + // advise_enqueue must be smart as not not break IKGLP rules: + // * No queue can be greater than ceil(m/k) in length. We may return + // such a queue, but IKGLP will be smart enough as to send requests + // to donors or PQ. + // * Cannot let a queue idle if there exist waiting PQ/donors + // -- needed to guarantee parallel progress of waiters. + // + // We may be able to relax some of these constraints, but this will have to + // be carefully evaluated. + // + // Huristic strategy: Find the shortest queue that is not full. + + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + lt_t min_len; + int min_nr_users; + struct ikglp_queue_info *shortest; + struct fifo_queue *to_enqueue; + int i; + int affinity_gpu; + + int max_fifo_len = (aff->relax_max_fifo_len) ? + sem->m : sem->max_fifo_len; + + // simply pick the shortest queue if, we have no affinity, or we have + // affinity with the shortest + if(unlikely(tsk_rt(t)->last_gpu < 0)) { + affinity_gpu = aff->offset; // first gpu + TRACE_CUR("no affinity\n"); + } + else { + affinity_gpu = tsk_rt(t)->last_gpu; + } + + // all things being equal, let's start with the queue with which we have + // affinity. 
this helps us maintain affinity even when we don't have + // an estiamte for local-affinity execution time (i.e., 2nd time on GPU) + shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; + + // if(shortest == aff->shortest_queue) { + // TRACE_CUR("special case: have affinity with shortest queue\n"); + // goto out; + // } + + min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); + min_nr_users = *(shortest->nr_cur_users); + + TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", + get_gpu_estimate(t, MIG_LOCAL), + ikglp_get_idx(sem, shortest->q), + shortest->q->count, + min_len); + + for(i = 0; i < sem->nr_replicas; ++i) { + if(&aff->q_info[i] != shortest) { + if(aff->q_info[i].q->count < max_fifo_len) { + + lt_t est_len = + aff->q_info[i].estimated_len + + get_gpu_estimate(t, + gpu_migration_distance(tsk_rt(t)->last_gpu, + replica_to_gpu(aff, i))); + + // queue is smaller, or they're equal and the other has a smaller number + // of total users. + // + // tie-break on the shortest number of simult users. this only kicks in + // when there are more than 1 empty queues. + if((shortest->q->count >= max_fifo_len) || /* 'shortest' is full and i-th queue is not */ + (est_len < min_len) || /* i-th queue has shortest length */ + ((est_len == min_len) && /* equal lengths, but one has fewer over-all users */ + (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { + + shortest = &aff->q_info[i]; + min_len = est_len; + min_nr_users = *(aff->q_info[i].nr_cur_users); + } + + TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", + get_gpu_estimate(t, + gpu_migration_distance(tsk_rt(t)->last_gpu, + replica_to_gpu(aff, i))), + ikglp_get_idx(sem, aff->q_info[i].q), + aff->q_info[i].q->count, + est_len); + } + else { + TRACE_CUR("queue %d is too long. ineligible for enqueue.\n", + ikglp_get_idx(sem, aff->q_info[i].q)); + } + } + } + + if(shortest->q->count >= max_fifo_len) { + TRACE_CUR("selected fq %d is too long, but returning it anyway.\n", + ikglp_get_idx(sem, shortest->q)); + } + + to_enqueue = shortest->q; + TRACE_CUR("enqueue on fq %d (count = %d) (non-aff wanted fq %d)\n", + ikglp_get_idx(sem, to_enqueue), + to_enqueue->count, + ikglp_get_idx(sem, sem->shortest_fifo_queue)); + + return to_enqueue; + + //return(sem->shortest_fifo_queue); +} + + + + +static ikglp_wait_state_t* pick_steal(struct ikglp_affinity* aff, + int dest_gpu, + struct fifo_queue* fq) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + ikglp_wait_state_t *wait = NULL; + int max_improvement = -(MIG_NONE+1); + int replica = ikglp_get_idx(sem, fq); + + if(waitqueue_active(&fq->wait)) { + int this_gpu = replica_to_gpu(aff, replica); + struct list_head *pos; + + list_for_each(pos, &fq->wait.task_list) { + wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list); + ikglp_wait_state_t *tmp_wait = container_of(fq_wait, ikglp_wait_state_t, fq_node); + + int tmp_improvement = + gpu_migration_distance(this_gpu, tsk_rt(tmp_wait->task)->last_gpu) - + gpu_migration_distance(dest_gpu, tsk_rt(tmp_wait->task)->last_gpu); + + if(tmp_improvement > max_improvement) { + wait = tmp_wait; + max_improvement = tmp_improvement; + + if(max_improvement >= (MIG_NONE-1)) { + goto out; + } + } + } + + BUG_ON(!wait); + } + else { + TRACE_CUR("fq %d is empty!\n", replica); + } + +out: + + TRACE_CUR("Candidate victim from fq %d is %s/%d. aff improvement = %d.\n", + replica, + (wait) ? wait->task->comm : "nil", + (wait) ? 
wait->task->pid : -1, + max_improvement); + + return wait; +} + + +ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff, + struct fifo_queue* dst) +{ + // Huristic strategy: Find task with greatest improvement in affinity. + // + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + ikglp_wait_state_t *to_steal_state = NULL; +// ikglp_wait_state_t *default_to_steal_state = ikglp_find_hp_waiter_to_steal(sem); + int max_improvement = -(MIG_NONE+1); + int replica, i; + int dest_gpu; + + replica = ikglp_get_idx(sem, dst); + dest_gpu = replica_to_gpu(aff, replica); + + for(i = 0; i < sem->nr_replicas; ++i) { + ikglp_wait_state_t *tmp_to_steal_state = + pick_steal(aff, dest_gpu, &sem->fifo_queues[i]); + + if(tmp_to_steal_state) { + int tmp_improvement = + gpu_migration_distance(replica_to_gpu(aff, i), tsk_rt(tmp_to_steal_state->task)->last_gpu) - + gpu_migration_distance(dest_gpu, tsk_rt(tmp_to_steal_state->task)->last_gpu); + + if(tmp_improvement > max_improvement) { + to_steal_state = tmp_to_steal_state; + max_improvement = tmp_improvement; + + if(max_improvement >= (MIG_NONE-1)) { + goto out; + } + } + } + } + +out: + if(!to_steal_state) { + TRACE_CUR("Could not find anyone to steal.\n"); + } + else { + TRACE_CUR("Selected victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n", + to_steal_state->task->comm, to_steal_state->task->pid, + ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq), + replica_to_gpu(aff, ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq)), + ikglp_get_idx(sem, dst), + dest_gpu, + max_improvement); + +// TRACE_CUR("Non-aff wanted to select victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n", +// default_to_steal_state->task->comm, default_to_steal_state->task->pid, +// ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq), +// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)), +// ikglp_get_idx(sem, dst), +// replica_to_gpu(aff, ikglp_get_idx(sem, dst)), +// +// gpu_migration_distance( +// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)), +// tsk_rt(default_to_steal_state->task)->last_gpu) - +// gpu_migration_distance(dest_gpu, tsk_rt(default_to_steal_state->task)->last_gpu)); + } + + return(to_steal_state); +} + + +static inline int has_donor(wait_queue_t* fq_wait) +{ + ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node); + return(wait->donee_heap_node.donor_info != NULL); +} + +static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff, + struct fifo_queue* fq, + int* dist_from_head) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + struct task_struct *donee; + ikglp_donee_heap_node_t *donee_node; + struct task_struct *mth_highest = ikglp_mth_highest(sem); + +// lt_t now = litmus_clock(); +// +// TRACE_CUR("fq %d: mth_highest: %s/%d, deadline = %d: (donor) = ??? 
", +// ikglp_get_idx(sem, fq), +// mth_highest->comm, mth_highest->pid, +// (int)get_deadline(mth_highest) - now); + + if(fq->owner && + fq->donee_heap_node.donor_info == NULL && + mth_highest != fq->owner && + litmus->__compare(mth_highest, BASE, fq->owner, BASE)) { + donee = fq->owner; + donee_node = &(fq->donee_heap_node); + *dist_from_head = 0; + + BUG_ON(donee != donee_node->task); + + TRACE_CUR("picked owner of fq %d as donee\n", + ikglp_get_idx(sem, fq)); + + goto out; + } + else if(waitqueue_active(&fq->wait)) { + struct list_head *pos; + + +// TRACE_CUR("fq %d: owner: %s/%d, deadline = %d: (donor) = %s/%d " +// "(mth_highest != fq->owner) = %d " +// "(mth_highest > fq->owner) = %d\n", +// ikglp_get_idx(sem, fq), +// (fq->owner) ? fq->owner->comm : "nil", +// (fq->owner) ? fq->owner->pid : -1, +// (fq->owner) ? (int)get_deadline(fq->owner) - now : -999, +// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->comm : "nil", +// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->pid : -1, +// (mth_highest != fq->owner), +// (litmus->__compare(mth_highest, BASE, fq->owner, BASE))); + + + *dist_from_head = 1; + + // iterating from the start of the queue is nice since this means + // the donee will be closer to obtaining a resource. + list_for_each(pos, &fq->wait.task_list) { + wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list); + ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node); + +// TRACE_CUR("fq %d: waiter %d: %s/%d, deadline = %d (donor) = %s/%d " +// "(mth_highest != wait->task) = %d " +// "(mth_highest > wait->task) = %d\n", +// ikglp_get_idx(sem, fq), +// dist_from_head, +// wait->task->comm, wait->task->pid, +// (int)get_deadline(wait->task) - now, +// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->comm : "nil", +// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->pid : -1, +// (mth_highest != wait->task), +// (litmus->__compare(mth_highest, BASE, wait->task, BASE))); + + + if(!has_donor(fq_wait) && + mth_highest != wait->task && + litmus->__compare(mth_highest, BASE, wait->task, BASE)) { + donee = (struct task_struct*) fq_wait->private; + donee_node = &wait->donee_heap_node; + + BUG_ON(donee != donee_node->task); + + TRACE_CUR("picked waiter in fq %d as donee\n", + ikglp_get_idx(sem, fq)); + + goto out; + } + ++(*dist_from_head); + } + } + + donee = NULL; + donee_node = NULL; + //*dist_from_head = sem->max_fifo_len + 1; + *dist_from_head = IKGLP_INVAL_DISTANCE; + + TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); + +out: + + TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n", + ikglp_get_idx(sem, fq), + (donee) ? (donee)->comm : "nil", + (donee) ? (donee)->pid : -1, + *dist_from_head); + + return donee_node; +} + +ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection( + struct ikglp_affinity* aff, + struct task_struct* donor) +{ + // Huristic strategy: Find the highest-priority donee that is waiting on + // a queue closest to our affinity. (1) The donee CANNOT already have a + // donor (exception: donee is the lowest-prio task in the donee heap). + // (2) Requests in 'top_m' heap are ineligible. + // + // Further strategy: amongst elible donees waiting for the same GPU, pick + // the one closest to the head of the FIFO queue (including owners). 
+ // + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + ikglp_donee_heap_node_t *donee_node; + gpu_migration_dist_t distance; + int start, i, j; + + ikglp_donee_heap_node_t *default_donee; + ikglp_wait_state_t *default_donee_donor_info; + + if(tsk_rt(donor)->last_gpu < 0) { + // no affinity. just return the min prio, like standard IKGLP + // TODO: Find something closer to the head of the queue?? + donee_node = binheap_top_entry(&sem->donees, + ikglp_donee_heap_node_t, + node); + goto out; + } + + + // Temporarily break any donation relation the default donee (the lowest + // prio task in the FIFO queues) to make it eligible for selection below. + // + // NOTE: The original donor relation *must* be restored, even if we select + // the default donee throug affinity-aware selection, before returning + // from this function so we don't screw up our heap ordering. + // The standard IKGLP algorithm will steal the donor relationship if needed. + default_donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); + default_donee_donor_info = default_donee->donor_info; // back-up donor relation + default_donee->donor_info = NULL; // temporarily break any donor relation. + + // initialize our search + donee_node = NULL; + distance = MIG_NONE; + + // TODO: The below search logic may work well for locating nodes to steal + // when an FQ goes idle. Validate this code and apply it to stealing. + + // begin search with affinity GPU. + start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu); + i = start; + do { // "for each gpu" / "for each aff->nr_rsrc" + gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i); + + // only interested in queues that will improve our distance + if(temp_distance < distance || donee_node == NULL) { + int dist_from_head = IKGLP_INVAL_DISTANCE; + + TRACE_CUR("searching for donor on GPU %d", i); + + // visit each queue and pick a donee. bail as soon as we find + // one for this class. + + for(j = 0; j < aff->nr_simult; ++j) { + int temp_dist_from_head; + ikglp_donee_heap_node_t *temp_donee_node; + struct fifo_queue *fq; + + fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]); + temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head); + + if(temp_dist_from_head < dist_from_head) + { + // we check all the FQs for this GPU to spread priorities + // out across the queues. does this decrease jitter? + donee_node = temp_donee_node; + dist_from_head = temp_dist_from_head; + } + } + + if(dist_from_head != IKGLP_INVAL_DISTANCE) { + TRACE_CUR("found donee %s/%d and is the %d-th waiter.\n", + donee_node->task->comm, donee_node->task->pid, + dist_from_head); + } + else { + TRACE_CUR("found no eligible donors from GPU %d\n", i); + } + } + else { + TRACE_CUR("skipping GPU %d (distance = %d, best donor " + "distance = %d)\n", i, temp_distance, distance); + } + + i = (i+1 < aff->nr_rsrc) ? i+1 : 0; // increment with wrap-around + } while (i != start); + + + // restore old donor info state. + default_donee->donor_info = default_donee_donor_info; + + if(!donee_node) { + donee_node = default_donee; + + TRACE_CUR("Could not find a donee. 
We have to steal one.\n"); + WARN_ON(default_donee->donor_info == NULL); + } + +out: + + TRACE_CUR("Selected donee %s/%d on fq %d (GPU %d) for %s/%d with affinity for GPU %d\n", + donee_node->task->comm, donee_node->task->pid, + ikglp_get_idx(sem, donee_node->fq), + replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)), + donor->comm, donor->pid, tsk_rt(donor)->last_gpu); + + return(donee_node); +} + + + +static void __find_closest_donor(int target_gpu, + struct binheap_node* donor_node, + ikglp_wait_state_t** cur_closest, + int* cur_dist) +{ + ikglp_wait_state_t *this_donor = + binheap_entry(donor_node, ikglp_wait_state_t, node); + + int this_dist = + gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu); + +// TRACE_CUR("%s/%d: dist from target = %d\n", +// this_donor->task->comm, +// this_donor->task->pid, +// this_dist); + + if(this_dist < *cur_dist) { + // take this donor + *cur_dist = this_dist; + *cur_closest = this_donor; + } + else if(this_dist == *cur_dist) { + // priority tie-break. Even though this is a pre-order traversal, + // this is a heap, not a binary tree, so we still need to do a priority + // comparision. + if(!(*cur_closest) || + litmus->compare(this_donor->task, (*cur_closest)->task)) { + *cur_dist = this_dist; + *cur_closest = this_donor; + } + } + + if(donor_node->left) __find_closest_donor(target_gpu, donor_node->left, cur_closest, cur_dist); + if(donor_node->right) __find_closest_donor(target_gpu, donor_node->right, cur_closest, cur_dist); +} + +ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq) +{ + // Huristic strategy: Find donor with the closest affinity to fq. + // Tie-break on priority. + + // We need to iterate over all the donors to do this. Unfortunatly, + // our donors are organized in a heap. We'll visit each node with a + // recurisve call. This is realitively safe since there are only sem->m + // donors, at most. We won't recurse too deeply to have to worry about + // our stack. (even with 128 CPUs, our nest depth is at most 7 deep). + + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + ikglp_wait_state_t *donor = NULL; + int distance = MIG_NONE; + int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq)); + ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); + + __find_closest_donor(gpu, sem->donors.root, &donor, &distance); + + TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d " + "(non-aff wanted %s/%d). 
differs = %d\n", + donor->task->comm, donor->task->pid, + distance, + ikglp_get_idx(sem, fq), + default_donor->task->comm, default_donor->task->pid, + (donor->task != default_donor->task) + ); + + return(donor); +} + + + +void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int replica = ikglp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + struct ikglp_queue_info *info = &aff->q_info[replica]; + lt_t est_time; + lt_t est_len_before; + + if(current == t) { + tsk_rt(t)->suspend_gpu_tracker_on_block = 1; + } + + est_len_before = info->estimated_len; + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + info->estimated_len += est_time; + + TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n", + ikglp_get_idx(sem, info->q), + est_len_before, est_time, + info->estimated_len); + + // if(aff->shortest_queue == info) { + // // we may no longer be the shortest + // aff->shortest_queue = ikglp_aff_find_shortest(aff); + // + // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", + // ikglp_get_idx(sem, aff->shortest_queue->q), + // aff->shortest_queue->q->count, + // aff->shortest_queue->estimated_len); + // } +} + +void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int replica = ikglp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + struct ikglp_queue_info *info = &aff->q_info[replica]; + lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + + if(est_time > info->estimated_len) { + WARN_ON(1); + info->estimated_len = 0; + } + else { + info->estimated_len -= est_time; + } + + TRACE_CUR("fq %d est len is now %llu\n", + ikglp_get_idx(sem, info->q), + info->estimated_len); + + // check to see if we're the shortest queue now. + // if((aff->shortest_queue != info) && + // (aff->shortest_queue->estimated_len > info->estimated_len)) { + // + // aff->shortest_queue = info; + // + // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", + // ikglp_get_idx(sem, info->q), + // info->q->count, + // info->estimated_len); + // } +} + +void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, + struct fifo_queue* fq, + struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int replica = ikglp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + + tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration + + TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n", + t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration); + + // count the number or resource holders + ++(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 1, t); // register + + tsk_rt(t)->suspend_gpu_tracker_on_block = 0; + reset_gpu_tracker(t); + start_gpu_tracker(t); +} + +void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, + struct fifo_queue* fq, + struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int replica = ikglp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + lt_t est_time; + + stop_gpu_tracker(t); // stop the tracker before we do anything else. 
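	// Ordering note: the tracker is stopped first so get_gpu_time(t)
	// covers only this critical section; est_time is computed against the
	// old last_gpu so the traced prediction matches the migration class
	// recorded in gpu_ikglp_notify_acquired(); last_gpu is only
	// overwritten at the end of this function.  The observed hold time is
	// fed back into the per-migration-class estimator by
	// update_gpu_estimate() (litmus/gpu_affinity.c), whose feedback
	// parameters (gpu_fb_param_a/b) are seeded in
	// init_gpu_affinity_state().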
+ + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + + // count the number or resource holders + --(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 0, t); // unregister + + // update estimates + update_gpu_estimate(t, get_gpu_time(t)); + + TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. " + "estimated was %llu. diff is %d\n", + t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, + tsk_rt(t)->gpu_migration, + get_gpu_time(t), + est_time, + (long long)get_gpu_time(t) - (long long)est_time); + + tsk_rt(t)->last_gpu = gpu; +} + +struct ikglp_affinity_ops gpu_ikglp_affinity = +{ + .advise_enqueue = gpu_ikglp_advise_enqueue, + .advise_steal = gpu_ikglp_advise_steal, + .advise_donee_selection = gpu_ikglp_advise_donee_selection, + .advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq, + + .notify_enqueue = gpu_ikglp_notify_enqueue, + .notify_dequeue = gpu_ikglp_notify_dequeue, + .notify_acquired = gpu_ikglp_notify_acquired, + .notify_freed = gpu_ikglp_notify_freed, + + .replica_to_resource = gpu_replica_to_resource, +}; + +struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops, + void* __user args) +{ + return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args); +} + + + + + + + + +// Simple ikglp Affinity (standard ikglp with auto-gpu registration) + +struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int min_count; + int min_nr_users; + struct ikglp_queue_info *shortest; + struct fifo_queue *to_enqueue; + int i; + + // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); + + shortest = &aff->q_info[0]; + min_count = shortest->q->count; + min_nr_users = *(shortest->nr_cur_users); + + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", + ikglp_get_idx(sem, shortest->q), + shortest->q->count, + min_nr_users); + + for(i = 1; i < sem->nr_replicas; ++i) { + int len = aff->q_info[i].q->count; + + // queue is smaller, or they're equal and the other has a smaller number + // of total users. + // + // tie-break on the shortest number of simult users. this only kicks in + // when there are more than 1 empty queues. 
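		// That is: prefer the strictly shorter queue; on a length tie,
		// prefer the replica whose backing GPU currently has the fewest
		// lock holders (nr_cur_users is shared by every replica of the
		// same GPU).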
+ if((len < min_count) || + ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { + shortest = &aff->q_info[i]; + min_count = shortest->q->count; + min_nr_users = *(aff->q_info[i].nr_cur_users); + } + + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", + ikglp_get_idx(sem, aff->q_info[i].q), + aff->q_info[i].q->count, + *(aff->q_info[i].nr_cur_users)); + } + + to_enqueue = shortest->q; + TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", + ikglp_get_idx(sem, to_enqueue), + ikglp_get_idx(sem, sem->shortest_fifo_queue)); + + return to_enqueue; +} + +ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff, + struct fifo_queue* dst) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n"); + return ikglp_find_hp_waiter_to_steal(sem); +} + +ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff, struct task_struct* donor) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); + return(donee); +} + +ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); + return(donor); +} + +void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) +{ + // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n"); +} + +void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) +{ + // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n"); +} + +void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int replica = ikglp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + + // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n"); + + // count the number or resource holders + ++(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 1, t); // register +} + +void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + int replica = ikglp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + + // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n"); + // count the number or resource holders + --(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 0, t); // unregister +} + +struct ikglp_affinity_ops simple_gpu_ikglp_affinity = +{ + .advise_enqueue = simple_gpu_ikglp_advise_enqueue, + .advise_steal = simple_gpu_ikglp_advise_steal, + .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection, + .advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq, + + .notify_enqueue = simple_gpu_ikglp_notify_enqueue, + .notify_dequeue = simple_gpu_ikglp_notify_dequeue, + .notify_acquired = simple_gpu_ikglp_notify_acquired, + .notify_freed = simple_gpu_ikglp_notify_freed, + + .replica_to_resource = gpu_replica_to_resource, +}; + +struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, + void* __user args) +{ + return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args); +} + +#endif + + + + + + + + + diff --git a/litmus/jobs.c 
b/litmus/jobs.c index 36e314625d86..1d97462cc128 100644 --- a/litmus/jobs.c +++ b/litmus/jobs.c @@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t) { BUG_ON(!t); /* prepare next release */ - t->rt_param.job_params.release = t->rt_param.job_params.deadline; - t->rt_param.job_params.deadline += get_rt_period(t); + + if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) { + /* allow the release point to slip if we've passed our deadline. */ + lt_t now = litmus_clock(); + t->rt_param.job_params.release = + (t->rt_param.job_params.deadline < now) ? + now : t->rt_param.job_params.deadline; + t->rt_param.job_params.deadline = + t->rt_param.job_params.release + get_rt_period(t); + } + else { + t->rt_param.job_params.release = t->rt_param.job_params.deadline; + t->rt_param.job_params.deadline += get_rt_period(t); + } + t->rt_param.job_params.exec_time = 0; /* update job sequence number */ t->rt_param.job_params.job_no++; diff --git a/litmus/kexclu_affinity.c b/litmus/kexclu_affinity.c new file mode 100644 index 000000000000..5ef5e54d600d --- /dev/null +++ b/litmus/kexclu_affinity.c @@ -0,0 +1,92 @@ +#include +#include +#include +#include +#include + +#include + +static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg); +static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg); +static int close_generic_aff_obs(struct od_table_entry* entry); +static void destroy_generic_aff_obs(obj_type_t type, void* sem); + +struct fdso_ops generic_affinity_ops = { + .create = create_generic_aff_obs, + .open = open_generic_aff_obs, + .close = close_generic_aff_obs, + .destroy = destroy_generic_aff_obs +}; + +static atomic_t aff_obs_id_gen = ATOMIC_INIT(0); + +static inline bool is_affinity_observer(struct od_table_entry *entry) +{ + return (entry->class == &generic_affinity_ops); +} + +static inline struct affinity_observer* get_affinity_observer(struct od_table_entry* entry) +{ + BUG_ON(!is_affinity_observer(entry)); + return (struct affinity_observer*) entry->obj->obj; +} + +static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg) +{ + struct affinity_observer* aff_obs; + int err; + + err = litmus->allocate_aff_obs(&aff_obs, type, arg); + if (err == 0) { + BUG_ON(!aff_obs->lock); + aff_obs->type = type; + *obj_ref = aff_obs; + } + return err; +} + +static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg) +{ + struct affinity_observer* aff_obs = get_affinity_observer(entry); + if (aff_obs->ops->open) + return aff_obs->ops->open(aff_obs, arg); + else + return 0; /* default: any task can open it */ +} + +static int close_generic_aff_obs(struct od_table_entry* entry) +{ + struct affinity_observer* aff_obs = get_affinity_observer(entry); + if (aff_obs->ops->close) + return aff_obs->ops->close(aff_obs); + else + return 0; /* default: closing succeeds */ +} + +static void destroy_generic_aff_obs(obj_type_t type, void* obj) +{ + struct affinity_observer* aff_obs = (struct affinity_observer*) obj; + aff_obs->ops->deallocate(aff_obs); +} + + +struct litmus_lock* get_lock_from_od(int od) +{ + extern struct fdso_ops generic_lock_ops; + + struct od_table_entry *entry = get_entry_for_od(od); + + if(entry && entry->class == &generic_lock_ops) { + return (struct litmus_lock*) entry->obj->obj; + } + return NULL; +} + +void affinity_observer_new(struct affinity_observer* aff, + struct affinity_observer_ops* ops, + struct affinity_observer_args* args) +{ + aff->ops = ops; + aff->lock = 
get_lock_from_od(args->lock_od); + aff->ident = atomic_inc_return(&aff_obs_id_gen); +} \ No newline at end of file diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c new file mode 100644 index 000000000000..bff857ed8d4e --- /dev/null +++ b/litmus/kfmlp_lock.c @@ -0,0 +1,1002 @@ +#include +#include + +#include +#include +#include + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#include +#endif + +#include + +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, + struct kfmlp_queue* queue) +{ + return (queue - &sem->queues[0]); +} + +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, + struct task_struct* holder) +{ + int i; + for(i = 0; i < sem->num_resources; ++i) + if(sem->queues[i].owner == holder) + return(&sem->queues[i]); + return(NULL); +} + +/* caller is responsible for locking */ +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, + struct task_struct *skip) +{ + struct list_head *pos; + struct task_struct *queued, *found = NULL; + + list_for_each(pos, &kqueue->wait.task_list) { + queued = (struct task_struct*) list_entry(pos, wait_queue_t, + task_list)->private; + + /* Compare task prios, find high prio task. */ + //if (queued != skip && edf_higher_prio(queued, found)) + if (queued != skip && litmus->compare(queued, found)) + found = queued; + } + return found; +} + +static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* sem, + struct kfmlp_queue* search_start) +{ + // we start our search at search_start instead of at the beginning of the + // queue list to load-balance across all resources. + struct kfmlp_queue* step = search_start; + struct kfmlp_queue* shortest = sem->shortest_queue; + + do + { + step = (step+1 != &sem->queues[sem->num_resources]) ? + step+1 : &sem->queues[0]; + + if(step->count < shortest->count) + { + shortest = step; + if(step->count == 0) + break; /* can't get any shorter */ + } + + }while(step != search_start); + + return(shortest); +} + + +static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, + wait_queue_t** to_steal, + struct kfmlp_queue** to_steal_from) +{ + /* must hold sem->lock */ + + int i; + + *to_steal = NULL; + *to_steal_from = NULL; + + for(i = 0; i < sem->num_resources; ++i) + { + if( (sem->queues[i].count > 1) && + ((*to_steal_from == NULL) || + //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) + (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) ) + { + *to_steal_from = &sem->queues[i]; + } + } + + if(*to_steal_from) + { + struct list_head *pos; + struct task_struct *target = (*to_steal_from)->hp_waiter; + + TRACE_CUR("want to steal hp_waiter (%s/%d) from queue %d\n", + target->comm, + target->pid, + kfmlp_get_idx(sem, *to_steal_from)); + + list_for_each(pos, &(*to_steal_from)->wait.task_list) + { + wait_queue_t *node = list_entry(pos, wait_queue_t, task_list); + struct task_struct *queued = (struct task_struct*) node->private; + /* Compare task prios, find high prio task. */ + if (queued == target) + { + *to_steal = node; + + TRACE_CUR("steal: selected %s/%d from queue %d\n", + queued->comm, queued->pid, + kfmlp_get_idx(sem, *to_steal_from)); + + return queued; + } + } + + TRACE_CUR("Could not find %s/%d in queue %d!!! 
THIS IS A BUG!\n", + target->comm, + target->pid, + kfmlp_get_idx(sem, *to_steal_from)); + } + + return NULL; +} + +static void kfmlp_steal_node(struct kfmlp_semaphore *sem, + struct kfmlp_queue *dst, + wait_queue_t *wait, + struct kfmlp_queue *src) +{ + struct task_struct* t = (struct task_struct*) wait->private; + + __remove_wait_queue(&src->wait, wait); + --(src->count); + + if(t == src->hp_waiter) { + src->hp_waiter = kfmlp_find_hp_waiter(src, NULL); + + TRACE_CUR("queue %d: %s/%d is new hp_waiter\n", + kfmlp_get_idx(sem, src), + (src->hp_waiter) ? src->hp_waiter->comm : "nil", + (src->hp_waiter) ? src->hp_waiter->pid : -1); + + if(src->owner && tsk_rt(src->owner)->inh_task == t) { + litmus->decrease_prio(src->owner, src->hp_waiter); + } + } + + if(sem->shortest_queue->count > src->count) { + sem->shortest_queue = src; + TRACE_CUR("queue %d is the shortest\n", kfmlp_get_idx(sem, sem->shortest_queue)); + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t); + } +#endif + + init_waitqueue_entry(wait, t); + __add_wait_queue_tail_exclusive(&dst->wait, wait); + ++(dst->count); + + if(litmus->compare(t, dst->hp_waiter)) { + dst->hp_waiter = t; + + TRACE_CUR("queue %d: %s/%d is new hp_waiter\n", + kfmlp_get_idx(sem, dst), + t->comm, t->pid); + + if(dst->owner && litmus->compare(t, dst->owner)) + { + litmus->increase_prio(dst->owner, t); + } + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t); + } +#endif +} + + +int kfmlp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue* my_queue = NULL; + wait_queue_t wait; + unsigned long flags; + + if (!is_realtime(t)) + return -EPERM; + + spin_lock_irqsave(&sem->lock, flags); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t); + } + if(!my_queue) { + my_queue = sem->shortest_queue; + } +#else + my_queue = sem->shortest_queue; +#endif + + if (my_queue->owner) { + /* resource is not free => must suspend and wait */ + TRACE_CUR("queue %d: Resource is not free => must suspend and wait. (queue size = %d)\n", + kfmlp_get_idx(sem, my_queue), + my_queue->count); + + init_waitqueue_entry(&wait, t); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); + + TRACE_CUR("queue %d: hp_waiter is currently %s/%d\n", + kfmlp_get_idx(sem, my_queue), + (my_queue->hp_waiter) ? my_queue->hp_waiter->comm : "nil", + (my_queue->hp_waiter) ? 
my_queue->hp_waiter->pid : -1); + + /* check if we need to activate priority inheritance */ + //if (edf_higher_prio(t, my_queue->hp_waiter)) + if (litmus->compare(t, my_queue->hp_waiter)) { + my_queue->hp_waiter = t; + TRACE_CUR("queue %d: %s/%d is new hp_waiter\n", + kfmlp_get_idx(sem, my_queue), + t->comm, t->pid); + + //if (edf_higher_prio(t, my_queue->owner)) + if (litmus->compare(t, my_queue->owner)) { + litmus->increase_prio(my_queue->owner, my_queue->hp_waiter); + } + } + + ++(my_queue->count); + + if(my_queue == sem->shortest_queue) { + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + TRACE_CUR("queue %d is the shortest\n", + kfmlp_get_idx(sem, sem->shortest_queue)); + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t); + } +#endif + + /* release lock before sleeping */ + spin_unlock_irqrestore(&sem->lock, flags); + + /* We depend on the FIFO order. Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release (or steal). + */ + schedule(); + + + if(my_queue->owner == t) { + TRACE_CUR("queue %d: acquired through waiting\n", + kfmlp_get_idx(sem, my_queue)); + } + else { + /* this case may happen if our wait entry was stolen + between queues. record where we went. */ + my_queue = kfmlp_get_queue(sem, t); + + BUG_ON(!my_queue); + TRACE_CUR("queue %d: acquired through stealing\n", + kfmlp_get_idx(sem, my_queue)); + } + } + else { + TRACE_CUR("queue %d: acquired immediately\n", + kfmlp_get_idx(sem, my_queue)); + + my_queue->owner = t; + + ++(my_queue->count); + + if(my_queue == sem->shortest_queue) { + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + TRACE_CUR("queue %d is the shortest\n", + kfmlp_get_idx(sem, sem->shortest_queue)); + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t); + sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t); + } +#endif + + spin_unlock_irqrestore(&sem->lock, flags); + } + + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue); + } +#endif + return kfmlp_get_idx(sem, my_queue); +} + + +int kfmlp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue, *to_steal_from; + unsigned long flags; + int err = 0; + + my_queue = kfmlp_get_queue(sem, t); + + if (!my_queue) { + err = -EINVAL; + goto out; + } + + spin_lock_irqsave(&sem->lock, flags); + + TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue)); + + my_queue->owner = NULL; // clear ownership + --(my_queue->count); + + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + TRACE_CUR("queue %d is the shortest\n", + kfmlp_get_idx(sem, sem->shortest_queue)); + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t); + sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t); + } +#endif + + /* we lose the benefit of priority inheritance (if any) */ + if (tsk_rt(t)->inh_task) + litmus->decrease_prio(t, NULL); + + + /* check if there are jobs waiting for this resource */ +RETRY: + next = __waitqueue_remove_first(&my_queue->wait); + if (next) { + /* next becomes the resouce holder */ + my_queue->owner = next; + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) 
{ + sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next); + } +#endif + + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", + kfmlp_get_idx(sem, my_queue), next->comm, next->pid); + + /* determine new hp_waiter if necessary */ + if (next == my_queue->hp_waiter) { + TRACE_TASK(next, "was highest-prio waiter\n"); + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); + if (my_queue->hp_waiter) + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); + else + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); + } else { + /* Well, if next is not the highest-priority waiter, + * then it ought to inherit the highest-priority + * waiter's priority. */ + litmus->increase_prio(next, my_queue->hp_waiter); + } + + /* wake up next */ + wake_up_process(next); + } + else { + // TODO: put this stealing logic before we attempt to release + // our resource. (simplifies code and gets rid of ugly goto RETRY. + wait_queue_t *wait; + + TRACE_CUR("queue %d: looking to steal someone...\n", + kfmlp_get_idx(sem, my_queue)); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + next = (sem->aff_obs) ? + sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) : + kfmlp_select_hp_steal(sem, &wait, &to_steal_from); +#else + next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from); +#endif + + if(next) { + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", + kfmlp_get_idx(sem, my_queue), + next->comm, next->pid, + kfmlp_get_idx(sem, to_steal_from)); + + kfmlp_steal_node(sem, my_queue, wait, to_steal_from); + + goto RETRY; // will succeed this time. + } + else { + TRACE_CUR("queue %d: no one to steal.\n", + kfmlp_get_idx(sem, my_queue)); + } + } + + spin_unlock_irqrestore(&sem->lock, flags); + +out: + return err; +} + +int kfmlp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + struct kfmlp_queue *my_queue; + unsigned long flags; + + int owner; + + spin_lock_irqsave(&sem->lock, flags); + + my_queue = kfmlp_get_queue(sem, t); + owner = (my_queue) ? 
(my_queue->owner == t) : 0; + + spin_unlock_irqrestore(&sem->lock, flags); + + if (owner) + kfmlp_unlock(l); + + return 0; +} + +void kfmlp_free(struct litmus_lock* l) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(l); + kfree(sem->queues); + kfree(sem); +} + + + +struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) +{ + struct kfmlp_semaphore* sem; + int num_resources = 0; + int i; + + if(!access_ok(VERIFY_READ, args, sizeof(num_resources))) + { + return(NULL); + } + if(__copy_from_user(&num_resources, args, sizeof(num_resources))) + { + return(NULL); + } + if(num_resources < 1) + { + return(NULL); + } + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if(!sem) + { + return(NULL); + } + + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); + if(!sem->queues) + { + kfree(sem); + return(NULL); + } + + sem->litmus_lock.ops = ops; + spin_lock_init(&sem->lock); + sem->num_resources = num_resources; + + for(i = 0; i < num_resources; ++i) + { + sem->queues[i].owner = NULL; + sem->queues[i].hp_waiter = NULL; + init_waitqueue_head(&sem->queues[i].wait); + sem->queues[i].count = 0; + } + + sem->shortest_queue = &sem->queues[0]; + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + sem->aff_obs = NULL; +#endif + + return &sem->litmus_lock; +} + + + + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica) +{ + int gpu = replica % aff->nr_rsrc; + return gpu; +} + +static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica) +{ + int gpu = __replica_to_gpu(aff, replica) + aff->offset; + return gpu; +} + +static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu) +{ + int replica = gpu - aff->offset; + return replica; +} + + +int kfmlp_aff_obs_close(struct affinity_observer* obs) +{ + return 0; +} + +void kfmlp_aff_obs_free(struct affinity_observer* obs) +{ + struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs); + kfree(kfmlp_aff->nr_cur_users_on_rsrc); + kfree(kfmlp_aff->q_info); + kfree(kfmlp_aff); +} + +static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops, + struct kfmlp_affinity_ops* kfmlp_ops, + void* __user args) +{ + struct kfmlp_affinity* kfmlp_aff; + struct gpu_affinity_observer_args aff_args; + struct kfmlp_semaphore* sem; + int i; + unsigned long flags; + + if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { + return(NULL); + } + if(__copy_from_user(&aff_args, args, sizeof(aff_args))) { + return(NULL); + } + + sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); + + if(sem->litmus_lock.type != KFMLP_SEM) { + TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); + return(NULL); + } + + if((aff_args.nr_simult_users <= 0) || + (sem->num_resources%aff_args.nr_simult_users != 0)) { + TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " + "(%d) per replica. #replicas should be evenly divisible " + "by #simult_users.\n", + sem->litmus_lock.ident, + sem->num_resources, + aff_args.nr_simult_users); + return(NULL); + } + + if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { + TRACE_CUR("System does not support #simult_users > %d. 
%d requested.\n", + NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); +// return(NULL); + } + + kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); + if(!kfmlp_aff) { + return(NULL); + } + + kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL); + if(!kfmlp_aff->q_info) { + kfree(kfmlp_aff); + return(NULL); + } + + kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL); + if(!kfmlp_aff->nr_cur_users_on_rsrc) { + kfree(kfmlp_aff->q_info); + kfree(kfmlp_aff); + return(NULL); + } + + affinity_observer_new(&kfmlp_aff->obs, ops, &aff_args.obs); + + kfmlp_aff->ops = kfmlp_ops; + kfmlp_aff->offset = aff_args.replica_to_gpu_offset; + kfmlp_aff->nr_simult = aff_args.nr_simult_users; + kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; + + memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_rsrc)); + + for(i = 0; i < sem->num_resources; ++i) { + kfmlp_aff->q_info[i].q = &sem->queues[i]; + kfmlp_aff->q_info[i].estimated_len = 0; + + // multiple q_info's will point to the same resource (aka GPU) if + // aff_args.nr_simult_users > 1 + kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)]; + } + + // attach observer to the lock + spin_lock_irqsave(&sem->lock, flags); + sem->aff_obs = kfmlp_aff; + spin_unlock_irqrestore(&sem->lock, flags); + + return &kfmlp_aff->obs; +} + + + + +static int gpu_replica_to_resource(struct kfmlp_affinity* aff, + struct kfmlp_queue* fq) { + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq))); +} + + +// Smart KFMLP Affinity + +//static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) +//{ +// struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); +// struct kfmlp_queue_info *shortest = &aff->q_info[0]; +// int i; +// +// for(i = 1; i < sem->num_resources; ++i) { +// if(aff->q_info[i].estimated_len < shortest->estimated_len) { +// shortest = &aff->q_info[i]; +// } +// } +// +// return(shortest); +//} + +struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + lt_t min_len; + int min_nr_users; + struct kfmlp_queue_info *shortest; + struct kfmlp_queue *to_enqueue; + int i; + int affinity_gpu; + + // simply pick the shortest queue if, we have no affinity, or we have + // affinity with the shortest + if(unlikely(tsk_rt(t)->last_gpu < 0)) { + affinity_gpu = aff->offset; // first gpu + TRACE_CUR("no affinity\n"); + } + else { + affinity_gpu = tsk_rt(t)->last_gpu; + } + + // all things being equal, let's start with the queue with which we have + // affinity. 
this helps us maintain affinity even when we don't have + // an estiamte for local-affinity execution time (i.e., 2nd time on GPU) + shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; + +// if(shortest == aff->shortest_queue) { +// TRACE_CUR("special case: have affinity with shortest queue\n"); +// goto out; +// } + + min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); + min_nr_users = *(shortest->nr_cur_users); + + TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", + get_gpu_estimate(t, MIG_LOCAL), + kfmlp_get_idx(sem, shortest->q), + min_len); + + for(i = 0; i < sem->num_resources; ++i) { + if(&aff->q_info[i] != shortest) { + + lt_t est_len = + aff->q_info[i].estimated_len + + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))); + + // queue is smaller, or they're equal and the other has a smaller number + // of total users. + // + // tie-break on the shortest number of simult users. this only kicks in + // when there are more than 1 empty queues. + if((est_len < min_len) || + ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { + shortest = &aff->q_info[i]; + min_len = est_len; + min_nr_users = *(aff->q_info[i].nr_cur_users); + } + + TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))), + kfmlp_get_idx(sem, aff->q_info[i].q), + est_len); + } + } + + to_enqueue = shortest->q; + TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", + kfmlp_get_idx(sem, to_enqueue), + kfmlp_get_idx(sem, sem->shortest_queue)); + + return to_enqueue; +} + +struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + + // For now, just steal highest priority waiter + // TODO: Implement affinity-aware stealing. 
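	// One possible shape for that TODO, sketched under #if 0 (nothing
	// below is compiled): mirror pick_steal() from the IKGLP affinity
	// code earlier in this patch and take the waiter whose move to the
	// destination GPU gives the largest improvement in migration
	// distance, breaking ties on priority.  The function name, the
	// dest_gpu parameter, and the tie-break are assumptions of this
	// sketch; the helpers it calls (kfmlp_from_lock, replica_to_gpu,
	// gpu_migration_distance, litmus->compare) are the ones already used
	// in this file.
#if 0
static struct task_struct* gpu_kfmlp_pick_steal_sketch(
	struct kfmlp_affinity* aff,
	wait_queue_t** to_steal,
	struct kfmlp_queue** to_steal_from,
	int dest_gpu)
{
	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
	struct task_struct *victim = NULL;
	int max_improvement = -(MIG_NONE+1);
	int i;

	for(i = 0; i < sem->num_resources; ++i) {
		struct kfmlp_queue *src = &sem->queues[i];
		int src_gpu = replica_to_gpu(aff, i);
		struct list_head *pos;

		if(src->count <= 1)  // need a waiter beyond the owner
			continue;

		list_for_each(pos, &src->wait.task_list) {
			wait_queue_t *node = list_entry(pos, wait_queue_t, task_list);
			struct task_struct *queued = (struct task_struct*) node->private;
			int improvement =
				gpu_migration_distance(src_gpu, tsk_rt(queued)->last_gpu) -
				gpu_migration_distance(dest_gpu, tsk_rt(queued)->last_gpu);

			if((improvement > max_improvement) ||
			   ((improvement == max_improvement) &&
			    litmus->compare(queued, victim))) {
				victim = queued;
				max_improvement = improvement;
				*to_steal = node;
				*to_steal_from = src;
			}
		}
	}
	return victim;
}
#endif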
+ + return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); +} + + +void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + struct kfmlp_queue_info *info = &aff->q_info[replica]; + lt_t est_time; + lt_t est_len_before; + + if(current == t) { + tsk_rt(t)->suspend_gpu_tracker_on_block = 1; + } + + est_len_before = info->estimated_len; + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + info->estimated_len += est_time; + + TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n", + kfmlp_get_idx(sem, info->q), + est_len_before, est_time, + info->estimated_len); + +// if(aff->shortest_queue == info) { +// // we may no longer be the shortest +// aff->shortest_queue = kfmlp_aff_find_shortest(aff); +// +// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", +// kfmlp_get_idx(sem, aff->shortest_queue->q), +// aff->shortest_queue->q->count, +// aff->shortest_queue->estimated_len); +// } +} + +void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + struct kfmlp_queue_info *info = &aff->q_info[replica]; + lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + + if(est_time > info->estimated_len) { + WARN_ON(1); + info->estimated_len = 0; + } + else { + info->estimated_len -= est_time; + } + + TRACE_CUR("fq %d est len is now %llu\n", + kfmlp_get_idx(sem, info->q), + info->estimated_len); + + // check to see if we're the shortest queue now. +// if((aff->shortest_queue != info) && +// (aff->shortest_queue->estimated_len > info->estimated_len)) { +// +// aff->shortest_queue = info; +// +// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", +// kfmlp_get_idx(sem, info->q), +// info->q->count, +// info->estimated_len); +// } +} + +void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + + tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration + + TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n", + t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); + + // count the number or resource holders + ++(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 1, t); // register + + tsk_rt(t)->suspend_gpu_tracker_on_block = 0; + reset_gpu_tracker(t); + start_gpu_tracker(t); +} + +void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + lt_t est_time; + + stop_gpu_tracker(t); // stop the tracker before we do anything else. 
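	// As in gpu_ikglp_notify_freed() earlier in this patch: stop the
	// tracker before anything else so get_gpu_time(t) covers only this
	// critical section, and compute est_time from the old last_gpu before
	// it is overwritten a few lines below; the observed time then updates
	// the per-migration-class estimate via update_gpu_estimate().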
+ + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + + tsk_rt(t)->last_gpu = gpu; + + // count the number or resource holders + --(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 0, t); // unregister + + // update estimates + update_gpu_estimate(t, get_gpu_time(t)); + + TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n", + t->comm, t->pid, gpu, + get_gpu_time(t), + est_time, + (long long)get_gpu_time(t) - (long long)est_time); +} + +struct kfmlp_affinity_ops gpu_kfmlp_affinity = +{ + .advise_enqueue = gpu_kfmlp_advise_enqueue, + .advise_steal = gpu_kfmlp_advise_steal, + .notify_enqueue = gpu_kfmlp_notify_enqueue, + .notify_dequeue = gpu_kfmlp_notify_dequeue, + .notify_acquired = gpu_kfmlp_notify_acquired, + .notify_freed = gpu_kfmlp_notify_freed, + .replica_to_resource = gpu_replica_to_resource, +}; + +struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops, + void* __user args) +{ + return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args); +} + + + + + + + + +// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration) + +struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int min_count; + int min_nr_users; + struct kfmlp_queue_info *shortest; + struct kfmlp_queue *to_enqueue; + int i; + +// TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); + + shortest = &aff->q_info[0]; + min_count = shortest->q->count; + min_nr_users = *(shortest->nr_cur_users); + + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", + kfmlp_get_idx(sem, shortest->q), + shortest->q->count, + min_nr_users); + + for(i = 1; i < sem->num_resources; ++i) { + int len = aff->q_info[i].q->count; + + // queue is smaller, or they're equal and the other has a smaller number + // of total users. + // + // tie-break on the shortest number of simult users. this only kicks in + // when there are more than 1 empty queues. 
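		// Same tie-break as the simple IKGLP variant: raw queue length
		// first, then fewest current holders on the backing GPU.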
+ if((len < min_count) || + ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { + shortest = &aff->q_info[i]; + min_count = shortest->q->count; + min_nr_users = *(aff->q_info[i].nr_cur_users); + } + + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", + kfmlp_get_idx(sem, aff->q_info[i].q), + aff->q_info[i].q->count, + *(aff->q_info[i].nr_cur_users)); + } + + to_enqueue = shortest->q; + TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", + kfmlp_get_idx(sem, to_enqueue), + kfmlp_get_idx(sem, sem->shortest_queue)); + + return to_enqueue; +} + +struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); +// TRACE_CUR("Simple GPU KFMLP advise_steal invoked\n"); + return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); +} + +void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ +// TRACE_CUR("Simple GPU KFMLP notify_enqueue invoked\n"); +} + +void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ +// TRACE_CUR("Simple GPU KFMLP notify_dequeue invoked\n"); +} + +void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + +// TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n"); + + // count the number or resource holders + ++(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 1, t); // register +} + +void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = replica_to_gpu(aff, replica); + +// TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n"); + // count the number or resource holders + --(*(aff->q_info[replica].nr_cur_users)); + + reg_nv_device(gpu, 0, t); // unregister +} + +struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity = +{ + .advise_enqueue = simple_gpu_kfmlp_advise_enqueue, + .advise_steal = simple_gpu_kfmlp_advise_steal, + .notify_enqueue = simple_gpu_kfmlp_notify_enqueue, + .notify_dequeue = simple_gpu_kfmlp_notify_dequeue, + .notify_acquired = simple_gpu_kfmlp_notify_acquired, + .notify_freed = simple_gpu_kfmlp_notify_freed, + .replica_to_resource = gpu_replica_to_resource, +}; + +struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, + void* __user args) +{ + return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args); +} + +#endif + diff --git a/litmus/litmus.c b/litmus/litmus.c index 301390148d02..d1f836c8af6e 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -21,6 +21,10 @@ #include #endif +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + /* Number of RT tasks that exist in the system */ atomic_t rt_task_count = ATOMIC_INIT(0); static DEFINE_RAW_SPINLOCK(task_transition_lock); @@ -51,6 +55,28 @@ void bheap_node_free(struct bheap_node* hn) struct release_heap* release_heap_alloc(int gfp_flags); void release_heap_free(struct release_heap* rh); +#ifdef CONFIG_LITMUS_NVIDIA +/* + * sys_register_nv_device + * @nv_device_id: The Nvidia device id that the task want to register + * @reg_action: set to '1' to register the specified device. zero otherwise. 
+ * Syscall for register task's designated nvidia device into NV_DEVICE_REG array + * Returns EFAULT if nv_device_id is out of range. + * 0 if success + */ +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action) +{ + /* register the device to caller (aka 'current') */ + return(reg_nv_device(nv_device_id, reg_action, current)); +} +#else +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action) +{ + return(-EINVAL); +} +#endif + + /* * sys_set_task_rt_param * @pid: Pid of the task which scheduling parameters must be changed @@ -269,6 +295,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job) return retval; } + /* sys_null_call() is only used for determining raw system call * overheads (kernel entry, kernel exit). It has no useful side effects. * If ts is non-NULL, then the current Feather-Trace time is recorded. @@ -286,12 +313,42 @@ asmlinkage long sys_null_call(cycles_t __user *ts) return ret; } + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) +void init_gpu_affinity_state(struct task_struct* p) +{ + // under-damped + //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); + //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); + + // emperical; + p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); + p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); + + p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000); + p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000); + + p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000); + p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000); + + p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000); + p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000); + + p->rt_param.gpu_migration = MIG_NONE; + p->rt_param.last_gpu = -1; +} +#endif + /* p is a real-time task. Re-init its state as a best-effort task. */ static void reinit_litmus_state(struct task_struct* p, int restore) { struct rt_task user_config = {}; void* ctrl_page = NULL; +#ifdef CONFIG_LITMUS_NESTED_LOCKING + binheap_order_t prio_order = NULL; +#endif + if (restore) { /* Safe user-space provided configuration data. * and allocated page. */ @@ -299,11 +356,38 @@ static void reinit_litmus_state(struct task_struct* p, int restore) ctrl_page = p->rt_param.ctrl_page; } +#ifdef CONFIG_LITMUS_NESTED_LOCKING + prio_order = p->rt_param.hp_blocked_tasks.compare; +#endif + /* We probably should not be inheriting any task's priority * at this point in time. */ WARN_ON(p->rt_param.inh_task); +#ifdef CONFIG_LITMUS_NESTED_LOCKING + WARN_ON(p->rt_param.blocked_lock); + WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks)); +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD + /* We probably should not have any tasklets executing for + * us at this time. + */ + WARN_ON(p->rt_param.cur_klitirqd); + WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD); + + if(p->rt_param.cur_klitirqd) + flush_pending(p->rt_param.cur_klitirqd, p); + + if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD) + up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem); +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + WARN_ON(p->rt_param.held_gpus != 0); +#endif + /* Cleanup everything else. 
*/ memset(&p->rt_param, 0, sizeof(p->rt_param)); @@ -312,6 +396,15 @@ static void reinit_litmus_state(struct task_struct* p, int restore) p->rt_param.task_params = user_config; p->rt_param.ctrl_page = ctrl_page; } + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) + init_gpu_affinity_state(p); +#endif + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order); + raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock); +#endif } long litmus_admit_task(struct task_struct* tsk) @@ -358,6 +451,26 @@ long litmus_admit_task(struct task_struct* tsk) bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); } + +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(tsk)->nv_int_count, 0); +#endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) + init_gpu_affinity_state(tsk); +#endif +#ifdef CONFIG_LITMUS_NESTED_LOCKING + tsk_rt(tsk)->blocked_lock = NULL; + raw_spin_lock_init(&tsk_rt(tsk)->hp_blocked_tasks_lock); + //INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, prio_order); // done by scheduler +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + /* proxy thread off by default */ + tsk_rt(tsk)is_proxy_thread = 0; + tsk_rt(tsk)cur_klitirqd = NULL; + mutex_init(&tsk_rt(tsk)->klitirqd_sem); + atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD); +#endif + retval = litmus->admit_task(tsk); if (!retval) { @@ -403,7 +516,7 @@ static void synch_on_plugin_switch(void* info) */ int switch_sched_plugin(struct sched_plugin* plugin) { - unsigned long flags; + //unsigned long flags; int ret = 0; BUG_ON(!plugin); @@ -417,8 +530,15 @@ int switch_sched_plugin(struct sched_plugin* plugin) while (atomic_read(&cannot_use_plugin) < num_online_cpus()) cpu_relax(); +#ifdef CONFIG_LITMUS_SOFTIRQD + if(!klitirqd_is_dead()) + { + kill_klitirqd(); + } +#endif + /* stop task transitions */ - raw_spin_lock_irqsave(&task_transition_lock, flags); + //raw_spin_lock_irqsave(&task_transition_lock, flags); /* don't switch if there are active real-time tasks */ if (atomic_read(&rt_task_count) == 0) { @@ -436,7 +556,7 @@ int switch_sched_plugin(struct sched_plugin* plugin) } else ret = -EBUSY; out: - raw_spin_unlock_irqrestore(&task_transition_lock, flags); + //raw_spin_unlock_irqrestore(&task_transition_lock, flags); atomic_set(&cannot_use_plugin, 0); return ret; } diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c new file mode 100644 index 000000000000..300571a81bbd --- /dev/null +++ b/litmus/litmus_pai_softirq.c @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + + + +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + ret = litmus->enqueue_pai_tasklet(t); + + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_schedule); + + + +// failure causes default Linux handling. +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + return(ret); +} +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); + + +// failure causes default Linux handling. 
+int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + return(ret); +} +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first); + + +// failure causes default Linux handling. +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + return(ret); +} +EXPORT_SYMBOL(__litmus_schedule_work); + diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c index 4bf725a36c9c..9ab7e015a3c1 100644 --- a/litmus/litmus_proc.c +++ b/litmus/litmus_proc.c @@ -19,12 +19,19 @@ static struct proc_dir_entry *litmus_dir = NULL, *plugs_dir = NULL, #ifdef CONFIG_RELEASE_MASTER *release_master_file = NULL, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + *klitirqd_file = NULL, #endif *plugs_file = NULL; /* in litmus/sync.c */ int count_tasks_waiting_for_release(void); +extern int proc_read_klitirqd_stats(char *page, char **start, + off_t off, int count, + int *eof, void *data); + static int proc_read_stats(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -161,6 +168,12 @@ int __init init_litmus_proc(void) release_master_file->write_proc = proc_write_release_master; #endif +#ifdef CONFIG_LITMUS_SOFTIRQD + klitirqd_file = + create_proc_read_entry("klitirqd_stats", 0444, litmus_dir, + proc_read_klitirqd_stats, NULL); +#endif + stat_file = create_proc_read_entry("stats", 0444, litmus_dir, proc_read_stats, NULL); @@ -187,6 +200,10 @@ void exit_litmus_proc(void) remove_proc_entry("stats", litmus_dir); if (curr_file) remove_proc_entry("active_plugin", litmus_dir); +#ifdef CONFIG_LITMUS_SOFTIRQD + if (klitirqd_file) + remove_proc_entry("klitirqd_stats", litmus_dir); +#endif #ifdef CONFIG_RELEASE_MASTER if (release_master_file) remove_proc_entry("release_master", litmus_dir); diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c new file mode 100644 index 000000000000..9f7d9da5facb --- /dev/null +++ b/litmus/litmus_softirq.c @@ -0,0 +1,1582 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +/* TODO: Remove unneeded mb() and other barriers. */ + + +/* counts number of daemons ready to handle litmus irqs. */ +static atomic_t num_ready_klitirqds = ATOMIC_INIT(0); + +enum pending_flags +{ + LIT_TASKLET_LOW = 0x1, + LIT_TASKLET_HI = LIT_TASKLET_LOW<<1, + LIT_WORK = LIT_TASKLET_HI<<1 +}; + +/* only support tasklet processing for now. 
*/ +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; + +struct klitirqd_info +{ + struct task_struct* klitirqd; + struct task_struct* current_owner; + int terminating; + + + raw_spinlock_t lock; + + u32 pending; + atomic_t num_hi_pending; + atomic_t num_low_pending; + atomic_t num_work_pending; + + /* in order of priority */ + struct tasklet_head pending_tasklets_hi; + struct tasklet_head pending_tasklets; + struct list_head worklist; +}; + +/* one list for each klitirqd */ +static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD]; + + + + + +int proc_read_klitirqd_stats(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + int len = snprintf(page, PAGE_SIZE, + "num ready klitirqds: %d\n\n", + atomic_read(&num_ready_klitirqds)); + + if(klitirqd_is_ready()) + { + int i; + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + len += + snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */ + "klitirqd_th%d: %s/%d\n" + "\tcurrent_owner: %s/%d\n" + "\tpending: %x\n" + "\tnum hi: %d\n" + "\tnum low: %d\n" + "\tnum work: %d\n\n", + i, + klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid, + (klitirqds[i].current_owner != NULL) ? + klitirqds[i].current_owner->comm : "(null)", + (klitirqds[i].current_owner != NULL) ? + klitirqds[i].current_owner->pid : 0, + klitirqds[i].pending, + atomic_read(&klitirqds[i].num_hi_pending), + atomic_read(&klitirqds[i].num_low_pending), + atomic_read(&klitirqds[i].num_work_pending)); + } + } + + return(len); +} + + + + + +#if 0 +static atomic_t dump_id = ATOMIC_INIT(0); + +static void __dump_state(struct klitirqd_info* which, const char* caller) +{ + struct tasklet_struct* list; + + int id = atomic_inc_return(&dump_id); + + //if(in_interrupt()) + { + if(which->current_owner) + { + TRACE("(id: %d caller: %s)\n" + "klitirqd: %s/%d\n" + "current owner: %s/%d\n" + "pending: %x\n", + id, caller, + which->klitirqd->comm, which->klitirqd->pid, + which->current_owner->comm, which->current_owner->pid, + which->pending); + } + else + { + TRACE("(id: %d caller: %s)\n" + "klitirqd: %s/%d\n" + "current owner: %p\n" + "pending: %x\n", + id, caller, + which->klitirqd->comm, which->klitirqd->pid, + NULL, + which->pending); + } + + list = which->pending_tasklets.head; + while(list) + { + struct tasklet_struct *t = list; + list = list->next; /* advance */ + if(t->owner) + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid); + else + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL); + } + } +} + +static void dump_state(struct klitirqd_info* which, const char* caller) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&which->lock, flags); + __dump_state(which, caller); + raw_spin_unlock_irqrestore(&which->lock, flags); +} +#endif + + +/* forward declarations */ +static void ___litmus_tasklet_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup); +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup); +static void ___litmus_schedule_work(struct work_struct *w, + struct klitirqd_info *which, + int wakeup); + + + +inline unsigned int klitirqd_id(struct task_struct* tsk) +{ + int i; + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + if(klitirqds[i].klitirqd == tsk) + { + return i; + } + } + + BUG(); + + return 0; +} + + +inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which) +{ + return (which->pending & LIT_TASKLET_HI); +} + +inline static u32 
litirq_pending_low_irqoff(struct klitirqd_info* which) +{ + return (which->pending & LIT_TASKLET_LOW); +} + +inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which) +{ + return (which->pending & LIT_WORK); +} + +inline static u32 litirq_pending_irqoff(struct klitirqd_info* which) +{ + return(which->pending); +} + + +inline static u32 litirq_pending(struct klitirqd_info* which) +{ + unsigned long flags; + u32 pending; + + raw_spin_lock_irqsave(&which->lock, flags); + pending = litirq_pending_irqoff(which); + raw_spin_unlock_irqrestore(&which->lock, flags); + + return pending; +}; + +inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner) +{ + unsigned long flags; + u32 pending; + + raw_spin_lock_irqsave(&which->lock, flags); + pending = litirq_pending_irqoff(which); + if(pending) + { + if(which->current_owner != owner) + { + pending = 0; // owner switch! + } + } + raw_spin_unlock_irqrestore(&which->lock, flags); + + return pending; +} + + +inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which, + struct mutex** sem, + struct task_struct** t) +{ + unsigned long flags; + u32 pending; + + /* init values */ + *sem = NULL; + *t = NULL; + + raw_spin_lock_irqsave(&which->lock, flags); + + pending = litirq_pending_irqoff(which); + if(pending) + { + if(which->current_owner != NULL) + { + *t = which->current_owner; + *sem = &tsk_rt(which->current_owner)->klitirqd_sem; + } + else + { + BUG(); + } + } + raw_spin_unlock_irqrestore(&which->lock, flags); + + if(likely(*sem)) + { + return pending; + } + else + { + return 0; + } +} + +/* returns true if the next piece of work to do is from a different owner. + */ +static int tasklet_ownership_change( + struct klitirqd_info* which, + enum pending_flags taskletQ) +{ + /* this function doesn't have to look at work objects since they have + priority below tasklets. */ + + unsigned long flags; + int ret = 0; + + raw_spin_lock_irqsave(&which->lock, flags); + + switch(taskletQ) + { + case LIT_TASKLET_HI: + if(litirq_pending_hi_irqoff(which)) + { + ret = (which->pending_tasklets_hi.head->owner != + which->current_owner); + } + break; + case LIT_TASKLET_LOW: + if(litirq_pending_low_irqoff(which)) + { + ret = (which->pending_tasklets.head->owner != + which->current_owner); + } + break; + default: + break; + } + + raw_spin_unlock_irqrestore(&which->lock, flags); + + TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret); + + return ret; +} + + +static void __reeval_prio(struct klitirqd_info* which) +{ + struct task_struct* next_owner = NULL; + struct task_struct* klitirqd = which->klitirqd; + + /* Check in prio-order */ + u32 pending = litirq_pending_irqoff(which); + + //__dump_state(which, "__reeval_prio: before"); + + if(pending) + { + if(pending & LIT_TASKLET_HI) + { + next_owner = which->pending_tasklets_hi.head->owner; + } + else if(pending & LIT_TASKLET_LOW) + { + next_owner = which->pending_tasklets.head->owner; + } + else if(pending & LIT_WORK) + { + struct work_struct* work = + list_first_entry(&which->worklist, struct work_struct, entry); + next_owner = work->owner; + } + } + + if(next_owner != which->current_owner) + { + struct task_struct* old_owner = which->current_owner; + + /* bind the next owner. */ + which->current_owner = next_owner; + mb(); + + if(next_owner != NULL) + { + if(!in_interrupt()) + { + TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, + ((tsk_rt(klitirqd)->inh_task) ? 
tsk_rt(klitirqd)->inh_task : klitirqd)->comm, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid, + next_owner->comm, next_owner->pid); + } + else + { + TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm, + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid, + next_owner->comm, next_owner->pid); + } + + litmus->increase_prio_inheritance_klitirqd(klitirqd, old_owner, next_owner); + } + else + { + if(likely(!in_interrupt())) + { + TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n", + __FUNCTION__, klitirqd->comm, klitirqd->pid); + } + else + { + // is this a bug? + TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n", + __FUNCTION__, klitirqd->comm, klitirqd->pid); + } + + BUG_ON(pending != 0); + litmus->decrease_prio_inheritance_klitirqd(klitirqd, old_owner, NULL); + } + } + + //__dump_state(which, "__reeval_prio: after"); +} + +static void reeval_prio(struct klitirqd_info* which) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&which->lock, flags); + __reeval_prio(which); + raw_spin_unlock_irqrestore(&which->lock, flags); +} + + +static void wakeup_litirqd_locked(struct klitirqd_info* which) +{ + /* Interrupts are disabled: no need to stop preemption */ + if (which && which->klitirqd) + { + __reeval_prio(which); /* configure the proper priority */ + + if(which->klitirqd->state != TASK_RUNNING) + { + TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__, + which->klitirqd->comm, which->klitirqd->pid); + + wake_up_process(which->klitirqd); + } + } +} + + +static void do_lit_tasklet(struct klitirqd_info* which, + struct tasklet_head* pending_tasklets) +{ + unsigned long flags; + struct tasklet_struct *list; + atomic_t* count; + + raw_spin_lock_irqsave(&which->lock, flags); + + //__dump_state(which, "do_lit_tasklet: before steal"); + + /* copy out the tasklets for our private use. */ + list = pending_tasklets->head; + pending_tasklets->head = NULL; + pending_tasklets->tail = &pending_tasklets->head; + + /* remove pending flag */ + which->pending &= (pending_tasklets == &which->pending_tasklets) ? + ~LIT_TASKLET_LOW : + ~LIT_TASKLET_HI; + + count = (pending_tasklets == &which->pending_tasklets) ? + &which->num_low_pending: + &which->num_hi_pending; + + //__dump_state(which, "do_lit_tasklet: after steal"); + + raw_spin_unlock_irqrestore(&which->lock, flags); + + + while(list) + { + struct tasklet_struct *t = list; + + /* advance, lest we forget */ + list = list->next; + + /* execute tasklet if it has my priority and is free */ + if ((t->owner == which->current_owner) && tasklet_trylock(t)) { + if (!atomic_read(&t->count)) { + + sched_trace_tasklet_begin(t->owner); + + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) + { + BUG(); + } + TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__); + t->func(t->data); + tasklet_unlock(t); + + atomic_dec(count); + + sched_trace_tasklet_end(t->owner, 0ul); + + continue; /* process more tasklets */ + } + tasklet_unlock(t); + } + + TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__); + + /* couldn't process tasklet. put it back at the end of the queue. */ + if(pending_tasklets == &which->pending_tasklets) + ___litmus_tasklet_schedule(t, which, 0); + else + ___litmus_tasklet_hi_schedule(t, which, 0); + } +} + + +// returns 1 if priorities need to be changed to continue processing +// pending tasklets. 
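/* Reviewer note -- illustrative sketch, not part of this patch.  The service
 * order implied by __reeval_prio() above and do_litirq() below (HI tasklets,
 * then LOW tasklets, then work objects) can be summarized by a helper like
 * the following; the name is hypothetical.
 */
static inline enum pending_flags example_next_class(u32 pending)
{
	/* caller must guarantee pending != 0 */
	if (pending & LIT_TASKLET_HI)
		return LIT_TASKLET_HI;
	if (pending & LIT_TASKLET_LOW)
		return LIT_TASKLET_LOW;
	return LIT_WORK;
}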
+static int do_litirq(struct klitirqd_info* which) +{ + u32 pending; + int resched = 0; + + if(in_interrupt()) + { + TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__); + return(0); + } + + if(which->klitirqd != current) + { + TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n", + __FUNCTION__, current->comm, current->pid, + which->klitirqd->comm, which->klitirqd->pid); + return(0); + } + + if(!is_realtime(current)) + { + TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n", + __FUNCTION__, current->policy); + return(0); + } + + + /* We only handle tasklets & work objects, no need for RCU triggers? */ + + pending = litirq_pending(which); + if(pending) + { + /* extract the work to do and do it! */ + if(pending & LIT_TASKLET_HI) + { + TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__); + do_lit_tasklet(which, &which->pending_tasklets_hi); + resched = tasklet_ownership_change(which, LIT_TASKLET_HI); + + if(resched) + { + TRACE_CUR("%s: HI tasklets of another owner remain. " + "Skipping any LOW tasklets.\n", __FUNCTION__); + } + } + + if(!resched && (pending & LIT_TASKLET_LOW)) + { + TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__); + do_lit_tasklet(which, &which->pending_tasklets); + resched = tasklet_ownership_change(which, LIT_TASKLET_LOW); + + if(resched) + { + TRACE_CUR("%s: LOW tasklets of another owner remain. " + "Skipping any work objects.\n", __FUNCTION__); + } + } + } + + return(resched); +} + + +static void do_work(struct klitirqd_info* which) +{ + unsigned long flags; + work_func_t f; + struct work_struct* work; + + // only execute one work-queue item to yield to tasklets. + // ...is this a good idea, or should we just batch them? + raw_spin_lock_irqsave(&which->lock, flags); + + if(!litirq_pending_work_irqoff(which)) + { + raw_spin_unlock_irqrestore(&which->lock, flags); + goto no_work; + } + + work = list_first_entry(&which->worklist, struct work_struct, entry); + list_del_init(&work->entry); + + if(list_empty(&which->worklist)) + { + which->pending &= ~LIT_WORK; + } + + raw_spin_unlock_irqrestore(&which->lock, flags); + + + + /* safe to read current_owner outside of lock since only this thread + may write to the pointer. */ + if(work->owner == which->current_owner) + { + TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__); + // do the work! + work_clear_pending(work); + f = work->func; + f(work); /* can't touch 'work' after this point, + the user may have freed it. */ + + atomic_dec(&which->num_work_pending); + } + else + { + TRACE_CUR("%s: Could not invoke work object. Requeuing.\n", + __FUNCTION__); + ___litmus_schedule_work(work, which, 0); + } + +no_work: + return; +} + + +static int set_litmus_daemon_sched(void) +{ + /* set up a daemon job that will never complete. + it should only ever run on behalf of another + real-time task. 
+ + TODO: Transition to a new job whenever a + new tasklet is handled */ + + int ret = 0; + + struct rt_task tp = { + .exec_cost = 0, + .period = 1000000000, /* dummy 1 second period */ + .phase = 0, + .cpu = task_cpu(current), + .budget_policy = NO_ENFORCEMENT, + .cls = RT_CLASS_BEST_EFFORT + }; + + struct sched_param param = { .sched_priority = 0}; + + + /* set task params, mark as proxy thread, and init other data */ + tsk_rt(current)->task_params = tp; + tsk_rt(current)->is_proxy_thread = 1; + tsk_rt(current)->cur_klitirqd = NULL; + mutex_init(&tsk_rt(current)->klitirqd_sem); + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD); + + /* inform the OS we're SCHED_LITMUS -- + sched_setscheduler_nocheck() calls litmus_admit_task(). */ + sched_setscheduler_nocheck(current, SCHED_LITMUS, ¶m); + + return ret; +} + +static void enter_execution_phase(struct klitirqd_info* which, + struct mutex* sem, + struct task_struct* t) +{ + TRACE_CUR("%s: Trying to enter execution phase. " + "Acquiring semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); + down_and_set_stat(current, HELD, sem); + TRACE_CUR("%s: Execution phase entered! " + "Acquired semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); +} + +static void exit_execution_phase(struct klitirqd_info* which, + struct mutex* sem, + struct task_struct* t) +{ + TRACE_CUR("%s: Exiting execution phase. " + "Releasing semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD) + { + up_and_set_stat(current, NOT_HELD, sem); + TRACE_CUR("%s: Execution phase exited! " + "Released semaphore of %s/%d\n", __FUNCTION__, + t->comm, t->pid); + } + else + { + TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__); + } +} + +/* main loop for klitsoftirqd */ +static int run_klitirqd(void* unused) +{ + struct klitirqd_info* which = &klitirqds[klitirqd_id(current)]; + struct mutex* sem; + struct task_struct* owner; + + int rt_status = set_litmus_daemon_sched(); + + if(rt_status != 0) + { + TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__); + goto rt_failed; + } + + atomic_inc(&num_ready_klitirqds); + + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop()) + { + preempt_disable(); + if (!litirq_pending(which)) + { + /* sleep for work */ + TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n", + __FUNCTION__); + preempt_enable_no_resched(); + schedule(); + + if(kthread_should_stop()) /* bail out */ + { + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); + continue; + } + + preempt_disable(); + } + + __set_current_state(TASK_RUNNING); + + while (litirq_pending_and_sem_and_owner(which, &sem, &owner)) + { + int needs_resched = 0; + + preempt_enable_no_resched(); + + BUG_ON(sem == NULL); + + // wait to enter execution phase; wait for 'current_owner' to block. + enter_execution_phase(which, sem, owner); + + if(kthread_should_stop()) + { + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); + break; + } + + preempt_disable(); + + /* Double check that there's still pending work and the owner hasn't + * changed. Pending items may have been flushed while we were sleeping. + */ + if(litirq_pending_with_owner(which, owner)) + { + TRACE_CUR("%s: Executing tasklets and/or work objects.\n", + __FUNCTION__); + + needs_resched = do_litirq(which); + + preempt_enable_no_resched(); + + // work objects are preemptible. + if(!needs_resched) + { + do_work(which); + } + + // exit execution phase. 
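/* Note: releasing the owner's klitirqd_sem below ends the "execution phase"
 * begun by enter_execution_phase() above -- the daemon processes tasklets
 * and work objects only while it holds the semaphore of the task it is
 * serving, apparently so that it runs on the owner's behalf only while the
 * owner itself is blocked. */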
+ exit_execution_phase(which, sem, owner); + + TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__); + reeval_prio(which); /* check if we need to change priority here */ + } + else + { + TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n", + __FUNCTION__, + owner->comm, owner->pid); + preempt_enable_no_resched(); + + // exit execution phase. + exit_execution_phase(which, sem, owner); + } + + cond_resched(); + preempt_disable(); + } + preempt_enable(); + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + + atomic_dec(&num_ready_klitirqds); + +rt_failed: + litmus_exit_task(current); + + return rt_status; +} + + +struct klitirqd_launch_data +{ + int* cpu_affinity; + struct work_struct work; +}; + +/* executed by a kworker from workqueues */ +static void launch_klitirqd(struct work_struct *work) +{ + int i; + + struct klitirqd_launch_data* launch_data = + container_of(work, struct klitirqd_launch_data, work); + + TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + + /* create the daemon threads */ + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + if(launch_data->cpu_affinity) + { + klitirqds[i].klitirqd = + kthread_create( + run_klitirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)(long long)launch_data->cpu_affinity[i], + "klitirqd_th%d/%d", + i, + launch_data->cpu_affinity[i]); + + /* litmus will put is in the right cluster. */ + kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]); + } + else + { + klitirqds[i].klitirqd = + kthread_create( + run_klitirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)(long long)(-1), + "klitirqd_th%d", + i); + } + } + + TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + + /* unleash the daemons */ + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + wake_up_process(klitirqds[i].klitirqd); + } + + if(launch_data->cpu_affinity) + kfree(launch_data->cpu_affinity); + kfree(launch_data); +} + + +void spawn_klitirqd(int* affinity) +{ + int i; + struct klitirqd_launch_data* delayed_launch; + + if(atomic_read(&num_ready_klitirqds) != 0) + { + TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n"); + return; + } + + /* init the tasklet & work queues */ + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + klitirqds[i].terminating = 0; + klitirqds[i].pending = 0; + + klitirqds[i].num_hi_pending.counter = 0; + klitirqds[i].num_low_pending.counter = 0; + klitirqds[i].num_work_pending.counter = 0; + + klitirqds[i].pending_tasklets_hi.head = NULL; + klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head; + + klitirqds[i].pending_tasklets.head = NULL; + klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head; + + INIT_LIST_HEAD(&klitirqds[i].worklist); + + raw_spin_lock_init(&klitirqds[i].lock); + } + + /* wait to flush the initializations to memory since other threads + will access it. */ + mb(); + + /* tell a work queue to launch the threads. we can't make scheduling + calls since we're in an atomic state. 
*/ + TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__); + delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC); + if(affinity) + { + delayed_launch->cpu_affinity = + kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC); + + memcpy(delayed_launch->cpu_affinity, affinity, + sizeof(int)*NR_LITMUS_SOFTIRQD); + } + else + { + delayed_launch->cpu_affinity = NULL; + } + INIT_WORK(&delayed_launch->work, launch_klitirqd); + schedule_work(&delayed_launch->work); +} + + +void kill_klitirqd(void) +{ + if(!klitirqd_is_dead()) + { + int i; + + TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) + { + if(klitirqds[i].terminating != 1) + { + klitirqds[i].terminating = 1; + mb(); /* just to be sure? */ + flush_pending(klitirqds[i].klitirqd, NULL); + + /* signal termination */ + kthread_stop(klitirqds[i].klitirqd); + } + } + } +} + + +int klitirqd_is_ready(void) +{ + return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD); +} + +int klitirqd_is_dead(void) +{ + return(atomic_read(&num_ready_klitirqds) == 0); +} + + +struct task_struct* get_klitirqd(unsigned int k_id) +{ + return(klitirqds[k_id].klitirqd); +} + + +void flush_pending(struct task_struct* klitirqd_thread, + struct task_struct* owner) +{ + unsigned int k_id = klitirqd_id(klitirqd_thread); + struct klitirqd_info *which = &klitirqds[k_id]; + + unsigned long flags; + struct tasklet_struct *list; + + u32 work_flushed = 0; + + raw_spin_lock_irqsave(&which->lock, flags); + + //__dump_state(which, "flush_pending: before"); + + // flush hi tasklets. + if(litirq_pending_hi_irqoff(which)) + { + which->pending &= ~LIT_TASKLET_HI; + + list = which->pending_tasklets_hi.head; + which->pending_tasklets_hi.head = NULL; + which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head; + + TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__); + + while(list) + { + struct tasklet_struct *t = list; + list = list->next; + + if(likely((t->owner == owner) || (owner == NULL))) + { + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) + { + BUG(); + } + + work_flushed |= LIT_TASKLET_HI; + + t->owner = NULL; + + // WTF? + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + { + atomic_dec(&which->num_hi_pending); + ___tasklet_hi_schedule(t); + } + else + { + TRACE("%s: dropped hi tasklet??\n", __FUNCTION__); + BUG(); + } + } + else + { + TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__); + // put back on queue. + ___litmus_tasklet_hi_schedule(t, which, 0); + } + } + } + + // flush low tasklets. 
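/* Note: as with the HI queue above, flushing hands unprocessed tasklets back
 * to the stock Linux softirq machinery -- for a matching owner (or when
 * owner == NULL), the SCHED bit is re-set, t->owner is cleared, and
 * ___tasklet_schedule()/___tasklet_hi_schedule() requeue the tasklet for
 * ksoftirqd (woken via wakeup_softirqd() at the end of this function);
 * tasklets belonging to a different owner are re-queued on this klitirqd. */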
+ if(litirq_pending_low_irqoff(which)) + { + which->pending &= ~LIT_TASKLET_LOW; + + list = which->pending_tasklets.head; + which->pending_tasklets.head = NULL; + which->pending_tasklets.tail = &which->pending_tasklets.head; + + TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__); + + while(list) + { + struct tasklet_struct *t = list; + list = list->next; + + if(likely((t->owner == owner) || (owner == NULL))) + { + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) + { + BUG(); + } + + work_flushed |= LIT_TASKLET_LOW; + + t->owner = NULL; + sched_trace_tasklet_end(owner, 1ul); + + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + { + atomic_dec(&which->num_low_pending); + ___tasklet_schedule(t); + } + else + { + TRACE("%s: dropped tasklet??\n", __FUNCTION__); + BUG(); + } + } + else + { + TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__); + // put back on queue + ___litmus_tasklet_schedule(t, which, 0); + } + } + } + + // flush work objects + if(litirq_pending_work_irqoff(which)) + { + which->pending &= ~LIT_WORK; + + TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__); + + while(!list_empty(&which->worklist)) + { + struct work_struct* work = + list_first_entry(&which->worklist, struct work_struct, entry); + list_del_init(&work->entry); + + if(likely((work->owner == owner) || (owner == NULL))) + { + work_flushed |= LIT_WORK; + atomic_dec(&which->num_work_pending); + + work->owner = NULL; + sched_trace_work_end(owner, current, 1ul); + __schedule_work(work); + } + else + { + TRACE("%s: Could not flush a work object.\n", __FUNCTION__); + // put back on queue + ___litmus_schedule_work(work, which, 0); + } + } + } + + //__dump_state(which, "flush_pending: after (before reeval prio)"); + + + mb(); /* commit changes to pending flags */ + + /* reset the scheduling priority */ + if(work_flushed) + { + __reeval_prio(which); + + /* Try to offload flushed tasklets to Linux's ksoftirqd. */ + if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI)) + { + wakeup_softirqd(); + } + } + else + { + TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__); + } + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + + + + +static void ___litmus_tasklet_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup) +{ + unsigned long flags; + u32 old_pending; + + t->next = NULL; + + raw_spin_lock_irqsave(&which->lock, flags); + + //__dump_state(which, "___litmus_tasklet_schedule: before queuing"); + + *(which->pending_tasklets.tail) = t; + which->pending_tasklets.tail = &t->next; + + old_pending = which->pending; + which->pending |= LIT_TASKLET_LOW; + + atomic_inc(&which->num_low_pending); + + mb(); + + if(!old_pending && wakeup) + { + wakeup_litirqd_locked(which); /* wake up the klitirqd */ + } + + //__dump_state(which, "___litmus_tasklet_schedule: after queuing"); + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + { + /* Can't accept tasklets while we're processing a workqueue + because they're handled by the same thread. This case is + very RARE. 
+ + TODO: Use a separate thread for work objects!!!!!! + */ + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + { + ret = 1; + ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1); + } + else + { + TRACE("%s: rejected tasklet because of pending work.\n", + __FUNCTION__); + } + } + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_schedule); + + +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, + struct klitirqd_info *which, + int wakeup) +{ + unsigned long flags; + u32 old_pending; + + t->next = NULL; + + raw_spin_lock_irqsave(&which->lock, flags); + + *(which->pending_tasklets_hi.tail) = t; + which->pending_tasklets_hi.tail = &t->next; + + old_pending = which->pending; + which->pending |= LIT_TASKLET_HI; + + atomic_inc(&which->num_hi_pending); + + mb(); + + if(!old_pending && wakeup) + { + wakeup_litirqd_locked(which); /* wake up the klitirqd */ + } + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); + BUG(); + } + + if(unlikely(!klitirqd_is_ready())) + { + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + { + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + { + ret = 1; + ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1); + } + else + { + TRACE("%s: rejected tasklet because of pending work.\n", + __FUNCTION__); + } + } + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); + + +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id) +{ + int ret = 0; /* assume failure */ + u32 old_pending; + + BUG_ON(!irqs_disabled()); + + if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id); + BUG(); + } + + if(unlikely(!klitirqd_is_ready())) + { + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + { + raw_spin_lock(&klitirqds[k_id].lock); + + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + { + ret = 1; // success! 
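/* Note: "schedule_first" semantics -- the tasklet is pushed at the head of
 * the HI list (not the tail), the pending bits and counters are updated
 * while holding the per-daemon lock (taken without irqsave because the
 * BUG_ON(!irqs_disabled()) above requires interrupts to already be off),
 * and the daemon is woken only when pending goes from empty to non-empty. */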
+ + t->next = klitirqds[k_id].pending_tasklets_hi.head; + klitirqds[k_id].pending_tasklets_hi.head = t; + + old_pending = klitirqds[k_id].pending; + klitirqds[k_id].pending |= LIT_TASKLET_HI; + + atomic_inc(&klitirqds[k_id].num_hi_pending); + + mb(); + + if(!old_pending) + wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */ + } + else + { + TRACE("%s: rejected tasklet because of pending work.\n", + __FUNCTION__); + } + + raw_spin_unlock(&klitirqds[k_id].lock); + } + return(ret); +} + +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first); + + + +static void ___litmus_schedule_work(struct work_struct *w, + struct klitirqd_info *which, + int wakeup) +{ + unsigned long flags; + u32 old_pending; + + raw_spin_lock_irqsave(&which->lock, flags); + + work_pending(w); + list_add_tail(&w->entry, &which->worklist); + + old_pending = which->pending; + which->pending |= LIT_WORK; + + atomic_inc(&which->num_work_pending); + + mb(); + + if(!old_pending && wakeup) + { + wakeup_litirqd_locked(which); /* wakeup the klitirqd */ + } + + raw_spin_unlock_irqrestore(&which->lock, flags); +} + +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) +{ + int ret = 1; /* assume success */ + if(unlikely(w->owner == NULL) || !is_realtime(w->owner)) + { + TRACE("%s: No owner associated with this work object!\n", __FUNCTION__); + BUG(); + } + + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) + { + TRACE("%s: No klitirqd_th%u!\n", k_id); + BUG(); + } + + if(unlikely(!klitirqd_is_ready())) + { + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + BUG(); + } + + if(likely(!klitirqds[k_id].terminating)) + ___litmus_schedule_work(w, &klitirqds[k_id], 1); + else + ret = 0; + return(ret); +} +EXPORT_SYMBOL(__litmus_schedule_work); + + +static int set_klitirqd_sem_status(unsigned long stat) +{ + TRACE_CUR("SETTING STATUS FROM %d TO %d\n", + atomic_read(&tsk_rt(current)->klitirqd_sem_stat), + stat); + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat); + //mb(); + + return(0); +} + +static int set_klitirqd_sem_status_if_not_held(unsigned long stat) +{ + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD) + { + return(set_klitirqd_sem_status(stat)); + } + return(-1); +} + + +void __down_and_reset_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_reset, + enum klitirqd_sem_status to_set, + struct mutex* sem) +{ +#if 0 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct task_struct* task = container_of(param, struct task_struct, rt_param); + + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif + + mutex_lock_sfx(sem, + set_klitirqd_sem_status_if_not_held, to_reset, + set_klitirqd_sem_status, to_set); +#if 0 + TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif +} + +void down_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem) +{ +#if 0 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct task_struct* task = container_of(param, struct task_struct, rt_param); + + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif + + mutex_lock_sfx(sem, + NULL, 0, + set_klitirqd_sem_status, to_set); + +#if 0 + TRACE_CUR("%s: exiting. 
Have semaphore of %s/%d\n", + __FUNCTION__, task->comm, task->pid); +#endif +} + + +void up_and_set_stat(struct task_struct* t, + enum klitirqd_sem_status to_set, + struct mutex* sem) +{ +#if 0 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct task_struct* task = container_of(param, struct task_struct, rt_param); + + TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n", + __FUNCTION__, + task->comm, task->pid); +#endif + + mutex_unlock_sfx(sem, NULL, 0, + set_klitirqd_sem_status, to_set); + +#if 0 + TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n", + __FUNCTION__, + task->comm, task->pid); +#endif +} + + + +void release_klitirqd_lock(struct task_struct* t) +{ + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD)) + { + struct mutex* sem; + struct task_struct* owner = t; + + if(t->state == TASK_RUNNING) + { + TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n"); + return; + } + + if(likely(!tsk_rt(t)->is_proxy_thread)) + { + sem = &tsk_rt(t)->klitirqd_sem; + } + else + { + unsigned int k_id = klitirqd_id(t); + owner = klitirqds[k_id].current_owner; + + BUG_ON(t != klitirqds[k_id].klitirqd); + + if(likely(owner)) + { + sem = &tsk_rt(owner)->klitirqd_sem; + } + else + { + BUG(); + + // We had the rug pulled out from under us. Abort attempt + // to reacquire the lock since our client no longer needs us. + TRACE_CUR("HUH?! How did this happen?\n"); + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD); + return; + } + } + + //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid); + up_and_set_stat(t, NEED_TO_REACQUIRE, sem); + //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid); + } + /* + else if(is_realtime(t)) + { + TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat); + } + */ +} + +int reacquire_klitirqd_lock(struct task_struct* t) +{ + int ret = 0; + + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE)) + { + struct mutex* sem; + struct task_struct* owner = t; + + if(likely(!tsk_rt(t)->is_proxy_thread)) + { + sem = &tsk_rt(t)->klitirqd_sem; + } + else + { + unsigned int k_id = klitirqd_id(t); + //struct task_struct* owner = klitirqds[k_id].current_owner; + owner = klitirqds[k_id].current_owner; + + BUG_ON(t != klitirqds[k_id].klitirqd); + + if(likely(owner)) + { + sem = &tsk_rt(owner)->klitirqd_sem; + } + else + { + // We had the rug pulled out from under us. Abort attempt + // to reacquire the lock since our client no longer needs us. + TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n"); + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD); + return(0); + } + } + + //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid); + __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem); + //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid); + } + /* + else if(is_realtime(t)) + { + TRACE_CUR("%s: Nothing to do. 
Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat); + } + */ + + return(ret); +} + diff --git a/litmus/locking.c b/litmus/locking.c index 0c1aa6aa40b7..718a5a3281d7 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -4,6 +4,15 @@ #include #include +#include + +#ifdef CONFIG_LITMUS_DGL_SUPPORT +#include +#endif + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); static int open_generic_lock(struct od_table_entry* entry, void* __user arg); @@ -17,6 +26,9 @@ struct fdso_ops generic_lock_ops = { .destroy = destroy_generic_lock }; +static atomic_t lock_id_gen = ATOMIC_INIT(0); + + static inline bool is_lock(struct od_table_entry* entry) { return entry->class == &generic_lock_ops; @@ -34,8 +46,21 @@ static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user ar int err; err = litmus->allocate_lock(&lock, type, arg); - if (err == 0) + if (err == 0) { +#ifdef CONFIG_LITMUS_NESTED_LOCKING + lock->nest.lock = lock; + lock->nest.hp_waiter_eff_prio = NULL; + + INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node); + if(!lock->nest.hp_waiter_ptr) { + TRACE_CUR("BEWARE: hp_waiter_ptr should probably not be NULL in " + "most uses. (exception: IKGLP donors)\n"); + } +#endif + lock->type = type; + lock->ident = atomic_inc_return(&lock_id_gen); *obj_ref = lock; + } return err; } @@ -74,7 +99,8 @@ asmlinkage long sys_litmus_lock(int lock_od) entry = get_entry_for_od(lock_od); if (entry && is_lock(entry)) { l = get_lock(entry); - TRACE_CUR("attempts to lock 0x%p\n", l); + //TRACE_CUR("attempts to lock 0x%p\n", l); + TRACE_CUR("attempts to lock %d\n", l->ident); err = l->ops->lock(l); } @@ -96,7 +122,8 @@ asmlinkage long sys_litmus_unlock(int lock_od) entry = get_entry_for_od(lock_od); if (entry && is_lock(entry)) { l = get_lock(entry); - TRACE_CUR("attempts to unlock 0x%p\n", l); + //TRACE_CUR("attempts to unlock 0x%p\n", l); + TRACE_CUR("attempts to unlock %d\n", l->ident); err = l->ops->unlock(l); } @@ -121,8 +148,366 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq) return(t); } +#ifdef CONFIG_LITMUS_NESTED_LOCKING + +void print_hp_waiters(struct binheap_node* n, int depth) +{ + struct litmus_lock *l; + struct nested_info *nest; + char padding[81] = " "; + struct task_struct *hp = NULL; + struct task_struct *hp_eff = NULL; + struct task_struct *node_prio = NULL; + + + if(n == NULL) { + TRACE("+-> %p\n", NULL); + return; + } + + nest = binheap_entry(n, struct nested_info, hp_binheap_node); + l = nest->lock; + + if(depth*2 <= 80) + padding[depth*2] = '\0'; + + if(nest->hp_waiter_ptr && *(nest->hp_waiter_ptr)) { + hp = *(nest->hp_waiter_ptr); + + if(tsk_rt(hp)->inh_task) { + hp_eff = tsk_rt(hp)->inh_task; + } + } + + node_prio = nest->hp_waiter_eff_prio; + + TRACE("%s+-> %s/%d [waiter = %s/%d] [waiter's inh = %s/%d] (lock = %d)\n", + padding, + (node_prio) ? node_prio->comm : "nil", + (node_prio) ? node_prio->pid : -1, + (hp) ? hp->comm : "nil", + (hp) ? hp->pid : -1, + (hp_eff) ? hp_eff->comm : "nil", + (hp_eff) ? hp_eff->pid : -1, + l->ident); + + if(n->left) print_hp_waiters(n->left, depth+1); + if(n->right) print_hp_waiters(n->right, depth+1); +} +#endif + + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + +void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/) +{ + /* + We pick the next lock in reverse order. 
This causes inheritance propagation + from locks received earlier to flow in the same direction as regular nested + locking. This might make fine-grain DGL easier in the future. + */ + + BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock); + + //WARN_ON(dgl_wait->locks[dgl_wait->last_primary] != prev_lock); + + // note reverse order + for(dgl_wait->last_primary = dgl_wait->last_primary - 1; + dgl_wait->last_primary >= 0; + --(dgl_wait->last_primary)){ + if(!dgl_wait->locks[dgl_wait->last_primary]->ops->is_owner( + dgl_wait->locks[dgl_wait->last_primary], dgl_wait->task)) { + + tsk_rt(dgl_wait->task)->blocked_lock = + dgl_wait->locks[dgl_wait->last_primary]; + mb(); + + TRACE_CUR("New blocked lock is %d\n", + dgl_wait->locks[dgl_wait->last_primary]->ident); + + break; + } + } +} + +int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key) +{ + // should never be called. + BUG(); + return 1; +} + +void __waitqueue_dgl_remove_first(wait_queue_head_t *wq, + dgl_wait_state_t** dgl_wait, + struct task_struct **task) +{ + wait_queue_t *q; + + *dgl_wait = NULL; + *task = NULL; + + if (waitqueue_active(wq)) { + q = list_entry(wq->task_list.next, + wait_queue_t, task_list); + + if(q->func == dgl_wake_up) { + *dgl_wait = (dgl_wait_state_t*) q->private; + } + else { + *task = (struct task_struct*) q->private; + } + + __remove_wait_queue(wq, q); + } +} + +void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait) +{ + init_waitqueue_entry(wq_node, dgl_wait->task); + wq_node->private = dgl_wait; + wq_node->func = dgl_wake_up; +} + + +static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) +{ + int i; + unsigned long irqflags; //, dummyflags; + raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task); + + BUG_ON(dgl_wait->task != current); + + raw_spin_lock_irqsave(dgl_lock, irqflags); + + + dgl_wait->nr_remaining = dgl_wait->size; + + TRACE_CUR("Locking DGL with size %d\n", dgl_wait->size); + + // try to acquire each lock. enqueue (non-blocking) if it is unavailable. + for(i = 0; i < dgl_wait->size; ++i) { + struct litmus_lock *l = dgl_wait->locks[i]; + + // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks. + + if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) { + --(dgl_wait->nr_remaining); + TRACE_CUR("Acquired lock %d immediatly.\n", l->ident); + } + } + + if(dgl_wait->nr_remaining == 0) { + // acquired entire group immediatly + TRACE_CUR("Acquired all locks in DGL immediatly!\n"); + } + else { + + TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", + dgl_wait->nr_remaining); + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + // KLUDGE: don't count this suspension as time in the critical gpu + // critical section + if(tsk_rt(dgl_wait->task)->held_gpus) { + tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1; + } +#endif + + // note reverse order. see comments in select_next_lock for reason. + for(i = dgl_wait->size - 1; i >= 0; --i) { + struct litmus_lock *l = dgl_wait->locks[i]; + if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe + + TRACE_CUR("Activating priority inheritance on lock %d\n", + l->ident); + + TS_DGL_LOCK_SUSPEND; + + l->ops->enable_priority(l, dgl_wait); + dgl_wait->last_primary = i; + + TRACE_CUR("Suspending for lock %d\n", l->ident); + + raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending + + schedule(); // suspend!!! 
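/* Note: by the time schedule() returns, releasing tasks have granted every
 * remaining lock in the group to this task -- select_next_lock() walks
 * last_primary backwards as each lock is handed over -- so all locks are
 * held and control jumps to all_acquired below. */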
+ + TS_DGL_LOCK_RESUME; + + TRACE_CUR("Woken up from DGL suspension.\n"); + + goto all_acquired; // we should hold all locks when we wake up. + } + } + + TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n"); + //BUG(); + } + + raw_spin_unlock_irqrestore(dgl_lock, irqflags); + +all_acquired: + + // FOR SANITY CHECK FOR TESTING +// for(i = 0; i < dgl_wait->size; ++i) { +// struct litmus_lock *l = dgl_wait->locks[i]; +// BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); +// } + + TRACE_CUR("Acquired entire DGL\n"); + + return 0; +} + +static int supports_dgl(struct litmus_lock *l) +{ + struct litmus_lock_ops* ops = l->ops; + + return (ops->dgl_lock && + ops->is_owner && + ops->enable_priority); +} + +asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size) +{ + struct task_struct *t = current; + long err = -EINVAL; + int dgl_ods[MAX_DGL_SIZE]; + int i; + + dgl_wait_state_t dgl_wait_state; // lives on the stack until all resources in DGL are held. + + if(dgl_size > MAX_DGL_SIZE || dgl_size < 1) + goto out; + + if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int)))) + goto out; + + if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int)))) + goto out; + + if (!is_realtime(t)) { + err = -EPERM; + goto out; + } + + for(i = 0; i < dgl_size; ++i) { + struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]); + if(entry && is_lock(entry)) { + dgl_wait_state.locks[i] = get_lock(entry); + if(!supports_dgl(dgl_wait_state.locks[i])) { + TRACE_CUR("Lock %d does not support all required DGL operations.\n", + dgl_wait_state.locks[i]->ident); + goto out; + } + } + else { + TRACE_CUR("Invalid lock identifier\n"); + goto out; + } + } + + dgl_wait_state.task = t; + dgl_wait_state.size = dgl_size; + + TS_DGL_LOCK_START; + err = do_litmus_dgl_lock(&dgl_wait_state); + + /* Note: task my have been suspended or preempted in between! Take + * this into account when computing overheads. */ + TS_DGL_LOCK_END; + +out: + return err; +} + +static long do_litmus_dgl_unlock(struct litmus_lock* dgl_locks[], int dgl_size) +{ + int i; + long err = 0; + + TRACE_CUR("Unlocking a DGL of %d size\n", dgl_size); + + for(i = dgl_size - 1; i >= 0; --i) { // unlock in reverse order + + struct litmus_lock *l = dgl_locks[i]; + long tmp_err; + + TRACE_CUR("Unlocking lock %d of DGL.\n", l->ident); + + tmp_err = l->ops->unlock(l); + + if(tmp_err) { + TRACE_CUR("There was an error unlocking %d: %d.\n", l->ident, tmp_err); + err = tmp_err; + } + } + + TRACE_CUR("DGL unlocked. err = %d\n", err); + + return err; +} + +asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size) +{ + long err = -EINVAL; + int dgl_ods[MAX_DGL_SIZE]; + struct od_table_entry* entry; + int i; + + struct litmus_lock* dgl_locks[MAX_DGL_SIZE]; + + if(dgl_size > MAX_DGL_SIZE || dgl_size < 1) + goto out; + + if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int)))) + goto out; + + if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int)))) + goto out; + + for(i = 0; i < dgl_size; ++i) { + entry = get_entry_for_od(dgl_ods[i]); + if(entry && is_lock(entry)) { + dgl_locks[i] = get_lock(entry); + if(!supports_dgl(dgl_locks[i])) { + TRACE_CUR("Lock %d does not support all required DGL operations.\n", + dgl_locks[i]->ident); + goto out; + } + } + else { + TRACE_CUR("Invalid lock identifier\n"); + goto out; + } + } + + TS_DGL_UNLOCK_START; + err = do_litmus_dgl_unlock(dgl_locks, dgl_size); + + /* Note: task my have been suspended or preempted in between! 
Take + * this into account when computing overheads. */ + TS_DGL_UNLOCK_END; + +out: + return err; +} + +#else // CONFIG_LITMUS_DGL_SUPPORT + +asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size) +{ + return -ENOSYS; +} + +asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size) +{ + return -ENOSYS; +} + +#endif -#else +#else // CONFIG_LITMUS_LOCKING struct fdso_ops generic_lock_ops = {}; diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c new file mode 100644 index 000000000000..4b86a50d3bd1 --- /dev/null +++ b/litmus/nvidia_info.c @@ -0,0 +1,597 @@ +#include +#include +#include + +#include +#include +#include + +#include + +#include + +typedef unsigned char NvV8; /* "void": enumerated or multiple fields */ +typedef unsigned short NvV16; /* "void": enumerated or multiple fields */ +typedef unsigned char NvU8; /* 0 to 255 */ +typedef unsigned short NvU16; /* 0 to 65535 */ +typedef signed char NvS8; /* -128 to 127 */ +typedef signed short NvS16; /* -32768 to 32767 */ +typedef float NvF32; /* IEEE Single Precision (S1E8M23) */ +typedef double NvF64; /* IEEE Double Precision (S1E11M52) */ +typedef unsigned int NvV32; /* "void": enumerated or multiple fields */ +typedef unsigned int NvU32; /* 0 to 4294967295 */ +typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */ +typedef union +{ + volatile NvV8 Reg008[1]; + volatile NvV16 Reg016[1]; + volatile NvV32 Reg032[1]; +} litmus_nv_hwreg_t, * litmus_nv_phwreg_t; + +typedef struct +{ + NvU64 address; + NvU64 size; + NvU32 offset; + NvU32 *map; + litmus_nv_phwreg_t map_u; +} litmus_nv_aperture_t; + +typedef struct +{ + void *priv; /* private data */ + void *os_state; /* os-specific device state */ + + int rmInitialized; + int flags; + + /* PCI config info */ + NvU32 domain; + NvU16 bus; + NvU16 slot; + NvU16 vendor_id; + NvU16 device_id; + NvU16 subsystem_id; + NvU32 gpu_id; + void *handle; + + NvU32 pci_cfg_space[16]; + + /* physical characteristics */ + litmus_nv_aperture_t bars[3]; + litmus_nv_aperture_t *regs; + litmus_nv_aperture_t *fb, ud; + litmus_nv_aperture_t agp; + + NvU32 interrupt_line; + + NvU32 agp_config; + NvU32 agp_status; + + NvU32 primary_vga; + + NvU32 sim_env; + + NvU32 rc_timer_enabled; + + /* list of events allocated for this device */ + void *event_list; + + void *kern_mappings; + +} litmus_nv_state_t; + +typedef struct work_struct litmus_nv_task_t; + +typedef struct litmus_nv_work_s { + litmus_nv_task_t task; + void *data; +} litmus_nv_work_t; + +typedef struct litmus_nv_linux_state_s { + litmus_nv_state_t nv_state; + atomic_t usage_count; + + struct pci_dev *dev; + void *agp_bridge; + void *alloc_queue; + + void *timer_sp; + void *isr_sp; + void *pci_cfgchk_sp; + void *isr_bh_sp; + +#ifdef CONFIG_CUDA_4_0 + char registry_keys[512]; +#endif + + /* keep track of any pending bottom halfes */ + struct tasklet_struct tasklet; + litmus_nv_work_t work; + + /* get a timer callback every second */ + struct timer_list rc_timer; + + /* lock for linux-specific data, not used by core rm */ + struct semaphore ldata_lock; + + /* lock for linux-specific alloc queue */ + struct semaphore at_lock; + +#if 0 +#if defined(NV_USER_MAP) + /* list of user mappings */ + struct nv_usermap_s *usermap_list; + + /* lock for VMware-specific mapping list */ + struct semaphore mt_lock; +#endif /* defined(NV_USER_MAP) */ +#if defined(NV_PM_SUPPORT_OLD_STYLE_APM) + void *apm_nv_dev; +#endif +#endif + + NvU32 device_num; + struct litmus_nv_linux_state_s *next; +} litmus_nv_linux_state_t; + +void 
dump_nvidia_info(const struct tasklet_struct *t) +{ + litmus_nv_state_t* nvstate = NULL; + litmus_nv_linux_state_t* linuxstate = NULL; + struct pci_dev* pci = NULL; + + nvstate = (litmus_nv_state_t*)(t->data); + + if(nvstate) + { + TRACE("NV State:\n" + "\ttasklet ptr = %p\n" + "\tstate ptr = %p\n" + "\tprivate data ptr = %p\n" + "\tos state ptr = %p\n" + "\tdomain = %u\n" + "\tbus = %u\n" + "\tslot = %u\n" + "\tvender_id = %u\n" + "\tdevice_id = %u\n" + "\tsubsystem_id = %u\n" + "\tgpu_id = %u\n" + "\tinterrupt_line = %u\n", + t, + nvstate, + nvstate->priv, + nvstate->os_state, + nvstate->domain, + nvstate->bus, + nvstate->slot, + nvstate->vendor_id, + nvstate->device_id, + nvstate->subsystem_id, + nvstate->gpu_id, + nvstate->interrupt_line); + + linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); + } + else + { + TRACE("INVALID NVSTATE????\n"); + } + + if(linuxstate) + { + int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate); + int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state)); + int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); + + + TRACE("LINUX NV State:\n" + "\tlinux nv state ptr: %p\n" + "\taddress of tasklet: %p\n" + "\taddress of work: %p\n" + "\tusage_count: %d\n" + "\tdevice_num: %u\n" + "\ttasklet addr == this tasklet: %d\n" + "\tpci: %p\n", + linuxstate, + &(linuxstate->tasklet), + &(linuxstate->work), + atomic_read(&(linuxstate->usage_count)), + linuxstate->device_num, + (t == &(linuxstate->tasklet)), + linuxstate->dev); + + pci = linuxstate->dev; + + TRACE("Offsets:\n" + "\tOffset from LinuxState: %d, %x\n" + "\tOffset from NVState: %d, %x\n" + "\tOffset from parameter: %d, %x\n" + "\tdevice_num: %u\n", + ls_offset, ls_offset, + ns_offset_raw, ns_offset_raw, + ns_offset_desired, ns_offset_desired, + *((u32*)((void*)nvstate + ns_offset_desired))); + } + else + { + TRACE("INVALID LINUXNVSTATE?????\n"); + } + +#if 0 + if(pci) + { + TRACE("PCI DEV Info:\n" + "pci device ptr: %p\n" + "\tdevfn = %d\n" + "\tvendor = %d\n" + "\tdevice = %d\n" + "\tsubsystem_vendor = %d\n" + "\tsubsystem_device = %d\n" + "\tslot # = %d\n", + pci, + pci->devfn, + pci->vendor, + pci->device, + pci->subsystem_vendor, + pci->subsystem_device, + pci->slot->number); + } + else + { + TRACE("INVALID PCIDEV PTR?????\n"); + } +#endif +} + +static struct module* nvidia_mod = NULL; +int init_nvidia_info(void) +{ + mutex_lock(&module_mutex); + nvidia_mod = find_module("nvidia"); + mutex_unlock(&module_mutex); + if(nvidia_mod != NULL) + { + TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__, + (void*)(nvidia_mod->module_core), + (void*)(nvidia_mod->module_core) + nvidia_mod->core_size); + init_nv_device_reg(); + return(0); + } + else + { + TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); + return(-1); + } +} + +void shutdown_nvidia_info(void) +{ + nvidia_mod = NULL; + mb(); +} + +/* works with pointers to static data inside the module too. */ +int is_nvidia_func(void* func_addr) +{ + int ret = 0; + if(nvidia_mod) + { + ret = within_module_core((long unsigned int)func_addr, nvidia_mod); + /* + if(ret) + { + TRACE("%s : %p is in NVIDIA module: %d\n", + __FUNCTION__, func_addr, ret); + }*/ + } + + return(ret); +} + +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t) +{ + // life is too short to use hard-coded offsets. update this later. 
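/* Note: t->data points at the nv_state_t embedded in the driver's
 * litmus_nv_linux_state_t (mirrored above), so container_of() recovers the
 * enclosing structure and its device_num without the driver-version-specific
 * byte offsets kept for reference in the #if 0 block below. */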
+ litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data); + litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); + + BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM); + + return(linuxstate->device_num); + + //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); + +#if 0 + // offset determined though observed behavior of the NV driver. + //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1 + //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2 + + void* state = (void*)(t->data); + void* device_num_ptr = state + DEVICE_NUM_OFFSET; + + //dump_nvidia_info(t); + return(*((u32*)device_num_ptr)); +#endif +} + +u32 get_work_nv_device_num(const struct work_struct *t) +{ + // offset determined though observed behavior of the NV driver. + const int DEVICE_NUM_OFFSET = sizeof(struct work_struct); + void* state = (void*)(t); + void** device_num_ptr = state + DEVICE_NUM_OFFSET; + return(*((u32*)(*device_num_ptr))); +} + + +typedef struct { + raw_spinlock_t lock; + int nr_owners; + struct task_struct* max_prio_owner; + struct task_struct* owners[NV_MAX_SIMULT_USERS]; +}nv_device_registry_t; + +static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; + +int init_nv_device_reg(void) +{ + int i; + + memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); + + for(i = 0; i < NV_DEVICE_NUM; ++i) + { + raw_spin_lock_init(&NV_DEVICE_REG[i].lock); + } + + return(1); +} + +/* use to get nv_device_id by given owner. + (if return -1, can't get the assocaite device id)*/ +/* +int get_nv_device_id(struct task_struct* owner) +{ + int i; + if(!owner) + { + return(-1); + } + for(i = 0; i < NV_DEVICE_NUM; ++i) + { + if(NV_DEVICE_REG[i].device_owner == owner) + return(i); + } + return(-1); +} +*/ + +static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_struct *skip) { + int i; + struct task_struct *found = NULL; + for(i = 0; i < reg->nr_owners; ++i) { + if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) { + found = reg->owners[i]; + } + } + return found; +} + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +void pai_check_priority_increase(struct task_struct *t, int reg_device_id) +{ + unsigned long flags; + nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; + + if(reg->max_prio_owner != t) { + + raw_spin_lock_irqsave(®->lock, flags); + + if(reg->max_prio_owner != t) { + if(litmus->compare(t, reg->max_prio_owner)) { + litmus->change_prio_pai_tasklet(reg->max_prio_owner, t); + reg->max_prio_owner = t; + } + } + + raw_spin_unlock_irqrestore(®->lock, flags); + } +} + + +void pai_check_priority_decrease(struct task_struct *t, int reg_device_id) +{ + unsigned long flags; + nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; + + if(reg->max_prio_owner == t) { + + raw_spin_lock_irqsave(®->lock, flags); + + if(reg->max_prio_owner == t) { + reg->max_prio_owner = find_hp_owner(reg, NULL); + if(reg->max_prio_owner != t) { + litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); + } + } + + raw_spin_unlock_irqrestore(®->lock, flags); + } +} +#endif + +static int __reg_nv_device(int reg_device_id, struct task_struct *t) +{ + int ret = 0; + int i; + struct task_struct *old_max = NULL; + unsigned long flags; + nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; + + if(test_bit(reg_device_id, &tsk_rt(t)->held_gpus)) { + // TODO: check if taks is already registered. + return ret; // assume already registered. 
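/* Note: per-task GPU registration is tracked in the held_gpus bitmask, so
 * re-registering a device this task already holds is treated as a no-op
 * (returns 0); the TODO above points out that no explicit duplicate check
 * is made against the owners[] table. */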
+ } + + + raw_spin_lock_irqsave(®->lock, flags); + + if(reg->nr_owners < NV_MAX_SIMULT_USERS) { + TRACE_TASK(t, "registers GPU %d\n", reg_device_id); + for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { + if(reg->owners[i] == NULL) { + reg->owners[i] = t; + + //if(edf_higher_prio(t, reg->max_prio_owner)) { + if(litmus->compare(t, reg->max_prio_owner)) { + old_max = reg->max_prio_owner; + reg->max_prio_owner = t; + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + litmus->change_prio_pai_tasklet(old_max, t); +#endif + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + down_and_set_stat(t, HELD, &tsk_rt(t)->klitirqd_sem); +#endif + ++(reg->nr_owners); + + break; + } + } + } + else + { + TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); + //ret = -EBUSY; + } + + raw_spin_unlock_irqrestore(®->lock, flags); + + __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); + + return(ret); +} + +static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) +{ + int ret = 0; + int i; + unsigned long flags; + nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id]; + +#ifdef CONFIG_LITMUS_SOFTIRQD + struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id); +#endif + + if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) { + return ret; + } + + raw_spin_lock_irqsave(®->lock, flags); + + TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id); + + for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { + if(reg->owners[i] == t) { +#ifdef CONFIG_LITMUS_SOFTIRQD + flush_pending(klitirqd_th, t); +#endif + if(reg->max_prio_owner == t) { + reg->max_prio_owner = find_hp_owner(reg, t); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); +#endif + } + +#ifdef CONFIG_LITMUS_SOFTIRQD + up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klitirqd_sem); +#endif + + reg->owners[i] = NULL; + --(reg->nr_owners); + + break; + } + } + + raw_spin_unlock_irqrestore(®->lock, flags); + + __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); + + return(ret); +} + + +int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) +{ + int ret; + + if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) + { + if(reg_action) + ret = __reg_nv_device(reg_device_id, t); + else + ret = __clear_reg_nv_device(reg_device_id, t); + } + else + { + ret = -ENODEV; + } + + return(ret); +} + +/* use to get the owner of nv_device_id. 
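/*
 * Registration in __reg_nv_device() boils down to: bail out if the task's
 * held_gpus bit is already set, claim a free owner slot under the registry
 * lock, possibly promote max_prio_owner, and finally set the bit. A
 * standalone sketch of that bookkeeping with locking omitted and integer
 * task ids; MAX_USERS is illustrative. Unlike the hunk above (which leaves
 * ret at 0 with -EBUSY commented out), the full-table case here returns an
 * error so the outcome is visible.
 */
#include <stdio.h>
#include <string.h>

#define MAX_USERS 3

struct gpu_reg {
	int nr_owners;
	int owners[MAX_USERS];               /* task ids; 0 marks a free slot */
};

static unsigned long held_gpus;              /* per-task bitmask in the patch */

static int reg_gpu(struct gpu_reg *reg, int gpu, int task)
{
	int i;

	if (held_gpus & (1UL << gpu))
		return 0;                    /* already registered: nothing to do */

	for (i = 0; i < MAX_USERS; i++) {
		if (!reg->owners[i]) {
			reg->owners[i] = task;       /* claim a free owner slot */
			reg->nr_owners++;
			held_gpus |= 1UL << gpu;     /* remember we hold this GPU */
			return 0;
		}
	}
	return -1;                           /* no free slot: device fully subscribed */
}

int main(void)
{
	struct gpu_reg reg;

	memset(&reg, 0, sizeof(reg));
	printf("register: %d, held mask: 0x%lx\n", reg_gpu(&reg, 0, 42), held_gpus);
	return 0;
}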
*/ +struct task_struct* get_nv_max_device_owner(u32 target_device_id) +{ + struct task_struct *owner = NULL; + BUG_ON(target_device_id >= NV_DEVICE_NUM); + owner = NV_DEVICE_REG[target_device_id].max_prio_owner; + return(owner); +} + +void lock_nv_registry(u32 target_device_id, unsigned long* flags) +{ + BUG_ON(target_device_id >= NV_DEVICE_NUM); + + if(in_interrupt()) + TRACE("Locking registry for %d.\n", target_device_id); + else + TRACE_CUR("Locking registry for %d.\n", target_device_id); + + raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags); +} + +void unlock_nv_registry(u32 target_device_id, unsigned long* flags) +{ + BUG_ON(target_device_id >= NV_DEVICE_NUM); + + if(in_interrupt()) + TRACE("Unlocking registry for %d.\n", target_device_id); + else + TRACE_CUR("Unlocking registry for %d.\n", target_device_id); + + raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags); +} + + +//void increment_nv_int_count(u32 device) +//{ +// unsigned long flags; +// struct task_struct* owner; +// +// lock_nv_registry(device, &flags); +// +// owner = NV_DEVICE_REG[device].device_owner; +// if(owner) +// { +// atomic_inc(&tsk_rt(owner)->nv_int_count); +// } +// +// unlock_nv_registry(device, &flags); +//} +//EXPORT_SYMBOL(increment_nv_int_count); + + diff --git a/litmus/preempt.c b/litmus/preempt.c index 5704d0bf4c0b..28368d5bc046 100644 --- a/litmus/preempt.c +++ b/litmus/preempt.c @@ -30,6 +30,7 @@ void sched_state_will_schedule(struct task_struct* tsk) /* Litmus tasks should never be subject to a remote * set_tsk_need_resched(). */ BUG_ON(is_realtime(tsk)); + #ifdef CONFIG_PREEMPT_STATE_TRACE TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", __builtin_return_address(0)); @@ -45,13 +46,17 @@ void sched_state_ipi(void) /* Cause scheduler to be invoked. * This will cause a transition to WILL_SCHEDULE. */ set_tsk_need_resched(current); + /* TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", current->comm, current->pid); + */ } else { /* ignore */ + /* TRACE_STATE("ignoring IPI in state %x (%s)\n", get_sched_state(), sched_state_name(get_sched_state())); + */ } } diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c new file mode 100644 index 000000000000..75ed87c5ed48 --- /dev/null +++ b/litmus/rsm_lock.c @@ -0,0 +1,796 @@ +#include +#include + +#include +#include +#include + +//#include + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif + + +/* caller is responsible for locking */ +static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex, + struct task_struct* skip) +{ + wait_queue_t *q; + struct list_head *pos; + struct task_struct *queued = NULL, *found = NULL; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + dgl_wait_state_t *dgl_wait = NULL; +#endif + + list_for_each(pos, &mutex->wait.task_list) { + q = list_entry(pos, wait_queue_t, task_list); + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + if(q->func == dgl_wake_up) { + dgl_wait = (dgl_wait_state_t*) q->private; + if(tsk_rt(dgl_wait->task)->blocked_lock == &mutex->litmus_lock) { + queued = dgl_wait->task; + } + else { + queued = NULL; // skip it. + } + } + else { + queued = (struct task_struct*) q->private; + } +#else + queued = (struct task_struct*) q->private; +#endif + + /* Compare task prios, find high prio task. 
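/*
 * lock_nv_registry()/unlock_nv_registry() take the flags word by pointer
 * because they wrap raw_spin_lock_irqsave()/raw_spin_unlock_irqrestore(),
 * which must store the saved IRQ state in memory owned by the caller.
 * A minimal user-space model of that calling convention; the pthread mutex
 * stands in for the registry spinlock and the flags value is a dummy.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

static void lock_registry(unsigned long *flags)
{
	*flags = 1;                          /* kernel: saved IRQ state goes here */
	pthread_mutex_lock(&registry_lock);
}

static void unlock_registry(unsigned long *flags)
{
	pthread_mutex_unlock(&registry_lock);
	(void)*flags;                        /* kernel: IRQ state restored from here */
}

int main(void)
{
	unsigned long flags;

	lock_registry(&flags);
	printf("registry held; flags = %lu\n", flags);
	unlock_registry(&flags);
	return 0;
}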
*/ + //if (queued && queued != skip && edf_higher_prio(queued, found)) { + if (queued && queued != skip && litmus->compare(queued, found)) { + found = queued; + } + } + return found; +} + + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + +int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t) +{ + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + return(mutex->owner == t); +} + +// return 1 if resource was immediatly acquired. +// Assumes mutex->lock is held. +// Must set task state to TASK_UNINTERRUPTIBLE if task blocks. +int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, + wait_queue_t* wq_node) +{ + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + struct task_struct *t = dgl_wait->task; + + int acquired_immediatly = 0; + + BUG_ON(t != current); + + if (mutex->owner) { + TRACE_TASK(t, "Enqueuing on lock %d.\n", l->ident); + + init_dgl_waitqueue_entry(wq_node, dgl_wait); + + set_task_state(t, TASK_UNINTERRUPTIBLE); + __add_wait_queue_tail_exclusive(&mutex->wait, wq_node); + } else { + TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident); + + /* it's ours now */ + mutex->owner = t; + + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); + binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); + + acquired_immediatly = 1; + } + + return acquired_immediatly; +} + +void rsm_mutex_enable_priority(struct litmus_lock *l, + dgl_wait_state_t* dgl_wait) +{ + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + struct task_struct *t = dgl_wait->task; + struct task_struct *owner = mutex->owner; + unsigned long flags = 0; // these are unused under DGL coarse-grain locking + + BUG_ON(owner == t); + + tsk_rt(t)->blocked_lock = l; + mb(); + + //if (edf_higher_prio(t, mutex->hp_waiter)) { + if (litmus->compare(t, mutex->hp_waiter)) { + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + struct task_struct *new_prio = NULL; + + if(mutex->hp_waiter) + TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", + mutex->hp_waiter->comm, mutex->hp_waiter->pid); + else + TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); + + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + mutex->hp_waiter = t; + l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter); + binheap_decrease(&l->nest.hp_binheap_node, + &tsk_rt(owner)->hp_blocked_tasks); + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + if(new_max_eff_prio != old_max_eff_prio) { + TRACE_TASK(t, "is new hp_waiter.\n"); + + if ((effective_priority(owner) == old_max_eff_prio) || + //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){ + (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){ + new_prio = new_max_eff_prio; + } + } + else { + TRACE_TASK(t, "no change in max_eff_prio of heap.\n"); + } + + if(new_prio) { + litmus->nested_increase_prio(owner, new_prio, + &mutex->lock, flags); // unlocks lock. 
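/*
 * Both the DGL and non-DGL acquire paths use the same test to decide whether
 * the holder needs a new inherited priority: record the top of the owner's
 * hp_blocked_tasks heap, apply the heap update, and propagate only if the
 * top changed and the owner was running at the old top (or the new top beats
 * the owner's effective priority). A standalone sketch of that test, with a
 * linear scan standing in for the binheap and larger-int = higher priority.
 */
#include <stdio.h>

#define MAX_WAITERS 8

struct prio_heap { int n, prio[MAX_WAITERS]; };

static int top(const struct prio_heap *h)
{
	int i, best = -1;                    /* -1 means "heap is empty" */

	for (i = 0; i < h->n; i++)
		if (h->prio[i] > best)
			best = h->prio[i];
	return best;
}

/* Returns the priority the owner should now inherit, or -1 for "no change". */
static int add_waiter(struct prio_heap *h, int waiter_prio, int owner_eff_prio)
{
	int old_max = top(h);
	int new_max;

	h->prio[h->n++] = waiter_prio;       /* the heap update */
	new_max = top(h);

	if (new_max != old_max &&
	    (owner_eff_prio == old_max || new_max > owner_eff_prio))
		return new_max;              /* propagate new maximum to the owner */
	return -1;
}

int main(void)
{
	struct prio_heap h = { 0 };
	int inh = add_waiter(&h, 5, 3);      /* owner's base priority is 3 */

	printf("first waiter  -> owner inherits %d\n", inh);                    /* 5 */
	printf("second waiter -> owner inherits %d\n", add_waiter(&h, 9, inh)); /* 9 */
	return 0;
}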
+ } + else { + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&mutex->lock, flags); + } + } + else { + TRACE_TASK(t, "no change in hp_waiter.\n"); + unlock_fine_irqrestore(&mutex->lock, flags); + } +} + +static void select_next_lock_if_primary(struct litmus_lock *l, + dgl_wait_state_t *dgl_wait) +{ + if(tsk_rt(dgl_wait->task)->blocked_lock == l) { + TRACE_CUR("Lock %d in DGL was primary for %s/%d.\n", + l->ident, dgl_wait->task->comm, dgl_wait->task->pid); + tsk_rt(dgl_wait->task)->blocked_lock = NULL; + mb(); + select_next_lock(dgl_wait /*, l*/); // pick the next lock to be blocked on + } + else { + TRACE_CUR("Got lock early! Lock %d in DGL was NOT primary for %s/%d.\n", + l->ident, dgl_wait->task->comm, dgl_wait->task->pid); + } +} +#endif + + + + +int rsm_mutex_lock(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct task_struct *owner; + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + wait_queue_t wait; + unsigned long flags; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t *dgl_lock; +#endif + + if (!is_realtime(t)) + return -EPERM; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + dgl_lock = litmus->get_dgl_spinlock(t); +#endif + + lock_global_irqsave(dgl_lock, flags); + lock_fine_irqsave(&mutex->lock, flags); + + if (mutex->owner) { + TRACE_TASK(t, "Blocking on lock %d.\n", l->ident); + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + // KLUDGE: don't count this suspension as time in the critical gpu + // critical section + if(tsk_rt(t)->held_gpus) { + tsk_rt(t)->suspend_gpu_tracker_on_block = 1; + } +#endif + + /* resource is not free => must suspend and wait */ + + owner = mutex->owner; + + init_waitqueue_entry(&wait, t); + + tsk_rt(t)->blocked_lock = l; /* record where we are blocked */ + mb(); // needed? + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&mutex->wait, &wait); + + /* check if we need to activate priority inheritance */ + //if (edf_higher_prio(t, mutex->hp_waiter)) { + if (litmus->compare(t, mutex->hp_waiter)) { + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + struct task_struct *new_prio = NULL; + + if(mutex->hp_waiter) + TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", + mutex->hp_waiter->comm, mutex->hp_waiter->pid); + else + TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); + + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + mutex->hp_waiter = t; + l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter); + binheap_decrease(&l->nest.hp_binheap_node, + &tsk_rt(owner)->hp_blocked_tasks); + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + if(new_max_eff_prio != old_max_eff_prio) { + TRACE_TASK(t, "is new hp_waiter.\n"); + + if ((effective_priority(owner) == old_max_eff_prio) || + //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){ + (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){ + new_prio = new_max_eff_prio; + } + } + else { + TRACE_TASK(t, "no change in max_eff_prio of heap.\n"); + } + + if(new_prio) { + litmus->nested_increase_prio(owner, new_prio, &mutex->lock, + flags); // unlocks lock. 
+ } + else { + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&mutex->lock, flags); + } + } + else { + TRACE_TASK(t, "no change in hp_waiter.\n"); + + unlock_fine_irqrestore(&mutex->lock, flags); + } + + unlock_global_irqrestore(dgl_lock, flags); + + TS_LOCK_SUSPEND; + + /* We depend on the FIFO order. Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release. + */ + + schedule(); + + TS_LOCK_RESUME; + + /* Since we hold the lock, no other task will change + * ->owner. We can thus check it without acquiring the spin + * lock. */ + BUG_ON(mutex->owner != t); + + TRACE_TASK(t, "Acquired lock %d.\n", l->ident); + + } else { + TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident); + + /* it's ours now */ + mutex->owner = t; + + raw_spin_lock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock); + binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + raw_spin_unlock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock); + + + unlock_fine_irqrestore(&mutex->lock, flags); + unlock_global_irqrestore(dgl_lock, flags); + } + + return 0; +} + + + +int rsm_mutex_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next = NULL; + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + unsigned long flags; + + struct task_struct *old_max_eff_prio; + + int wake_up_task = 1; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + dgl_wait_state_t *dgl_wait = NULL; + raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t); +#endif + + int err = 0; + + if (mutex->owner != t) { + err = -EINVAL; + return err; + } + + lock_global_irqsave(dgl_lock, flags); + lock_fine_irqsave(&mutex->lock, flags); + + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); + + TRACE_TASK(t, "Freeing lock %d\n", l->ident); + + old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); + binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks); + + if(tsk_rt(t)->inh_task){ + struct task_struct *new_max_eff_prio = + top_priority(&tsk_rt(t)->hp_blocked_tasks); + + if((new_max_eff_prio == NULL) || + /* there was a change in eff prio */ + ( (new_max_eff_prio != old_max_eff_prio) && + /* and owner had the old eff prio */ + (effective_priority(t) == old_max_eff_prio)) ) + { + // old_max_eff_prio > new_max_eff_prio + + //if(__edf_higher_prio(new_max_eff_prio, BASE, t, EFFECTIVE)) { + if(litmus->__compare(new_max_eff_prio, BASE, t, EFFECTIVE)) { + TRACE_TASK(t, "new_max_eff_prio > task's eff_prio-- new_max_eff_prio: %s/%d task: %s/%d [%s/%d]\n", + new_max_eff_prio->comm, new_max_eff_prio->pid, + t->comm, t->pid, tsk_rt(t)->inh_task->comm, + tsk_rt(t)->inh_task->pid); + WARN_ON(1); + } + + litmus->decrease_prio(t, new_max_eff_prio); + } + } + + if(binheap_empty(&tsk_rt(t)->hp_blocked_tasks) && + tsk_rt(t)->inh_task != NULL) + { + WARN_ON(tsk_rt(t)->inh_task != NULL); + TRACE_TASK(t, "No more locks are held, but eff_prio = %s/%d\n", + tsk_rt(t)->inh_task->comm, tsk_rt(t)->inh_task->pid); + } + + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); + + + /* check if there are jobs waiting for this resource */ +#ifdef CONFIG_LITMUS_DGL_SUPPORT + __waitqueue_dgl_remove_first(&mutex->wait, &dgl_wait, &next); + if(dgl_wait) { + next = dgl_wait->task; + //select_next_lock_if_primary(l, dgl_wait); + } +#else + next = __waitqueue_remove_first(&mutex->wait); +#endif + if (next) { + /* next becomes the resouce holder */ + mutex->owner = next; + TRACE_CUR("lock ownership passed to 
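/*
 * The "we depend on the FIFO order" comment above is the key invariant: the
 * releaser assigns mutex->owner to the head waiter before issuing the single
 * wake-up, so the woken task never re-contends; it only asserts ownership.
 * A two-thread user-space model of that hand-off, with a condition variable
 * standing in for the kernel wait queue.
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int owner;                            /* 0 = free, 1 = the waiter thread */

static void *waiter(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&m);
	while (owner != 1)                   /* "suspended" until handed the lock */
		pthread_cond_wait(&cv, &m);
	pthread_mutex_unlock(&m);

	/* Like the patch: ownership was assigned before the wake-up. */
	assert(owner == 1);
	printf("waiter: resource handed over, no re-check needed\n");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);
	pthread_mutex_lock(&m);
	owner = 1;                           /* pass ownership first... */
	pthread_cond_signal(&cv);            /* ...then the one wake-up per release */
	pthread_mutex_unlock(&m);
	pthread_join(t, NULL);
	return 0;
}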
%s/%d\n", next->comm, next->pid); + + /* determine new hp_waiter if necessary */ + if (next == mutex->hp_waiter) { + + TRACE_TASK(next, "was highest-prio waiter\n"); + /* next has the highest priority --- it doesn't need to + * inherit. However, we need to make sure that the + * next-highest priority in the queue is reflected in + * hp_waiter. */ + mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, next); + l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ? + effective_priority(mutex->hp_waiter) : + NULL; + + if (mutex->hp_waiter) + TRACE_TASK(mutex->hp_waiter, "is new highest-prio waiter\n"); + else + TRACE("no further waiters\n"); + + raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); + + binheap_add(&l->nest.hp_binheap_node, + &tsk_rt(next)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + if(dgl_wait) { + select_next_lock_if_primary(l, dgl_wait); + //wake_up_task = atomic_dec_and_test(&dgl_wait->nr_remaining); + --(dgl_wait->nr_remaining); + wake_up_task = (dgl_wait->nr_remaining == 0); + } +#endif + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); + } + else { + /* Well, if 'next' is not the highest-priority waiter, + * then it (probably) ought to inherit the highest-priority + * waiter's priority. */ + TRACE_TASK(next, "is not hp_waiter of lock %d.\n", l->ident); + + raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); + + binheap_add(&l->nest.hp_binheap_node, + &tsk_rt(next)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + if(dgl_wait) { + select_next_lock_if_primary(l, dgl_wait); + --(dgl_wait->nr_remaining); + wake_up_task = (dgl_wait->nr_remaining == 0); + } +#endif + + /* It is possible that 'next' *should* be the hp_waiter, but isn't + * because that update hasn't yet executed (update operation is + * probably blocked on mutex->lock). So only inherit if the top of + * 'next's top heap node is indeed the effective prio. of hp_waiter. + * (We use l->hp_waiter_eff_prio instead of effective_priority(hp_waiter) + * since the effective priority of hp_waiter can change (and the + * update has not made it to this lock).) + */ +#ifdef CONFIG_LITMUS_DGL_SUPPORT + if((l->nest.hp_waiter_eff_prio != NULL) && + (top_priority(&tsk_rt(next)->hp_blocked_tasks) == + l->nest.hp_waiter_eff_prio)) + { + if(dgl_wait && tsk_rt(next)->blocked_lock) { + BUG_ON(wake_up_task); + //if(__edf_higher_prio(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) { + if(litmus->__compare(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) { + litmus->nested_increase_prio(next, + l->nest.hp_waiter_eff_prio, &mutex->lock, flags); // unlocks lock && hp_blocked_tasks_lock. + goto out; // all spinlocks are released. bail out now. 
+ } + } + else { + litmus->increase_prio(next, l->nest.hp_waiter_eff_prio); + } + } + + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); +#else + if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) == + l->nest.hp_waiter_eff_prio)) + { + litmus->increase_prio(next, l->nest.hp_waiter_eff_prio); + } + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); +#endif + } + + if(wake_up_task) { + TRACE_TASK(next, "waking up since it is no longer blocked.\n"); + + tsk_rt(next)->blocked_lock = NULL; + mb(); + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + // re-enable tracking + if(tsk_rt(next)->held_gpus) { + tsk_rt(next)->suspend_gpu_tracker_on_block = 0; + } +#endif + + wake_up_process(next); + } + else { + TRACE_TASK(next, "is still blocked.\n"); + } + } + else { + /* becomes available */ + mutex->owner = NULL; + } + + unlock_fine_irqrestore(&mutex->lock, flags); + +#ifdef CONFIG_LITMUS_DGL_SUPPORT +out: +#endif + unlock_global_irqrestore(dgl_lock, flags); + + return err; +} + + +void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l, + struct task_struct* t, + raw_spinlock_t* to_unlock, + unsigned long irqflags) +{ + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + + // relay-style locking + lock_fine(&mutex->lock); + unlock_fine(to_unlock); + + if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked + struct task_struct *owner = mutex->owner; + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + //if((t != mutex->hp_waiter) && edf_higher_prio(t, mutex->hp_waiter)) { + if((t != mutex->hp_waiter) && litmus->compare(t, mutex->hp_waiter)) { + TRACE_TASK(t, "is new highest-prio waiter by propagation.\n"); + mutex->hp_waiter = t; + } + if(t == mutex->hp_waiter) { + // reflect the decreased priority in the heap node. + l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter); + + BUG_ON(!binheap_is_in_heap(&l->nest.hp_binheap_node)); + BUG_ON(!binheap_is_in_this_heap(&l->nest.hp_binheap_node, + &tsk_rt(owner)->hp_blocked_tasks)); + + binheap_decrease(&l->nest.hp_binheap_node, + &tsk_rt(owner)->hp_blocked_tasks); + } + + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + + if(new_max_eff_prio != old_max_eff_prio) { + // new_max_eff_prio > old_max_eff_prio holds. + if ((effective_priority(owner) == old_max_eff_prio) || + //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))) { + (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))) { + TRACE_CUR("Propagating inheritance to holder of lock %d.\n", + l->ident); + + // beware: recursion + litmus->nested_increase_prio(owner, new_max_eff_prio, + &mutex->lock, irqflags); // unlocks mutex->lock + } + else { + TRACE_CUR("Lower priority than holder %s/%d. 
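/*
 * The propagation paths use relay-style ("hand-over-hand") fine-grained
 * locking: each hop acquires the next lock in the blocking chain before
 * releasing the one it arrived with, so the chain cannot change underneath
 * the traversal. A user-space sketch of that discipline over a fixed chain
 * of three pthread mutexes standing in for the per-lock spinlocks.
 */
#include <pthread.h>
#include <stdio.h>

#define CHAIN_LEN 3

static pthread_mutex_t chain[CHAIN_LEN] = {
	PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER
};

static void propagate(int new_prio)
{
	int i;

	pthread_mutex_lock(&chain[0]);               /* the lock we start out holding */
	for (i = 1; i < CHAIN_LEN; i++) {
		pthread_mutex_lock(&chain[i]);       /* take the next hop first... */
		pthread_mutex_unlock(&chain[i - 1]); /* ...then drop the previous one */
		printf("propagated priority %d to lock %d\n", new_prio, i);
	}
	pthread_mutex_unlock(&chain[CHAIN_LEN - 1]);
}

int main(void)
{
	propagate(7);
	return 0;
}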
No propagation.\n", + owner->comm, owner->pid); + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } + else { + TRACE_TASK(mutex->owner, "No change in maxiumum effective priority.\n"); + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } + else { + struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock; + + TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident); + if(still_blocked) { + TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n", + still_blocked->ident); + if(still_blocked->ops->propagate_increase_inheritance) { + /* due to relay-style nesting of spinlocks (acq. A, acq. B, free A, free B) + we know that task 't' has not released any locks behind us in this + chain. Propagation just needs to catch up with task 't'. */ + still_blocked->ops->propagate_increase_inheritance(still_blocked, + t, + &mutex->lock, + irqflags); + } + else { + TRACE_TASK(t, + "Inheritor is blocked on lock (%p) that does not " + "support nesting!\n", + still_blocked); + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } + else { + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } +} + + +void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l, + struct task_struct* t, + raw_spinlock_t* to_unlock, + unsigned long irqflags) +{ + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + + // relay-style locking + lock_fine(&mutex->lock); + unlock_fine(to_unlock); + + if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked + if(t == mutex->hp_waiter) { + struct task_struct *owner = mutex->owner; + + struct task_struct *old_max_eff_prio; + struct task_struct *new_max_eff_prio; + + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); + + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks); + mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, NULL); + l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ? + effective_priority(mutex->hp_waiter) : NULL; + binheap_add(&l->nest.hp_binheap_node, + &tsk_rt(owner)->hp_blocked_tasks, + struct nested_info, hp_binheap_node); + + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); + + if((old_max_eff_prio != new_max_eff_prio) && + (effective_priority(owner) == old_max_eff_prio)) + { + // Need to set new effective_priority for owner + + struct task_struct *decreased_prio; + + TRACE_CUR("Propagating decreased inheritance to holder of lock %d.\n", + l->ident); + + //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) { + if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { + TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of lock %d.\n", + (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", + (new_max_eff_prio) ? new_max_eff_prio->pid : -1, + owner->comm, + owner->pid, + l->ident); + + decreased_prio = new_max_eff_prio; + } + else { + TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of lock %d.\n", + (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", + (new_max_eff_prio) ? 
new_max_eff_prio->pid : -1, + owner->comm, + owner->pid, + l->ident); + + decreased_prio = NULL; + } + + // beware: recursion + litmus->nested_decrease_prio(owner, decreased_prio, &mutex->lock, irqflags); // will unlock mutex->lock + } + else { + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } + else { + TRACE_TASK(t, "is not hp_waiter. No propagation.\n"); + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } + else { + struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock; + + TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident); + if(still_blocked) { + TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n", + still_blocked->ident); + if(still_blocked->ops->propagate_decrease_inheritance) { + /* due to linked nesting of spinlocks (acq. A, acq. B, free A, free B) + we know that task 't' has not released any locks behind us in this + chain. propagation just needs to catch up with task 't' */ + still_blocked->ops->propagate_decrease_inheritance(still_blocked, + t, + &mutex->lock, + irqflags); + } + else { + TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n", + still_blocked); + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } + else { + unlock_fine_irqrestore(&mutex->lock, irqflags); + } + } +} + + +int rsm_mutex_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct rsm_mutex *mutex = rsm_mutex_from_lock(l); + unsigned long flags; + + int owner; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t); +#endif + + lock_global_irqsave(dgl_lock, flags); + lock_fine_irqsave(&mutex->lock, flags); + + owner = (mutex->owner == t); + + unlock_fine_irqrestore(&mutex->lock, flags); + unlock_global_irqrestore(dgl_lock, flags); + + if (owner) + rsm_mutex_unlock(l); + + return 0; +} + +void rsm_mutex_free(struct litmus_lock* lock) +{ + kfree(rsm_mutex_from_lock(lock)); +} + +struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops* ops) +{ + struct rsm_mutex* mutex; + + mutex = kmalloc(sizeof(*mutex), GFP_KERNEL); + if (!mutex) + return NULL; + + mutex->litmus_lock.ops = ops; + mutex->owner = NULL; + mutex->hp_waiter = NULL; + init_waitqueue_head(&mutex->wait); + + +#ifdef CONFIG_DEBUG_SPINLOCK + { + __raw_spin_lock_init(&mutex->lock, + ((struct litmus_lock*)mutex)->cheat_lockdep, + &((struct litmus_lock*)mutex)->key); + } +#else + raw_spin_lock_init(&mutex->lock); +#endif + + ((struct litmus_lock*)mutex)->nest.hp_waiter_ptr = &mutex->hp_waiter; + + return &mutex->litmus_lock; +} + diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 480c62bc895b..be14dbec6ed2 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -29,7 +29,7 @@ #include #include #include - +#include #include #include @@ -42,6 +42,16 @@ #include #include +#include + +#ifdef CONFIG_LITMUS_LOCKING +#include +#endif + +#ifdef CONFIG_LITMUS_NESTED_LOCKING +#include +#include +#endif #ifdef CONFIG_SCHED_CPU_AFFINITY #include @@ -49,7 +59,27 @@ /* to configure the cluster size */ #include -#include + +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#include +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif /* Reference configuration variable. Determines which cache level is used to * group CPUs into clusters. 
GLOBAL_CLUSTER, which is the default, means that @@ -70,7 +100,7 @@ typedef struct { struct task_struct* linked; /* only RT tasks */ struct task_struct* scheduled; /* only RT tasks */ atomic_t will_schedule; /* prevent unneeded IPIs */ - struct bheap_node* hn; + struct binheap_node hn; } cpu_entry_t; /* one cpu_entry_t per CPU */ @@ -83,6 +113,14 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); #define test_will_schedule(cpu) \ (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; +#endif + /* * In C-EDF there is a cedf domain _per_ cluster * The number of clusters is dynamically determined accordingly to the @@ -96,10 +134,17 @@ typedef struct clusterdomain { /* map of this cluster cpus */ cpumask_var_t cpu_map; /* the cpus queue themselves according to priority in here */ - struct bheap_node *heap_node; - struct bheap cpu_heap; + struct binheap_handle cpu_heap; /* lock for this cluster */ #define cluster_lock domain.ready_lock + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + struct tasklet_head pending_tasklets; +#endif + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t dgl_lock; +#endif } cedf_domain_t; /* a cedf_domain per cluster; allocation is done at init/activation time */ @@ -108,6 +153,22 @@ cedf_domain_t *cedf; #define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) #define task_cpu_cluster(task) remote_cluster(get_partition(task)) +/* total number of cluster */ +static int num_clusters; +/* we do not support cluster of different sizes */ +static unsigned int cluster_size; + +static int clusters_allocated = 0; + +#ifdef CONFIG_LITMUS_DGL_SUPPORT +static raw_spinlock_t* cedf_get_dgl_spinlock(struct task_struct *t) +{ + cedf_domain_t *cluster = task_cpu_cluster(t); + return(&cluster->dgl_lock); +} +#endif + + /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose * information during the initialization of the plugin (e.g., topology) @@ -115,11 +176,11 @@ cedf_domain_t *cedf; */ #define VERBOSE_INIT -static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) +static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b) { - cpu_entry_t *a, *b; - a = _a->value; - b = _b->value; + cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn); + cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn); + /* Note that a and b are inverted: we want the lowest-priority CPU at * the top of the heap. */ @@ -133,20 +194,17 @@ static void update_cpu_position(cpu_entry_t *entry) { cedf_domain_t *cluster = entry->cluster; - if (likely(bheap_node_in_heap(entry->hn))) - bheap_delete(cpu_lower_prio, - &cluster->cpu_heap, - entry->hn); + if (likely(binheap_is_in_heap(&entry->hn))) { + binheap_delete(&entry->hn, &cluster->cpu_heap); + } - bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); + binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn); } /* caller must hold cedf lock */ static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) { - struct bheap_node* hn; - hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); - return hn->value; + return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn); } @@ -208,7 +266,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked, } /* unlink - Make sure a task is not linked any longer to an entry - * where it was linked before. Must hold cedf_lock. 
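/*
 * The switch from bheap to binheap keeps the same trick: the CPU heap is
 * ordered by an inverted comparator so the *lowest-priority* CPU sits at the
 * top, turning "which CPU should the next job preempt?" into a peek. A
 * standalone sketch of that ordering with a linear scan standing in for the
 * binheap; deadlines are plain integers (earlier = higher priority, 0 = idle).
 */
#include <stdio.h>

struct cpu { int id; long linked_deadline; };   /* 0 = no linked job (idle) */

/* Mirrors cpu_lower_prio(): true iff a is a lower-priority CPU than b. */
static int cpu_lower_prio(const struct cpu *a, const struct cpu *b)
{
	if (!b->linked_deadline)
		return 0;                           /* b is idle: b is lowest */
	if (!a->linked_deadline)
		return 1;                           /* a is idle: a is lowest */
	return b->linked_deadline < a->linked_deadline;
}

static const struct cpu *lowest_prio_cpu(const struct cpu *cpus, int n)
{
	const struct cpu *low = &cpus[0];
	int i;

	for (i = 1; i < n; i++)
		if (cpu_lower_prio(&cpus[i], low))
			low = &cpus[i];
	return low;                                 /* what binheap_top_entry() returns */
}

int main(void)
{
	struct cpu cpus[] = { { 0, 100 }, { 1, 0 }, { 2, 50 } };

	printf("preempt CPU %d\n", lowest_prio_cpu(cpus, 3)->id);   /* CPU 1 (idle) */
	return 0;
}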
+ * where it was linked before. Must hold cluster_lock. */ static noinline void unlink(struct task_struct* t) { @@ -244,7 +302,7 @@ static void preempt(cpu_entry_t *entry) } /* requeue - Put an unlinked task into gsn-edf domain. - * Caller must hold cedf_lock. + * Caller must hold cluster_lock. */ static noinline void requeue(struct task_struct* task) { @@ -339,13 +397,17 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); } -/* caller holds cedf_lock */ +/* caller holds cluster_lock */ static noinline void job_completion(struct task_struct *t, int forced) { BUG_ON(!t); sched_trace_task_completion(t, forced); +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(t)->nv_int_count, 0); +#endif + TRACE_TASK(t, "job_completion().\n"); /* set flags */ @@ -389,6 +451,314 @@ static void cedf_tick(struct task_struct* t) } } + + + + + + + + + + + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + + +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) +{ + if (!atomic_read(&tasklet->count)) { + if(tasklet->owner) { + sched_trace_tasklet_begin(tasklet->owner); + } + + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) + { + BUG(); + } + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", + __FUNCTION__, + (tasklet->owner) ? tasklet->owner->pid : -1, + (tasklet->owner) ? 0 : 1); + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + + if(tasklet->owner) { + sched_trace_tasklet_end(tasklet->owner, flushed); + } + } + else { + BUG(); + } +} + + +static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task) +{ + int work_to_do = 1; + struct tasklet_struct *tasklet = NULL; + unsigned long flags; + + while(work_to_do) { + + TS_NV_SCHED_BOTISR_START; + + raw_spin_lock_irqsave(&cluster->cluster_lock, flags); + + if(cluster->pending_tasklets.head != NULL) { + // remove tasklet at head. + struct tasklet_struct *prev = NULL; + tasklet = cluster->pending_tasklets.head; + + // find a tasklet with prio to execute; skip ones where + // sched_task has a higher priority. + // We use the '!edf' test instead of swaping function arguments since + // both sched_task and owner could be NULL. In this case, we want to + // still execute the tasklet. 
+ while(tasklet && !edf_higher_prio(tasklet->owner, sched_task)) { + prev = tasklet; + tasklet = tasklet->next; + } + + if(tasklet) { // found something to execuite + // remove the tasklet from the queue + if(prev) { + prev->next = tasklet->next; + if(prev->next == NULL) { + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.tail = &(prev); + } + } + else { + cluster->pending_tasklets.head = tasklet->next; + if(tasklet->next == NULL) { + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); + } + } + } + else { + TRACE("%s: No tasklets with eligible priority.\n", __FUNCTION__); + } + } + else { + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); + } + + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); + + if(tasklet) { + __do_lit_tasklet(tasklet, 0ul); + tasklet = NULL; + } + else { + work_to_do = 0; + } + + TS_NV_SCHED_BOTISR_END; + } +} + +static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) +{ + struct tasklet_struct* step; + + tasklet->next = NULL; // make sure there are no old values floating around + + step = cluster->pending_tasklets.head; + if(step == NULL) { + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); + // insert at tail. + *(cluster->pending_tasklets.tail) = tasklet; + cluster->pending_tasklets.tail = &(tasklet->next); + } + else if((*(cluster->pending_tasklets.tail) != NULL) && + edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) { + // insert at tail. + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); + + *(cluster->pending_tasklets.tail) = tasklet; + cluster->pending_tasklets.tail = &(tasklet->next); + } + else { + + // insert the tasklet somewhere in the middle. + + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__); + + while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) { + step = step->next; + } + + // insert tasklet right before step->next. + + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, + tasklet->owner->pid, + (step->owner) ? + step->owner->pid : + -1, + (step->next) ? + ((step->next->owner) ? + step->next->owner->pid : + -1) : + -1); + + tasklet->next = step->next; + step->next = tasklet; + + // patch up the head if needed. + if(cluster->pending_tasklets.head == step) + { + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); + cluster->pending_tasklets.head = tasklet; + } + } +} + +static void cedf_run_tasklets(struct task_struct* sched_task) +{ + cedf_domain_t* cluster; + + preempt_disable(); + + cluster = (is_realtime(sched_task)) ? 
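/*
 * The pending-tasklet queue above is a singly linked list whose tail field
 * stores the address of the last node's next pointer, so appends and "we
 * removed the last element" fix-ups are uniform. A standalone model of that
 * representation using a pointer-to-pointer walk; priorities are integers
 * (bigger = higher). Note that with this representation the interior-removal
 * fix-up in the hunk above would be expected to store &(prev->next) rather
 * than &(prev) into the tail.
 */
#include <stdio.h>

struct tasklet { int owner_prio; struct tasklet *next; };

struct tasklet_head { struct tasklet *head; struct tasklet **tail; };

static void append(struct tasklet_head *q, struct tasklet *t)
{
	t->next = NULL;
	*q->tail = t;                            /* works for empty and non-empty lists */
	q->tail = &t->next;
}

/* Pop the first tasklet whose owner outranks sched_prio, as do_lit_tasklets()
 * does when deciding what may run ahead of the currently scheduled task. */
static struct tasklet *pop_eligible(struct tasklet_head *q, int sched_prio)
{
	struct tasklet **link = &q->head, *t;

	while ((t = *link) && t->owner_prio <= sched_prio)
		link = &t->next;                 /* skip: scheduled task outranks it */
	if (!t)
		return NULL;
	*link = t->next;                         /* unlink the winner */
	if (!t->next)
		q->tail = link;                  /* removed the last element: fix tail */
	return t;
}

int main(void)
{
	struct tasklet a = { 3, NULL }, b = { 9, NULL };
	struct tasklet_head q = { NULL, &q.head };

	append(&q, &a);
	append(&q, &b);
	printf("run tasklet owned at prio %d\n", pop_eligible(&q, 5)->owner_prio); /* 9 */
	return 0;
}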
+ task_cpu_cluster(sched_task) : + remote_cluster(smp_processor_id()); + + if(cluster && cluster->pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + do_lit_tasklets(cluster, sched_task); + } + + preempt_enable_no_resched(); +} + + + +static int cedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet) +{ +#if 0 + cedf_domain_t *cluster = NULL; + cpu_entry_t *targetCPU = NULL; + int thisCPU; + int runLocal = 0; + int runNow = 0; + unsigned long flags; + + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + return 0; + } + + cluster = task_cpu_cluster(tasklet->owner); + + raw_spin_lock_irqsave(&cluster->cluster_lock, flags); + + thisCPU = smp_processor_id(); + +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = NULL; + + // use this CPU if it is in our cluster and isn't running any RT work. + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) { + affinity = &(__get_cpu_var(cedf_cpu_entries)); + } + else { + // this CPU is busy or shouldn't run tasklet in this cluster. + // look for available near by CPUs. + // NOTE: Affinity towards owner and not this CPU. Is this right? + affinity = + cedf_get_nearest_available_cpu(cluster, + &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner))); + } + + targetCPU = affinity; + } +#endif + + if (targetCPU == NULL) { + targetCPU = lowest_prio_cpu(cluster); + } + + if (edf_higher_prio(tasklet->owner, targetCPU->linked)) { + if (thisCPU == targetCPU->cpu) { + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__); + runLocal = 1; + runNow = 1; + } + else { + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__); + runLocal = 0; + runNow = 1; + } + } + else { + runLocal = 0; + runNow = 0; + } + + if(!runLocal) { + // enqueue the tasklet + __add_pai_tasklet(tasklet, cluster); + } + + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); + + + if (runLocal /*&& runNow */) { // runNow == 1 is implied + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); + __do_lit_tasklet(tasklet, 0ul); + } + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu); + preempt(targetCPU); // need to be protected by cluster_lock? + } + else { + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__); + } +#else + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); + __do_lit_tasklet(tasklet, 0ul); +#endif + return(1); // success +} + +static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio, + struct task_struct *new_prio) +{ + struct tasklet_struct* step; + unsigned long flags; + cedf_domain_t *cluster; + struct task_struct *probe; + + // identify the cluster by the assignment of these tasks. one should + // be non-NULL. + probe = (old_prio) ? 
old_prio : new_prio; + + if(probe) { + cluster = task_cpu_cluster(probe); + + if(cluster->pending_tasklets.head != NULL) { + raw_spin_lock_irqsave(&cluster->cluster_lock, flags); + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) { + if(step->owner == old_prio) { + TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid); + step->owner = new_prio; + } + } + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); + } + } + else { + TRACE("%s: Both priorities were NULL\n"); + } +} + +#endif // PAI + /* Getting schedule() right is a bit tricky. schedule() may not make any * assumptions on the state of the current task since it may be called for a * number of reasons. The reasons include a scheduler_tick() determined that it @@ -465,6 +835,19 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) if (blocks) unlink(entry->scheduled); +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) + if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { + if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { + // don't track preemptions or locking protocol suspensions. + TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n"); + stop_gpu_tracker(entry->scheduled); + } + else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { + TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n"); + } + } +#endif + /* Request a sys_exit_np() call if we would like to preempt but cannot. * We need to make sure to update the link structure anyway in case * that we are still linked. Multiple calls to request_exit_np() don't @@ -514,7 +897,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) raw_spin_unlock(&cluster->cluster_lock); #ifdef WANT_ALL_SCHED_EVENTS - TRACE("cedf_lock released, next=0x%p\n", next); + TRACE("cluster_lock released, next=0x%p\n", next); if (next) TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); @@ -522,7 +905,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) TRACE("becomes idle at %llu.\n", litmus_clock()); #endif - return next; } @@ -548,7 +930,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) cpu_entry_t* entry; cedf_domain_t* cluster; - TRACE("gsn edf: task new %d\n", t->pid); + TRACE("c-edf: task new %d\n", t->pid); /* the cluster doesn't change even if t is running */ cluster = task_cpu_cluster(t); @@ -586,7 +968,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) static void cedf_task_wake_up(struct task_struct *task) { unsigned long flags; - lt_t now; + //lt_t now; cedf_domain_t *cluster; TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); @@ -594,6 +976,8 @@ static void cedf_task_wake_up(struct task_struct *task) cluster = task_cpu_cluster(task); raw_spin_lock_irqsave(&cluster->cluster_lock, flags); + +#if 0 // sproadic task model /* We need to take suspensions because of semaphores into * account! If a job resumes after being suspended due to acquiring * a semaphore, it should never be treated as a new job release. 
@@ -615,7 +999,13 @@ static void cedf_task_wake_up(struct task_struct *task) } } } - cedf_job_arrival(task); +#else + set_rt_flags(task, RT_F_RUNNING); // periodic model +#endif + + if(tsk_rt(task)->linked_on == NO_CPU) + cedf_job_arrival(task); + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); } @@ -642,6 +1032,10 @@ static void cedf_task_exit(struct task_struct * t) unsigned long flags; cedf_domain_t *cluster = task_cpu_cluster(t); +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + cedf_change_prio_pai_tasklet(t, NULL); +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&cluster->cluster_lock, flags); unlink(t); @@ -659,13 +1053,536 @@ static void cedf_task_exit(struct task_struct * t) static long cedf_admit_task(struct task_struct* tsk) { +#ifdef CONFIG_LITMUS_NESTED_LOCKING + INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, + edf_max_heap_base_priority_order); +#endif + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; } -/* total number of cluster */ -static int num_clusters; -/* we do not support cluster of different sizes */ -static unsigned int cluster_size; + + +#ifdef CONFIG_LITMUS_LOCKING + +#include + + + +/* called with IRQs off */ +static void __increase_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh) +{ + int linked_on; + int check_preempt = 0; + + cedf_domain_t* cluster = task_cpu_cluster(t); + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + /* this sanity check allows for weaker locking in protocols */ + /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */ + if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { +#endif + TRACE_TASK(t, "inherits priority from %s/%d\n", + prio_inh->comm, prio_inh->pid); + tsk_rt(t)->inh_task = prio_inh; + + linked_on = tsk_rt(t)->linked_on; + + /* If it is scheduled, then we need to reorder the CPU heap. */ + if (linked_on != NO_CPU) { + TRACE_TASK(t, "%s: linked on %d\n", + __FUNCTION__, linked_on); + /* Holder is scheduled; need to re-order CPUs. + * We can't use heap_decrease() here since + * the cpu_heap is ordered in reverse direction, so + * it is actually an increase. */ + binheap_delete(&per_cpu(cedf_cpu_entries, linked_on).hn, + &cluster->cpu_heap); + binheap_add(&per_cpu(cedf_cpu_entries, linked_on).hn, + &cluster->cpu_heap, cpu_entry_t, hn); + + } else { + /* holder may be queued: first stop queue changes */ + raw_spin_lock(&cluster->domain.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "%s: is queued\n", + __FUNCTION__); + /* We need to update the position of holder in some + * heap. Note that this could be a release heap if we + * budget enforcement is used and this job overran. */ + check_preempt = + !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node); + } else { + /* Nothing to do: if it is not queued and not linked + * then it is either sleeping or currently being moved + * by other code (e.g., a timer interrupt handler) that + * will use the correct priority when enqueuing the + * task. */ + TRACE_TASK(t, "%s: is NOT queued => Done.\n", + __FUNCTION__); + } + raw_spin_unlock(&cluster->domain.release_lock); + + /* If holder was enqueued in a release heap, then the following + * preemption check is pointless, but we can't easily detect + * that case. If you want to fix this, then consider that + * simply adding a state flag requires O(n) time to update when + * releasing n tasks, which conflicts with the goal to have + * O(log n) merges. 
*/ + if (check_preempt) { + /* heap_decrease() hit the top level of the heap: make + * sure preemption checks get the right task, not the + * potentially stale cache. */ + bheap_uncache_min(edf_ready_order, + &cluster->domain.ready_queue); + check_for_preemptions(cluster); + } + } +#ifdef CONFIG_LITMUS_NESTED_LOCKING + } + else { + TRACE_TASK(t, "Spurious invalid priority increase. " + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" + "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", + t->comm, t->pid, + effective_priority(t)->comm, effective_priority(t)->pid, + (prio_inh) ? prio_inh->comm : "nil", + (prio_inh) ? prio_inh->pid : -1); + WARN_ON(!prio_inh); + } +#endif +} + +/* called with IRQs off */ +static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ + cedf_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->cluster_lock); + + __increase_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inherits a new priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + + raw_spin_unlock(&cluster->cluster_lock); + +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) + if(tsk_rt(t)->held_gpus) { + int i; + for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + i < NV_DEVICE_NUM; + i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) { + pai_check_priority_increase(t, i); + } + } +#endif +} + +/* called with IRQs off */ +static void __decrease_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh) +{ +#ifdef CONFIG_LITMUS_NESTED_LOCKING + if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { +#endif + /* A job only stops inheriting a priority when it releases a + * resource. Thus we can make the following assumption.*/ + if(prio_inh) + TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n", + prio_inh->comm, prio_inh->pid); + else + TRACE_TASK(t, "base priority restored.\n"); + + tsk_rt(t)->inh_task = prio_inh; + + if(tsk_rt(t)->scheduled_on != NO_CPU) { + TRACE_TASK(t, "is scheduled.\n"); + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. */ + unlink(t); + cedf_job_arrival(t); + } + else { + cedf_domain_t* cluster = task_cpu_cluster(t); + /* task is queued */ + raw_spin_lock(&cluster->domain.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "is queued.\n"); + + /* decrease in priority, so we have to re-add to binomial heap */ + unlink(t); + cedf_job_arrival(t); + } + else { + TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n"); + } + raw_spin_unlock(&cluster->domain.release_lock); + } +#ifdef CONFIG_LITMUS_NESTED_LOCKING + } + else { + TRACE_TASK(t, "Spurious invalid priority decrease. " + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" + "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", + t->comm, t->pid, + effective_priority(t)->comm, effective_priority(t)->pid, + (prio_inh) ? prio_inh->comm : "nil", + (prio_inh) ? 
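/*
 * Both inheritance paths finish by walking the task's held_gpus bitmask and
 * re-running the PAI priority check for every GPU the task currently holds.
 * A standalone sketch of that walk using a plain shift-and-test loop; the
 * device count and task id are illustrative. One thing worth double-checking
 * in the hunk above: find_first_bit()/find_next_bit() take the bitmap size
 * in bits, while sizeof(tsk_rt(t)->held_gpus) yields a size in bytes.
 */
#include <stdio.h>

#define NV_DEVICE_NUM 8

/* Stand-in for pai_check_priority_increase()/..._decrease(). */
static void pai_check(int task_id, int gpu)
{
	printf("task %d: re-evaluate PAI tasklet priority on GPU %d\n", task_id, gpu);
}

static void check_all_held_gpus(int task_id, unsigned long held_gpus)
{
	int i;

	for (i = 0; i < NV_DEVICE_NUM; i++)
		if (held_gpus & (1UL << i))
			pai_check(task_id, i);
}

int main(void)
{
	check_all_held_gpus(42, (1UL << 1) | (1UL << 5));   /* holds GPUs 1 and 5 */
	return 0;
}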
prio_inh->pid : -1); + } +#endif +} + +static void decrease_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh) +{ + cedf_domain_t* cluster = task_cpu_cluster(t); + + raw_spin_lock(&cluster->cluster_lock); + __decrease_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d decreases in priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + + raw_spin_unlock(&cluster->cluster_lock); + +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) + if(tsk_rt(t)->held_gpus) { + int i; + for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + i < NV_DEVICE_NUM; + i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) { + pai_check_priority_decrease(t, i); + } + } +#endif +} + + + + + +#ifdef CONFIG_LITMUS_SOFTIRQD +/* called with IRQs off */ +static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + cedf_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->cluster_lock); + + if(old_owner != new_owner) + { + if(old_owner) + { + // unreachable? + tsk_rt(old_owner)->cur_klitirqd = NULL; + } + + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + new_owner->comm, new_owner->pid); + + tsk_rt(new_owner)->cur_klitirqd = klitirqd; + } + + __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio. + + __increase_priority_inheritance(klitirqd, + (tsk_rt(new_owner)->inh_task == NULL) ? + new_owner : + tsk_rt(new_owner)->inh_task); + + raw_spin_unlock(&cluster->cluster_lock); +} + + +/* called with IRQs off */ +static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + cedf_domain_t* cluster = task_cpu_cluster(klitirqd); + + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&cluster->cluster_lock); + + TRACE_TASK(klitirqd, "priority restored\n"); + + __decrease_priority_inheritance(klitirqd, new_owner); + + tsk_rt(old_owner)->cur_klitirqd = NULL; + + raw_spin_unlock(&cluster->cluster_lock); +} +#endif // CONFIG_LITMUS_SOFTIRQD + + + + + + + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + +/* called with IRQs off */ +/* preconditions: + (1) The 'hp_blocked_tasks_lock' of task 't' is held. + (2) The lock 'to_unlock' is held. + */ +static void nested_increase_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, + unsigned long irqflags) +{ + struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; + + if(tsk_rt(t)->inh_task != prio_inh) { // shield redundent calls. + increase_priority_inheritance(t, prio_inh); // increase our prio. + } + + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. + + + if(blocked_lock) { + if(blocked_lock->ops->propagate_increase_inheritance) { + TRACE_TASK(t, "Inheritor is blocked (...perhaps). 
Checking lock %d.\n", + blocked_lock->ident); + + // beware: recursion + blocked_lock->ops->propagate_increase_inheritance(blocked_lock, + t, to_unlock, + irqflags); + } + else { + TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n", + blocked_lock->ident); + unlock_fine_irqrestore(to_unlock, irqflags); + } + } + else { + TRACE_TASK(t, "is not blocked. No propagation.\n"); + unlock_fine_irqrestore(to_unlock, irqflags); + } +} + +/* called with IRQs off */ +/* preconditions: + (1) The 'hp_blocked_tasks_lock' of task 't' is held. + (2) The lock 'to_unlock' is held. + */ +static void nested_decrease_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, + unsigned long irqflags) +{ + struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; + decrease_priority_inheritance(t, prio_inh); + + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. + + if(blocked_lock) { + if(blocked_lock->ops->propagate_decrease_inheritance) { + TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", + blocked_lock->ident); + + // beware: recursion + blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t, + to_unlock, + irqflags); + } + else { + TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n", + blocked_lock); + unlock_fine_irqrestore(to_unlock, irqflags); + } + } + else { + TRACE_TASK(t, "is not blocked. No propagation.\n"); + unlock_fine_irqrestore(to_unlock, irqflags); + } +} + + +/* ******************** RSM MUTEX ********************** */ + +static struct litmus_lock_ops cedf_rsm_mutex_lock_ops = { + .lock = rsm_mutex_lock, + .unlock = rsm_mutex_unlock, + .close = rsm_mutex_close, + .deallocate = rsm_mutex_free, + + .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance, + .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance, + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + .dgl_lock = rsm_mutex_dgl_lock, + .is_owner = rsm_mutex_is_owner, + .enable_priority = rsm_mutex_enable_priority, +#endif +}; + +static struct litmus_lock* cedf_new_rsm_mutex(void) +{ + return rsm_mutex_new(&cedf_rsm_mutex_lock_ops); +} + +/* ******************** IKGLP ********************** */ + +static struct litmus_lock_ops cedf_ikglp_lock_ops = { + .lock = ikglp_lock, + .unlock = ikglp_unlock, + .close = ikglp_close, + .deallocate = ikglp_free, + + // ikglp can only be an outer-most lock. + .propagate_increase_inheritance = NULL, + .propagate_decrease_inheritance = NULL, +}; + +static struct litmus_lock* cedf_new_ikglp(void* __user arg) +{ + // assumes clusters of uniform size. + return ikglp_new(cluster_size/num_clusters, &cedf_ikglp_lock_ops, arg); +} + +#endif /* CONFIG_LITMUS_NESTED_LOCKING */ + + + + +/* ******************** KFMLP support ********************** */ + +static struct litmus_lock_ops cedf_kfmlp_lock_ops = { + .lock = kfmlp_lock, + .unlock = kfmlp_unlock, + .close = kfmlp_close, + .deallocate = kfmlp_free, + + // kfmlp can only be an outer-most lock. 
+ .propagate_increase_inheritance = NULL, + .propagate_decrease_inheritance = NULL, +}; + + +static struct litmus_lock* cedf_new_kfmlp(void* __user arg) +{ + return kfmlp_new(&cedf_kfmlp_lock_ops, arg); +} + + +/* **** lock constructor **** */ + +static long cedf_allocate_lock(struct litmus_lock **lock, int type, + void* __user args) +{ + int err; + + switch (type) { +#ifdef CONFIG_LITMUS_NESTED_LOCKING + case RSM_MUTEX: + *lock = cedf_new_rsm_mutex(); + break; + + case IKGLP_SEM: + *lock = cedf_new_ikglp(args); + break; +#endif + case KFMLP_SEM: + *lock = cedf_new_kfmlp(args); + break; + + default: + err = -ENXIO; + goto UNSUPPORTED_LOCK; + }; + + if (*lock) + err = 0; + else + err = -ENOMEM; + +UNSUPPORTED_LOCK: + return err; +} + +#endif // CONFIG_LITMUS_LOCKING + + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +static struct affinity_observer_ops cedf_kfmlp_affinity_ops = { + .close = kfmlp_aff_obs_close, + .deallocate = kfmlp_aff_obs_free, +}; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING +static struct affinity_observer_ops cedf_ikglp_affinity_ops = { + .close = ikglp_aff_obs_close, + .deallocate = ikglp_aff_obs_free, +}; +#endif + +static long cedf_allocate_affinity_observer(struct affinity_observer **aff_obs, + int type, + void* __user args) +{ + int err; + + switch (type) { + + case KFMLP_SIMPLE_GPU_AFF_OBS: + *aff_obs = kfmlp_simple_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args); + break; + + case KFMLP_GPU_AFF_OBS: + *aff_obs = kfmlp_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args); + break; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + case IKGLP_SIMPLE_GPU_AFF_OBS: + *aff_obs = ikglp_simple_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args); + break; + + case IKGLP_GPU_AFF_OBS: + *aff_obs = ikglp_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args); + break; +#endif + default: + err = -ENXIO; + goto UNSUPPORTED_AFF_OBS; + }; + + if (*aff_obs) + err = 0; + else + err = -ENOMEM; + +UNSUPPORTED_AFF_OBS: + return err; +} +#endif + + + #ifdef VERBOSE_INIT static void print_cluster_topology(cpumask_var_t mask, int cpu) @@ -680,16 +1597,17 @@ static void print_cluster_topology(cpumask_var_t mask, int cpu) } #endif -static int clusters_allocated = 0; - static void cleanup_cedf(void) { int i; +#ifdef CONFIG_LITMUS_NVIDIA + shutdown_nvidia_info(); +#endif + if (clusters_allocated) { for (i = 0; i < num_clusters; i++) { kfree(cedf[i].cpus); - kfree(cedf[i].heap_node); free_cpumask_var(cedf[i].cpu_map); } @@ -749,12 +1667,16 @@ static long cedf_activate_plugin(void) cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), GFP_ATOMIC); - cedf[i].heap_node = kmalloc( - cluster_size * sizeof(struct bheap_node), - GFP_ATOMIC); - bheap_init(&(cedf[i].cpu_heap)); + INIT_BINHEAP_HANDLE(&(cedf[i].cpu_heap), cpu_lower_prio); edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + cedf[i].pending_tasklets.head = NULL; + cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head); +#endif + + if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) return -ENOMEM; #ifdef CONFIG_RELEASE_MASTER @@ -765,6 +1687,10 @@ static long cedf_activate_plugin(void) /* cycle through cluster and add cpus to them */ for (i = 0; i < num_clusters; i++) { +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spin_lock_init(&cedf[i].dgl_lock); +#endif + for_each_online_cpu(cpu) { /* check if the cpu is already in a cluster */ for (j = 0; j < num_clusters; j++) @@ -795,8 +1721,8 @@ static long cedf_activate_plugin(void) atomic_set(&entry->will_schedule, 0); entry->cpu = ccpu; entry->cluster = &cedf[i]; - 
entry->hn = &(cedf[i].heap_node[cpu_count]); - bheap_node_init(&entry->hn, entry); + + INIT_BINHEAP_NODE(&entry->hn); cpu_count++; @@ -813,6 +1739,40 @@ static long cedf_activate_plugin(void) } } +#ifdef CONFIG_LITMUS_SOFTIRQD + { + /* distribute the daemons evenly across the clusters. */ + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC); + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters; + int left_over = NR_LITMUS_SOFTIRQD % num_clusters; + + int daemon = 0; + for(i = 0; i < num_clusters; ++i) + { + int num_on_this_cluster = num_daemons_per_cluster; + if(left_over) + { + ++num_on_this_cluster; + --left_over; + } + + for(j = 0; j < num_on_this_cluster; ++j) + { + // first CPU of this cluster + affinity[daemon++] = i*cluster_size; + } + } + + spawn_klitirqd(affinity); + + kfree(affinity); + } +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + init_nvidia_info(); +#endif + free_cpumask_var(mask); clusters_allocated = 1; return 0; @@ -831,6 +1791,32 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { .task_block = cedf_task_block, .admit_task = cedf_admit_task, .activate_plugin = cedf_activate_plugin, + .compare = edf_higher_prio, +#ifdef CONFIG_LITMUS_LOCKING + .allocate_lock = cedf_allocate_lock, + .increase_prio = increase_priority_inheritance, + .decrease_prio = decrease_priority_inheritance, +#endif +#ifdef CONFIG_LITMUS_NESTED_LOCKING + .nested_increase_prio = nested_increase_priority_inheritance, + .nested_decrease_prio = nested_decrease_priority_inheritance, + .__compare = __edf_higher_prio, +#endif +#ifdef CONFIG_LITMUS_DGL_SUPPORT + .get_dgl_spinlock = cedf_get_dgl_spinlock, +#endif +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + .allocate_aff_obs = cedf_allocate_affinity_observer, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .increase_prio_klitirqd = increase_priority_inheritance_klitirqd, + .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd, +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet, + .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet, + .run_tasklets = cedf_run_tasklets, +#endif }; static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 6ed504f4750e..8c48757fa86c 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -12,23 +12,49 @@ #include #include #include +#include +#include #include #include #include #include #include -#include #include #include +#include + +#ifdef CONFIG_LITMUS_LOCKING +#include +#endif + +#ifdef CONFIG_LITMUS_NESTED_LOCKING +#include +#include +#endif #ifdef CONFIG_SCHED_CPU_AFFINITY #include #endif -#include +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +#include +#include +#endif + +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif /* Overview of GSN-EDF operations. 
* @@ -103,52 +129,70 @@ typedef struct { int cpu; struct task_struct* linked; /* only RT tasks */ struct task_struct* scheduled; /* only RT tasks */ - struct bheap_node* hn; + struct binheap_node hn; } cpu_entry_t; DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries); cpu_entry_t* gsnedf_cpus[NR_CPUS]; /* the cpus queue themselves according to priority in here */ -static struct bheap_node gsnedf_heap_node[NR_CPUS]; -static struct bheap gsnedf_cpu_heap; +static struct binheap_handle gsnedf_cpu_heap; static rt_domain_t gsnedf; #define gsnedf_lock (gsnedf.ready_lock) +#ifdef CONFIG_LITMUS_DGL_SUPPORT +static raw_spinlock_t dgl_lock; + +static raw_spinlock_t* gsnedf_get_dgl_spinlock(struct task_struct *t) +{ + return(&dgl_lock); +} +#endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; + +struct tasklet_head gsnedf_pending_tasklets; +#endif + /* Uncomment this if you want to see all scheduling decisions in the * TRACE() log. #define WANT_ALL_SCHED_EVENTS */ -static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) +static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b) { - cpu_entry_t *a, *b; - a = _a->value; - b = _b->value; + cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn); + cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn); + /* Note that a and b are inverted: we want the lowest-priority CPU at * the top of the heap. */ return edf_higher_prio(b->linked, a->linked); } + /* update_cpu_position - Move the cpu entry to the correct place to maintain * order in the cpu queue. Caller must hold gsnedf lock. */ static void update_cpu_position(cpu_entry_t *entry) { - if (likely(bheap_node_in_heap(entry->hn))) - bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); - bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); + if (likely(binheap_is_in_heap(&entry->hn))) { + binheap_delete(&entry->hn, &gsnedf_cpu_heap); + } + binheap_add(&entry->hn, &gsnedf_cpu_heap, cpu_entry_t, hn); } /* caller must hold gsnedf lock */ static cpu_entry_t* lowest_prio_cpu(void) { - struct bheap_node* hn; - hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap); - return hn->value; + return binheap_top_entry(&gsnedf_cpu_heap, cpu_entry_t, hn); } @@ -337,6 +381,10 @@ static noinline void job_completion(struct task_struct *t, int forced) sched_trace_task_completion(t, forced); +#ifdef CONFIG_LITMUS_NVIDIA + atomic_set(&tsk_rt(t)->nv_int_count, 0); +#endif + TRACE_TASK(t, "job_completion().\n"); /* set flags */ @@ -379,6 +427,318 @@ static void gsnedf_tick(struct task_struct* t) } } + + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + + +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) +{ + if (!atomic_read(&tasklet->count)) { + if(tasklet->owner) { + sched_trace_tasklet_begin(tasklet->owner); + } + + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) + { + BUG(); + } + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", + __FUNCTION__, + (tasklet->owner) ? tasklet->owner->pid : -1, + (tasklet->owner) ? 
0 : 1); + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + + if(tasklet->owner) { + sched_trace_tasklet_end(tasklet->owner, flushed); + } + } + else { + BUG(); + } +} + +static void do_lit_tasklets(struct task_struct* sched_task) +{ + int work_to_do = 1; + struct tasklet_struct *tasklet = NULL; + unsigned long flags; + + while(work_to_do) { + + TS_NV_SCHED_BOTISR_START; + + // execute one tasklet that has higher priority + raw_spin_lock_irqsave(&gsnedf_lock, flags); + + if(gsnedf_pending_tasklets.head != NULL) { + struct tasklet_struct *prev = NULL; + tasklet = gsnedf_pending_tasklets.head; + + while(tasklet && edf_higher_prio(sched_task, tasklet->owner)) { + prev = tasklet; + tasklet = tasklet->next; + } + + // remove the tasklet from the queue + if(prev) { + prev->next = tasklet->next; + if(prev->next == NULL) { + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + gsnedf_pending_tasklets.tail = &(prev); + } + } + else { + gsnedf_pending_tasklets.head = tasklet->next; + if(tasklet->next == NULL) { + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head); + } + } + } + else { + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); + } + + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); + + if(tasklet) { + __do_lit_tasklet(tasklet, 0ul); + tasklet = NULL; + } + else { + work_to_do = 0; + } + + TS_NV_SCHED_BOTISR_END; + } +} + +//static void do_lit_tasklets(struct task_struct* sched_task) +//{ +// int work_to_do = 1; +// struct tasklet_struct *tasklet = NULL; +// //struct tasklet_struct *step; +// unsigned long flags; +// +// while(work_to_do) { +// +// TS_NV_SCHED_BOTISR_START; +// +// // remove tasklet at head of list if it has higher priority. +// raw_spin_lock_irqsave(&gsnedf_lock, flags); +// +// if(gsnedf_pending_tasklets.head != NULL) { +// // remove tasklet at head. +// tasklet = gsnedf_pending_tasklets.head; +// +// if(edf_higher_prio(tasklet->owner, sched_task)) { +// +// if(NULL == tasklet->next) { +// // tasklet is at the head, list only has one element +// TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); +// gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head); +// } +// +// // remove the tasklet from the queue +// gsnedf_pending_tasklets.head = tasklet->next; +// +// TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); +// } +// else { +// TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id()); +// tasklet = NULL; +// } +// } +// else { +// TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); +// } +// +// raw_spin_unlock_irqrestore(&gsnedf_lock, flags); +// +// TS_NV_SCHED_BOTISR_END; +// +// if(tasklet) { +// __do_lit_tasklet(tasklet, 0ul); +// tasklet = NULL; +// } +// else { +// work_to_do = 0; +// } +// } +// +// //TRACE("%s: exited.\n", __FUNCTION__); +//} + +static void __add_pai_tasklet(struct tasklet_struct* tasklet) +{ + struct tasklet_struct* step; + + tasklet->next = NULL; // make sure there are no old values floating around + + step = gsnedf_pending_tasklets.head; + if(step == NULL) { + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); + // insert at tail. 
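+		/* The queue uses the same head/indirect-tail idiom as the mainline
+		 * struct tasklet_head: 'tail' always points at the last ->next field
+		 * (or at 'head' while the list is empty), so an append never needs an
+		 * empty-list special case. For example, starting from
+		 *
+		 *	head == NULL, tail == &head
+		 *
+		 * appending tasklet A via "*tail = A; tail = &A->next;" leaves
+		 *
+		 *	head == A, A->next == NULL, tail == &A->next
+		 */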
+ *(gsnedf_pending_tasklets.tail) = tasklet; + gsnedf_pending_tasklets.tail = &(tasklet->next); + } + else if((*(gsnedf_pending_tasklets.tail) != NULL) && + edf_higher_prio((*(gsnedf_pending_tasklets.tail))->owner, tasklet->owner)) { + // insert at tail. + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); + + *(gsnedf_pending_tasklets.tail) = tasklet; + gsnedf_pending_tasklets.tail = &(tasklet->next); + } + else { + // insert the tasklet somewhere in the middle. + + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__); + + while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) { + step = step->next; + } + + // insert tasklet right before step->next. + + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1); + + tasklet->next = step->next; + step->next = tasklet; + + // patch up the head if needed. + if(gsnedf_pending_tasklets.head == step) + { + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); + gsnedf_pending_tasklets.head = tasklet; + } + } +} + +static void gsnedf_run_tasklets(struct task_struct* sched_task) +{ + preempt_disable(); + + if(gsnedf_pending_tasklets.head != NULL) { + TRACE("%s: There are tasklets to process.\n", __FUNCTION__); + do_lit_tasklets(sched_task); + } + + preempt_enable_no_resched(); +} + +static int gsnedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet) +{ + cpu_entry_t *targetCPU = NULL; + int thisCPU; + int runLocal = 0; + int runNow = 0; + unsigned long flags; + + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) + { + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); + return 0; + } + + + raw_spin_lock_irqsave(&gsnedf_lock, flags); + + thisCPU = smp_processor_id(); + +#ifdef CONFIG_SCHED_CPU_AFFINITY + { + cpu_entry_t* affinity = NULL; + + // use this CPU if it is in our cluster and isn't running any RT work. + if( +#ifdef CONFIG_RELEASE_MASTER + (thisCPU != gsnedf.release_master) && +#endif + (__get_cpu_var(gsnedf_cpu_entries).linked == NULL)) { + affinity = &(__get_cpu_var(gsnedf_cpu_entries)); + } + else { + // this CPU is busy or shouldn't run tasklet in this cluster. + // look for available near by CPUs. + // NOTE: Affinity towards owner and not this CPU. Is this right? + affinity = + gsnedf_get_nearest_available_cpu( + &per_cpu(gsnedf_cpu_entries, task_cpu(tasklet->owner))); + } + + targetCPU = affinity; + } +#endif + + if (targetCPU == NULL) { + targetCPU = lowest_prio_cpu(); + } + + if (edf_higher_prio(tasklet->owner, targetCPU->linked)) { + if (thisCPU == targetCPU->cpu) { + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__); + runLocal = 1; + runNow = 1; + } + else { + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__); + runLocal = 0; + runNow = 1; + } + } + else { + runLocal = 0; + runNow = 0; + } + + if(!runLocal) { + // enqueue the tasklet + __add_pai_tasklet(tasklet); + } + + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); + + + if (runLocal /*&& runNow */) { // runNow == 1 is implied + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); + __do_lit_tasklet(tasklet, 0ul); + } + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu); + preempt(targetCPU); // need to be protected by cedf_lock? 
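+		/* Dispatch summary: (runLocal, runNow) can only come out as (1,1) --
+		 * run the tasklet inline on this CPU; (0,1) -- the tasklet was queued
+		 * and the chosen remote CPU is preempted, which is expected to pick
+		 * the tasklet up when it reschedules; or (0,0) -- the tasklet stays
+		 * queued until a sufficiently low-priority task is scheduled.
+		 */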
+ } + else { + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__); + } + + return(1); // success +} + +static void gsnedf_change_prio_pai_tasklet(struct task_struct *old_prio, + struct task_struct *new_prio) +{ + struct tasklet_struct* step; + unsigned long flags; + + if(gsnedf_pending_tasklets.head != NULL) { + raw_spin_lock_irqsave(&gsnedf_lock, flags); + for(step = gsnedf_pending_tasklets.head; step != NULL; step = step->next) { + if(step->owner == old_prio) { + TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid); + step->owner = new_prio; + } + } + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); + } +} + +#endif // end PAI + + /* Getting schedule() right is a bit tricky. schedule() may not make any * assumptions on the state of the current task since it may be called for a * number of reasons. The reasons include a scheduler_tick() determined that it @@ -437,21 +797,32 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); #endif + /* if (exists) TRACE_TASK(prev, "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " "state:%d sig:%d\n", blocks, out_of_time, np, sleep, preempt, prev->state, signal_pending(prev)); + */ + if (entry->linked && preempt) TRACE_TASK(prev, "will be preempted by %s/%d\n", entry->linked->comm, entry->linked->pid); - /* If a task blocks we have no choice but to reschedule. */ - if (blocks) + if (blocks) { unlink(entry->scheduled); + } + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) + if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { + if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { + stop_gpu_tracker(entry->scheduled); + } + } +#endif /* Request a sys_exit_np() call if we would like to preempt but cannot. * We need to make sure to update the link structure anyway in case @@ -492,12 +863,15 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) entry->scheduled->rt_param.scheduled_on = NO_CPU; TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); } - } else + } + else + { /* Only override Linux scheduler if we have a real-time task * scheduled that needs to continue. */ if (exists) next = prev; + } sched_state_task_picked(); @@ -524,6 +898,7 @@ static void gsnedf_finish_switch(struct task_struct *prev) cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); entry->scheduled = is_realtime(current) ? current : NULL; + #ifdef WANT_ALL_SCHED_EVENTS TRACE_TASK(prev, "switched away from\n"); #endif @@ -572,11 +947,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) static void gsnedf_task_wake_up(struct task_struct *task) { unsigned long flags; - lt_t now; + //lt_t now; TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); raw_spin_lock_irqsave(&gsnedf_lock, flags); + + +#if 0 // sporadic task model /* We need to take suspensions because of semaphores into * account! If a job resumes after being suspended due to acquiring * a semaphore, it should never be treated as a new job release. @@ -598,19 +976,26 @@ static void gsnedf_task_wake_up(struct task_struct *task) } } } +#else // periodic task model + set_rt_flags(task, RT_F_RUNNING); +#endif + gsnedf_job_arrival(task); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); } static void gsnedf_task_block(struct task_struct *t) { + // TODO: is this called on preemption?? 
unsigned long flags; TRACE_TASK(t, "block at %llu\n", litmus_clock()); /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); + unlink(t); + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); BUG_ON(!is_realtime(t)); @@ -621,6 +1006,10 @@ static void gsnedf_task_exit(struct task_struct * t) { unsigned long flags; +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + gsnedf_change_prio_pai_tasklet(t, NULL); +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); unlink(t); @@ -637,101 +1026,423 @@ static void gsnedf_task_exit(struct task_struct * t) static long gsnedf_admit_task(struct task_struct* tsk) { +#ifdef CONFIG_LITMUS_NESTED_LOCKING + INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, + edf_max_heap_base_priority_order); +#endif + return 0; } + + + + + #ifdef CONFIG_LITMUS_LOCKING #include /* called with IRQs off */ -static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +static void __increase_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh) { int linked_on; int check_preempt = 0; +#ifdef CONFIG_LITMUS_NESTED_LOCKING + /* this sanity check allows for weaker locking in protocols */ + /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */ + if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { +#endif + TRACE_TASK(t, "inherits priority from %s/%d\n", + prio_inh->comm, prio_inh->pid); + tsk_rt(t)->inh_task = prio_inh; + + linked_on = tsk_rt(t)->linked_on; + + /* If it is scheduled, then we need to reorder the CPU heap. */ + if (linked_on != NO_CPU) { + TRACE_TASK(t, "%s: linked on %d\n", + __FUNCTION__, linked_on); + /* Holder is scheduled; need to re-order CPUs. + * We can't use heap_decrease() here since + * the cpu_heap is ordered in reverse direction, so + * it is actually an increase. */ + binheap_delete(&gsnedf_cpus[linked_on]->hn, &gsnedf_cpu_heap); + binheap_add(&gsnedf_cpus[linked_on]->hn, + &gsnedf_cpu_heap, cpu_entry_t, hn); + } else { + /* holder may be queued: first stop queue changes */ + raw_spin_lock(&gsnedf.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "%s: is queued\n", + __FUNCTION__); + /* We need to update the position of holder in some + * heap. Note that this could be a release heap if we + * budget enforcement is used and this job overran. */ + check_preempt = + !bheap_decrease(edf_ready_order, + tsk_rt(t)->heap_node); + } else { + /* Nothing to do: if it is not queued and not linked + * then it is either sleeping or currently being moved + * by other code (e.g., a timer interrupt handler) that + * will use the correct priority when enqueuing the + * task. */ + TRACE_TASK(t, "%s: is NOT queued => Done.\n", + __FUNCTION__); + } + raw_spin_unlock(&gsnedf.release_lock); + + /* If holder was enqueued in a release heap, then the following + * preemption check is pointless, but we can't easily detect + * that case. If you want to fix this, then consider that + * simply adding a state flag requires O(n) time to update when + * releasing n tasks, which conflicts with the goal to have + * O(log n) merges. */ + if (check_preempt) { + /* heap_decrease() hit the top level of the heap: make + * sure preemption checks get the right task, not the + * potentially stale cache. */ + bheap_uncache_min(edf_ready_order, + &gsnedf.ready_queue); + check_for_preemptions(); + } + } +#ifdef CONFIG_LITMUS_NESTED_LOCKING + } + else { + TRACE_TASK(t, "Spurious invalid priority increase. 
" + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" + "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", + t->comm, t->pid, + effective_priority(t)->comm, effective_priority(t)->pid, + (prio_inh) ? prio_inh->comm : "nil", + (prio_inh) ? prio_inh->pid : -1); + WARN_ON(!prio_inh); + } +#endif +} + +/* called with IRQs off */ +static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) +{ raw_spin_lock(&gsnedf_lock); - TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); - tsk_rt(t)->inh_task = prio_inh; - - linked_on = tsk_rt(t)->linked_on; - - /* If it is scheduled, then we need to reorder the CPU heap. */ - if (linked_on != NO_CPU) { - TRACE_TASK(t, "%s: linked on %d\n", - __FUNCTION__, linked_on); - /* Holder is scheduled; need to re-order CPUs. - * We can't use heap_decrease() here since - * the cpu_heap is ordered in reverse direction, so - * it is actually an increase. */ - bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, - gsnedf_cpus[linked_on]->hn); - bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, - gsnedf_cpus[linked_on]->hn); - } else { - /* holder may be queued: first stop queue changes */ - raw_spin_lock(&gsnedf.release_lock); - if (is_queued(t)) { - TRACE_TASK(t, "%s: is queued\n", - __FUNCTION__); - /* We need to update the position of holder in some - * heap. Note that this could be a release heap if we - * budget enforcement is used and this job overran. */ - check_preempt = - !bheap_decrease(edf_ready_order, - tsk_rt(t)->heap_node); - } else { - /* Nothing to do: if it is not queued and not linked - * then it is either sleeping or currently being moved - * by other code (e.g., a timer interrupt handler) that - * will use the correct priority when enqueuing the - * task. */ - TRACE_TASK(t, "%s: is NOT queued => Done.\n", - __FUNCTION__); + __increase_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d inherits a new priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif + + raw_spin_unlock(&gsnedf_lock); + +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) + if(tsk_rt(t)->held_gpus) { + int i; + for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + i < NV_DEVICE_NUM; + i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) { + pai_check_priority_increase(t, i); + } + } +#endif +} + + +/* called with IRQs off */ +static void __decrease_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh) +{ +#ifdef CONFIG_LITMUS_NESTED_LOCKING + if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { +#endif + /* A job only stops inheriting a priority when it releases a + * resource. Thus we can make the following assumption.*/ + if(prio_inh) + TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n", + prio_inh->comm, prio_inh->pid); + else + TRACE_TASK(t, "base priority restored.\n"); + + tsk_rt(t)->inh_task = prio_inh; + + if(tsk_rt(t)->scheduled_on != NO_CPU) { + TRACE_TASK(t, "is scheduled.\n"); + + /* Check if rescheduling is necessary. We can't use heap_decrease() + * since the priority was effectively lowered. 
*/ + unlink(t); + gsnedf_job_arrival(t); } - raw_spin_unlock(&gsnedf.release_lock); - - /* If holder was enqueued in a release heap, then the following - * preemption check is pointless, but we can't easily detect - * that case. If you want to fix this, then consider that - * simply adding a state flag requires O(n) time to update when - * releasing n tasks, which conflicts with the goal to have - * O(log n) merges. */ - if (check_preempt) { - /* heap_decrease() hit the top level of the heap: make - * sure preemption checks get the right task, not the - * potentially stale cache. */ - bheap_uncache_min(edf_ready_order, - &gsnedf.ready_queue); - check_for_preemptions(); + else { + /* task is queued */ + raw_spin_lock(&gsnedf.release_lock); + if (is_queued(t)) { + TRACE_TASK(t, "is queued.\n"); + + /* decrease in priority, so we have to re-add to binomial heap */ + unlink(t); + gsnedf_job_arrival(t); + } + else { + TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n"); + } + raw_spin_unlock(&gsnedf.release_lock); } +#ifdef CONFIG_LITMUS_NESTED_LOCKING + } + else { + TRACE_TASK(t, "Spurious invalid priority decrease. " + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" + "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", + t->comm, t->pid, + effective_priority(t)->comm, effective_priority(t)->pid, + (prio_inh) ? prio_inh->comm : "nil", + (prio_inh) ? prio_inh->pid : -1); } +#endif +} + +static void decrease_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh) +{ + raw_spin_lock(&gsnedf_lock); + __decrease_priority_inheritance(t, prio_inh); + +#ifdef CONFIG_LITMUS_SOFTIRQD + if(tsk_rt(t)->cur_klitirqd != NULL) + { + TRACE_TASK(t, "%s/%d decreases in priority!\n", + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + + __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + } +#endif raw_spin_unlock(&gsnedf_lock); + +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) + if(tsk_rt(t)->held_gpus) { + int i; + for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + i < NV_DEVICE_NUM; + i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) { + pai_check_priority_decrease(t, i); + } + } +#endif } + +#ifdef CONFIG_LITMUS_SOFTIRQD /* called with IRQs off */ -static void clear_priority_inheritance(struct task_struct* t) +static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) { + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + raw_spin_lock(&gsnedf_lock); - /* A job only stops inheriting a priority when it releases a - * resource. Thus we can make the following assumption.*/ - BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU); + if(old_owner != new_owner) + { + if(old_owner) + { + // unreachable? + tsk_rt(old_owner)->cur_klitirqd = NULL; + } - TRACE_TASK(t, "priority restored\n"); - tsk_rt(t)->inh_task = NULL; + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + new_owner->comm, new_owner->pid); - /* Check if rescheduling is necessary. We can't use heap_decrease() - * since the priority was effectively lowered. */ - unlink(t); - gsnedf_job_arrival(t); + tsk_rt(new_owner)->cur_klitirqd = klitirqd; + } + + __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio. + + __increase_priority_inheritance(klitirqd, + (tsk_rt(new_owner)->inh_task == NULL) ? 
+ new_owner : + tsk_rt(new_owner)->inh_task); raw_spin_unlock(&gsnedf_lock); } +/* called with IRQs off */ +static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + + raw_spin_lock(&gsnedf_lock); + + TRACE_TASK(klitirqd, "priority restored\n"); + + __decrease_priority_inheritance(klitirqd, new_owner); + + tsk_rt(old_owner)->cur_klitirqd = NULL; + + raw_spin_unlock(&gsnedf_lock); +} +#endif + + + + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + +/* called with IRQs off */ +/* preconditions: + (1) The 'hp_blocked_tasks_lock' of task 't' is held. + (2) The lock 'to_unlock' is held. + */ +static void nested_increase_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, + unsigned long irqflags) +{ + struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; + + if(tsk_rt(t)->inh_task != prio_inh) { // shield redundent calls. + increase_priority_inheritance(t, prio_inh); // increase our prio. + } + + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. + + + if(blocked_lock) { + if(blocked_lock->ops->propagate_increase_inheritance) { + TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", + blocked_lock->ident); + + // beware: recursion + blocked_lock->ops->propagate_increase_inheritance(blocked_lock, + t, to_unlock, + irqflags); + } + else { + TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n", + blocked_lock->ident); + unlock_fine_irqrestore(to_unlock, irqflags); + } + } + else { + TRACE_TASK(t, "is not blocked. No propagation.\n"); + unlock_fine_irqrestore(to_unlock, irqflags); + } +} + +/* called with IRQs off */ +/* preconditions: + (1) The 'hp_blocked_tasks_lock' of task 't' is held. + (2) The lock 'to_unlock' is held. + */ +static void nested_decrease_priority_inheritance(struct task_struct* t, + struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, + unsigned long irqflags) +{ + struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; + decrease_priority_inheritance(t, prio_inh); + + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. + + if(blocked_lock) { + if(blocked_lock->ops->propagate_decrease_inheritance) { + TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", + blocked_lock->ident); + + // beware: recursion + blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t, + to_unlock, + irqflags); + } + else { + TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n", + blocked_lock); + unlock_fine_irqrestore(to_unlock, irqflags); + } + } + else { + TRACE_TASK(t, "is not blocked. 
No propagation.\n"); + unlock_fine_irqrestore(to_unlock, irqflags); + } +} + + +/* ******************** RSM MUTEX ********************** */ + +static struct litmus_lock_ops gsnedf_rsm_mutex_lock_ops = { + .lock = rsm_mutex_lock, + .unlock = rsm_mutex_unlock, + .close = rsm_mutex_close, + .deallocate = rsm_mutex_free, + + .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance, + .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance, + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + .dgl_lock = rsm_mutex_dgl_lock, + .is_owner = rsm_mutex_is_owner, + .enable_priority = rsm_mutex_enable_priority, +#endif +}; + +static struct litmus_lock* gsnedf_new_rsm_mutex(void) +{ + return rsm_mutex_new(&gsnedf_rsm_mutex_lock_ops); +} + +/* ******************** IKGLP ********************** */ + +static struct litmus_lock_ops gsnedf_ikglp_lock_ops = { + .lock = ikglp_lock, + .unlock = ikglp_unlock, + .close = ikglp_close, + .deallocate = ikglp_free, + + // ikglp can only be an outer-most lock. + .propagate_increase_inheritance = NULL, + .propagate_decrease_inheritance = NULL, +}; + +static struct litmus_lock* gsnedf_new_ikglp(void* __user arg) +{ + return ikglp_new(num_online_cpus(), &gsnedf_ikglp_lock_ops, arg); +} + +#endif /* CONFIG_LITMUS_NESTED_LOCKING */ + + +/* ******************** KFMLP support ********************** */ + +static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = { + .lock = kfmlp_lock, + .unlock = kfmlp_unlock, + .close = kfmlp_close, + .deallocate = kfmlp_free, + + // kfmlp can only be an outer-most lock. + .propagate_increase_inheritance = NULL, + .propagate_decrease_inheritance = NULL, +}; + + +static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg) +{ + return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg); +} + /* ******************** FMLP support ********************** */ /* struct for semaphore with priority inheritance */ @@ -797,7 +1508,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l) if (edf_higher_prio(t, sem->hp_waiter)) { sem->hp_waiter = t; if (edf_higher_prio(t, sem->owner)) - set_priority_inheritance(sem->owner, sem->hp_waiter); + increase_priority_inheritance(sem->owner, sem->hp_waiter); } TS_LOCK_SUSPEND; @@ -865,7 +1576,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l) /* Well, if next is not the highest-priority waiter, * then it ought to inherit the highest-priority * waiter's priority. */ - set_priority_inheritance(next, sem->hp_waiter); + increase_priority_inheritance(next, sem->hp_waiter); } /* wake up next */ @@ -876,7 +1587,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l) /* we lose the benefit of priority inheritance (if any) */ if (tsk_rt(t)->inh_task) - clear_priority_inheritance(t); + decrease_priority_inheritance(t, NULL); out: spin_unlock_irqrestore(&sem->wait.lock, flags); @@ -914,6 +1625,11 @@ static struct litmus_lock_ops gsnedf_fmlp_lock_ops = { .lock = gsnedf_fmlp_lock, .unlock = gsnedf_fmlp_unlock, .deallocate = gsnedf_fmlp_free, + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + .propagate_increase_inheritance = NULL, + .propagate_decrease_inheritance = NULL +#endif }; static struct litmus_lock* gsnedf_new_fmlp(void) @@ -932,47 +1648,121 @@ static struct litmus_lock* gsnedf_new_fmlp(void) return &sem->litmus_lock; } -/* **** lock constructor **** */ - static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, - void* __user unused) + void* __user args) { - int err = -ENXIO; + int err; - /* GSN-EDF currently only supports the FMLP for global resources. 
*/ switch (type) { case FMLP_SEM: /* Flexible Multiprocessor Locking Protocol */ *lock = gsnedf_new_fmlp(); - if (*lock) - err = 0; - else - err = -ENOMEM; + break; +#ifdef CONFIG_LITMUS_NESTED_LOCKING + case RSM_MUTEX: + *lock = gsnedf_new_rsm_mutex(); break; + case IKGLP_SEM: + *lock = gsnedf_new_ikglp(args); + break; +#endif + case KFMLP_SEM: + *lock = gsnedf_new_kfmlp(args); + break; + default: + err = -ENXIO; + goto UNSUPPORTED_LOCK; }; + if (*lock) + err = 0; + else + err = -ENOMEM; + +UNSUPPORTED_LOCK: return err; } +#endif // CONFIG_LITMUS_LOCKING + + + + + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +static struct affinity_observer_ops gsnedf_kfmlp_affinity_ops = { + .close = kfmlp_aff_obs_close, + .deallocate = kfmlp_aff_obs_free, +}; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING +static struct affinity_observer_ops gsnedf_ikglp_affinity_ops = { + .close = ikglp_aff_obs_close, + .deallocate = ikglp_aff_obs_free, +}; #endif +static long gsnedf_allocate_affinity_observer( + struct affinity_observer **aff_obs, + int type, + void* __user args) +{ + int err; + + switch (type) { + + case KFMLP_SIMPLE_GPU_AFF_OBS: + *aff_obs = kfmlp_simple_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args); + break; + + case KFMLP_GPU_AFF_OBS: + *aff_obs = kfmlp_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args); + break; + +#ifdef CONFIG_LITMUS_NESTED_LOCKING + case IKGLP_SIMPLE_GPU_AFF_OBS: + *aff_obs = ikglp_simple_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args); + break; + + case IKGLP_GPU_AFF_OBS: + *aff_obs = ikglp_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args); + break; +#endif + default: + err = -ENXIO; + goto UNSUPPORTED_AFF_OBS; + }; + + if (*aff_obs) + err = 0; + else + err = -ENOMEM; + +UNSUPPORTED_AFF_OBS: + return err; +} +#endif + + + + static long gsnedf_activate_plugin(void) { int cpu; cpu_entry_t *entry; - bheap_init(&gsnedf_cpu_heap); + INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio); #ifdef CONFIG_RELEASE_MASTER gsnedf.release_master = atomic_read(&release_master_cpu); #endif for_each_online_cpu(cpu) { entry = &per_cpu(gsnedf_cpu_entries, cpu); - bheap_node_init(&entry->hn, entry); + INIT_BINHEAP_NODE(&entry->hn); entry->linked = NULL; entry->scheduled = NULL; #ifdef CONFIG_RELEASE_MASTER @@ -986,6 +1776,20 @@ static long gsnedf_activate_plugin(void) } #endif } + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + gsnedf_pending_tasklets.head = NULL; + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head); +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD + spawn_klitirqd(NULL); +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + init_nvidia_info(); +#endif + return 0; } @@ -1002,8 +1806,31 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { .task_block = gsnedf_task_block, .admit_task = gsnedf_admit_task, .activate_plugin = gsnedf_activate_plugin, + .compare = edf_higher_prio, #ifdef CONFIG_LITMUS_LOCKING .allocate_lock = gsnedf_allocate_lock, + .increase_prio = increase_priority_inheritance, + .decrease_prio = decrease_priority_inheritance, +#endif +#ifdef CONFIG_LITMUS_NESTED_LOCKING + .nested_increase_prio = nested_increase_priority_inheritance, + .nested_decrease_prio = nested_decrease_priority_inheritance, + .__compare = __edf_higher_prio, +#endif +#ifdef CONFIG_LITMUS_DGL_SUPPORT + .get_dgl_spinlock = gsnedf_get_dgl_spinlock, +#endif +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + .allocate_aff_obs = gsnedf_allocate_affinity_observer, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .increase_prio_klitirqd = increase_priority_inheritance_klitirqd, + .decrease_prio_klitirqd = 
decrease_priority_inheritance_klitirqd, +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet, + .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet, + .run_tasklets = gsnedf_run_tasklets, #endif }; @@ -1013,15 +1840,20 @@ static int __init init_gsn_edf(void) int cpu; cpu_entry_t *entry; - bheap_init(&gsnedf_cpu_heap); + INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio); /* initialize CPU state */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for (cpu = 0; cpu < NR_CPUS; ++cpu) { entry = &per_cpu(gsnedf_cpu_entries, cpu); gsnedf_cpus[cpu] = entry; entry->cpu = cpu; - entry->hn = &gsnedf_heap_node[cpu]; - bheap_node_init(&entry->hn, entry); + + INIT_BINHEAP_NODE(&entry->hn); } + +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spin_lock_init(&dgl_lock); +#endif + edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs); return register_sched_plugin(&gsn_edf_plugin); } diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c index 5a15ce938984..9a6fe487718e 100644 --- a/litmus/sched_litmus.c +++ b/litmus/sched_litmus.c @@ -103,7 +103,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev) } #ifdef __ARCH_WANT_UNLOCKED_CTXSW if (next->oncpu) + { TRACE_TASK(next, "waiting for !oncpu"); + } while (next->oncpu) { cpu_relax(); mb(); diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 00a1900d6457..245e41c25a5d 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -13,6 +13,10 @@ #include #include +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + /* * Generic function to trigger preemption on either local or remote cpu * from scheduler plugins. The key feature is that this function is @@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void) static long litmus_dummy_activate_plugin(void) { +#ifdef CONFIG_LITMUS_NVIDIA + shutdown_nvidia_info(); +#endif return 0; } @@ -110,14 +117,93 @@ static long litmus_dummy_deactivate_plugin(void) return 0; } -#ifdef CONFIG_LITMUS_LOCKING +static int litmus_dummy_compare(struct task_struct* a, struct task_struct* b) +{ + TRACE_CUR("WARNING: Dummy compare function called!\n"); + return 0; +} +#ifdef CONFIG_LITMUS_LOCKING static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, void* __user config) { return -ENXIO; } +static void litmus_dummy_increase_prio(struct task_struct* t, struct task_struct* prio_inh) +{ +} + +static void litmus_dummy_decrease_prio(struct task_struct* t, struct task_struct* prio_inh) +{ +} +#endif + +#ifdef CONFIG_LITMUS_SOFTIRQD +static void litmus_dummy_increase_prio_klitirq(struct task_struct* klitirqd, + struct task_struct* old_owner, + struct task_struct* new_owner) +{ +} + +static void litmus_dummy_decrease_prio_klitirqd(struct task_struct* klitirqd, + struct task_struct* old_owner) +{ +} +#endif + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t) +{ + TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__); + return(0); // failure. 
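+	/* A zero return is presumably read by the caller as "not queued here",
+	 * so the tasklet can still fall back to the regular softirq machinery
+	 * instead of being dropped. Plugins that implement PAI (e.g.
+	 * gsnedf_enqueue_pai_tasklet() in this patch) return 1 once they have
+	 * taken charge of the tasklet.
+	 */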
+} + +static void litmus_dummy_change_prio_pai_tasklet(struct task_struct *old_prio, + struct task_struct *new_prio) +{ + TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__); +} + +static void litmus_dummy_run_tasklets(struct task_struct* t) +{ + //TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__); +} +#endif + +#ifdef CONFIG_LITMUS_NESTED_LOCKING +static void litmus_dummy_nested_increase_prio(struct task_struct* t, struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, unsigned long irqflags) +{ +} + +static void litmus_dummy_nested_decrease_prio(struct task_struct* t, struct task_struct* prio_inh, + raw_spinlock_t *to_unlock, unsigned long irqflags) +{ +} + +static int litmus_dummy___compare(struct task_struct* a, comparison_mode_t a_mod, + struct task_struct* b, comparison_mode_t b_mode) +{ + TRACE_CUR("WARNING: Dummy compare function called!\n"); + return 0; +} +#endif + +#ifdef CONFIG_LITMUS_DGL_SUPPORT +static raw_spinlock_t* litmus_dummy_get_dgl_spinlock(struct task_struct *t) +{ + return NULL; +} +#endif + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs, + int type, + void* __user config) +{ + return -ENXIO; +} #endif @@ -136,9 +222,33 @@ struct sched_plugin linux_sched_plugin = { .finish_switch = litmus_dummy_finish_switch, .activate_plugin = litmus_dummy_activate_plugin, .deactivate_plugin = litmus_dummy_deactivate_plugin, + .compare = litmus_dummy_compare, #ifdef CONFIG_LITMUS_LOCKING .allocate_lock = litmus_dummy_allocate_lock, + .increase_prio = litmus_dummy_increase_prio, + .decrease_prio = litmus_dummy_decrease_prio, +#endif +#ifdef CONFIG_LITMUS_NESTED_LOCKING + .nested_increase_prio = litmus_dummy_nested_increase_prio, + .nested_decrease_prio = litmus_dummy_nested_decrease_prio, + .__compare = litmus_dummy___compare, +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + .increase_prio_klitirqd = litmus_dummy_increase_prio_klitirqd, + .decrease_prio_klitirqd = litmus_dummy_decrease_prio_klitirqd, +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet, + .change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet, + .run_tasklets = litmus_dummy_run_tasklets, +#endif +#ifdef CONFIG_LITMUS_DGL_SUPPORT + .get_dgl_spinlock = litmus_dummy_get_dgl_spinlock, #endif +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + .allocate_aff_obs = litmus_dummy_allocate_aff_obs, +#endif + .admit_task = litmus_dummy_admit_task }; @@ -174,8 +284,31 @@ int register_sched_plugin(struct sched_plugin* plugin) CHECK(complete_job); CHECK(activate_plugin); CHECK(deactivate_plugin); + CHECK(compare); #ifdef CONFIG_LITMUS_LOCKING CHECK(allocate_lock); + CHECK(increase_prio); + CHECK(decrease_prio); +#endif +#ifdef CONFIG_LITMUS_NESTED_LOCKING + CHECK(nested_increase_prio); + CHECK(nested_decrease_prio); + CHECK(__compare); +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + CHECK(increase_prio_klitirqd); + CHECK(decrease_prio_klitirqd); +#endif +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD + CHECK(enqueue_pai_tasklet); + CHECK(change_prio_pai_tasklet); + CHECK(run_tasklets); +#endif +#ifdef CONFIG_LITMUS_DGL_SUPPORT + CHECK(get_dgl_spinlock); +#endif +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + CHECK(allocate_aff_obs); #endif CHECK(admit_task); diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c index 5ef8d09ab41f..f7f575346b54 100644 --- a/litmus/sched_task_trace.c +++ b/litmus/sched_task_trace.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ 
-16,13 +17,13 @@ #include -#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) +#define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11)) #define now() litmus_clock() struct local_buffer { - struct st_event_record record[NO_EVENTS]; - char flag[NO_EVENTS]; + struct st_event_record record[NUM_EVENTS]; + char flag[NUM_EVENTS]; struct ft_buffer ftbuf; }; @@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void) int i, ok = 0, err; printk("Allocated %u sched_trace_xxx() events per CPU " "(buffer size: %d bytes)\n", - NO_EVENTS, (int) sizeof(struct local_buffer)); + NUM_EVENTS, (int) sizeof(struct local_buffer)); err = ftdev_init(&st_dev, THIS_MODULE, num_online_cpus(), "sched_trace"); @@ -50,7 +51,7 @@ static int __init init_sched_task_trace(void) for (i = 0; i < st_dev.minor_cnt; i++) { buf = &per_cpu(st_event_buffer, i); - ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, + ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS, sizeof(struct st_event_record), buf->flag, buf->record); @@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id, { struct task_struct *t = (struct task_struct*) _task; struct st_event_record* rec; - if (is_realtime(t)) { + //if (is_realtime(t)) /* comment out to trace EVERYTHING */ + { rec = get_record(ST_SWITCH_TO, t); if (rec) { rec->data.switch_to.when = now(); @@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id, { struct task_struct *t = (struct task_struct*) _task; struct st_event_record* rec; - if (is_realtime(t)) { + //if (is_realtime(t)) /* comment out to trace EVERYTHING */ + { rec = get_record(ST_SWITCH_AWAY, t); if (rec) { rec->data.switch_away.when = now(); @@ -188,6 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id, if (rec) { rec->data.completion.when = now(); rec->data.completion.forced = forced; +#ifdef LITMUS_NVIDIA + rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count); +#endif put_record(rec); } } @@ -239,3 +245,265 @@ feather_callback void do_sched_trace_action(unsigned long id, put_record(rec); } } + + + + +feather_callback void do_sched_trace_prediction_err(unsigned long id, + unsigned long _task, + unsigned long _distance, + unsigned long _rel_err) +{ + struct task_struct *t = (struct task_struct*) _task; + struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t); + + if (rec) { + gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance; + fp_t* rel_err = (fp_t*) _rel_err; + + rec->data.prediction_err.distance = *distance; + rec->data.prediction_err.rel_err = rel_err->val; + put_record(rec); + } +} + + +feather_callback void do_sched_trace_migration(unsigned long id, + unsigned long _task, + unsigned long _mig_info) +{ + struct task_struct *t = (struct task_struct*) _task; + struct st_event_record *rec = get_record(ST_MIGRATION, t); + + if (rec) { + struct migration_info* mig_info = (struct migration_info*) _mig_info; + + rec->hdr.extra = mig_info->distance; + rec->data.migration.observed = mig_info->observed; + rec->data.migration.estimated = mig_info->estimated; + + put_record(rec); + } +} + + + + + + + + + +feather_callback void do_sched_trace_tasklet_release(unsigned long id, + unsigned long _owner) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t); + + if (rec) { + rec->data.tasklet_release.when = now(); + put_record(rec); + } +} + + +feather_callback void do_sched_trace_tasklet_begin(unsigned long id, + unsigned long _owner) 
+{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t); + + if (rec) { + rec->data.tasklet_begin.when = now(); + + if(!in_interrupt()) + rec->data.tasklet_begin.exe_pid = current->pid; + else + rec->data.tasklet_begin.exe_pid = 0; + + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_tasklet_begin); + + +feather_callback void do_sched_trace_tasklet_end(unsigned long id, + unsigned long _owner, + unsigned long _flushed) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_TASKLET_END, t); + + if (rec) { + rec->data.tasklet_end.when = now(); + rec->data.tasklet_end.flushed = _flushed; + + if(!in_interrupt()) + rec->data.tasklet_end.exe_pid = current->pid; + else + rec->data.tasklet_end.exe_pid = 0; + + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_tasklet_end); + + +feather_callback void do_sched_trace_work_release(unsigned long id, + unsigned long _owner) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_WORK_RELEASE, t); + + if (rec) { + rec->data.work_release.when = now(); + put_record(rec); + } +} + + +feather_callback void do_sched_trace_work_begin(unsigned long id, + unsigned long _owner, + unsigned long _exe) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_WORK_BEGIN, t); + + if (rec) { + struct task_struct *exe = (struct task_struct*) _exe; + rec->data.work_begin.exe_pid = exe->pid; + rec->data.work_begin.when = now(); + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_work_begin); + + +feather_callback void do_sched_trace_work_end(unsigned long id, + unsigned long _owner, + unsigned long _exe, + unsigned long _flushed) +{ + struct task_struct *t = (struct task_struct*) _owner; + struct st_event_record *rec = get_record(ST_WORK_END, t); + + if (rec) { + struct task_struct *exe = (struct task_struct*) _exe; + rec->data.work_end.exe_pid = exe->pid; + rec->data.work_end.flushed = _flushed; + rec->data.work_end.when = now(); + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_work_end); + + +feather_callback void do_sched_trace_eff_prio_change(unsigned long id, + unsigned long _task, + unsigned long _inh) +{ + struct task_struct *t = (struct task_struct*) _task; + struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t); + + if (rec) { + struct task_struct *inh = (struct task_struct*) _inh; + rec->data.effective_priority_change.when = now(); + rec->data.effective_priority_change.inh_pid = (inh != NULL) ? + inh->pid : + 0xffff; + + put_record(rec); + } +} + +/* pray for no nesting of nv interrupts on same CPU... */ +struct tracing_interrupt_map +{ + int active; + int count; + unsigned long data[128]; // assume nesting less than 128... 
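+	/* serial[i] records the serial number handed out at nesting level i by
+	 * do_sched_trace_nv_interrupt_begin(), so that the matching
+	 * do_sched_trace_nv_interrupt_end() can report the same number even if
+	 * interrupts nest.
+	 */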
+ unsigned long serial[128]; +}; +DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing); + + +DEFINE_PER_CPU(u32, intCounter); + +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, + unsigned long _device) +{ + struct st_event_record *rec; + u32 serialNum; + + { + u32* serial; + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); + if(!int_map->active == 0xcafebabe) + { + int_map->count++; + } + else + { + int_map->active = 0xcafebabe; + int_map->count = 1; + } + //int_map->data[int_map->count-1] = _device; + + serial = &per_cpu(intCounter, smp_processor_id()); + *serial += num_online_cpus(); + serialNum = *serial; + int_map->serial[int_map->count-1] = serialNum; + } + + rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL); + if(rec) { + u32 device = _device; + rec->data.nv_interrupt_begin.when = now(); + rec->data.nv_interrupt_begin.device = device; + rec->data.nv_interrupt_begin.serialNumber = serialNum; + put_record(rec); + } +} +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin); + +/* +int is_interrupt_tracing_active(void) +{ + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); + if(int_map->active == 0xcafebabe) + return 1; + return 0; +} +*/ + +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device) +{ + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); + if(int_map->active == 0xcafebabe) + { + struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL); + + int_map->count--; + if(int_map->count == 0) + int_map->active = 0; + + if(rec) { + u32 device = _device; + rec->data.nv_interrupt_end.when = now(); + //rec->data.nv_interrupt_end.device = int_map->data[int_map->count]; + rec->data.nv_interrupt_end.device = device; + rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count]; + put_record(rec); + } + } +} +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end); + + + + + + + + + diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c new file mode 100644 index 000000000000..cf8e1d78aa77 --- /dev/null +++ b/litmus/sched_trace_external.c @@ -0,0 +1,64 @@ +#include + +#include +#include +#include + +void __sched_trace_tasklet_begin_external(struct task_struct* t) +{ + sched_trace_tasklet_begin(t); +} +EXPORT_SYMBOL(__sched_trace_tasklet_begin_external); + +void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed) +{ + sched_trace_tasklet_end(t, flushed); +} +EXPORT_SYMBOL(__sched_trace_tasklet_end_external); + + + +void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e) +{ + sched_trace_work_begin(t, e); +} +EXPORT_SYMBOL(__sched_trace_work_begin_external); + +void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f) +{ + sched_trace_work_end(t, e, f); +} +EXPORT_SYMBOL(__sched_trace_work_end_external); + + + +void __sched_trace_nv_interrupt_begin_external(u32 device) +{ + //unsigned long _device = device; + sched_trace_nv_interrupt_begin((unsigned long)device); +} +EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external); + +void __sched_trace_nv_interrupt_end_external(u32 device) +{ + //unsigned long _device = device; + sched_trace_nv_interrupt_end((unsigned long)device); +} +EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external); + + +#ifdef CONFIG_LITMUS_NVIDIA + +#define EXX_TS(evt) \ +void __##evt(void) { evt; } \ +EXPORT_SYMBOL(__##evt); + 
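+/* Each EXX_TS() line below expands to a tiny exported wrapper around the
+ * corresponding timestamp macro (TS_*), e.g. EXX_TS(TS_NV_TOPISR_START)
+ * becomes:
+ *
+ *	void __TS_NV_TOPISR_START(void) { TS_NV_TOPISR_START; }
+ *	EXPORT_SYMBOL(__TS_NV_TOPISR_START);
+ *
+ * which lets code outside this translation unit (e.g. the NVIDIA interrupt
+ * glue) record these timestamps through an exported symbol.
+ */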
+EXX_TS(TS_NV_TOPISR_START) +EXX_TS(TS_NV_TOPISR_END) +EXX_TS(TS_NV_BOTISR_START) +EXX_TS(TS_NV_BOTISR_END) +EXX_TS(TS_NV_RELEASE_BOTISR_START) +EXX_TS(TS_NV_RELEASE_BOTISR_END) + +#endif + -- cgit v1.2.2 From 9a19f35c9c287cb8abd5bcf276ae8d1a3e876907 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 20 Aug 2012 16:49:50 -0400 Subject: Improve readability of EDF comparisons. Restructured the EDF task comparison code to improve readability. Recoded chained logical expression embedded in return statement into a series of if/else blocks. --- litmus/edf_common.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 9b44dc2d8d1e..668737f0fbf9 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -63,25 +63,35 @@ int edf_higher_prio(struct task_struct* first, #endif - - return !is_realtime(second_task) || - - /* is the deadline of the first task earlier? + /* Determine the task with earliest deadline, with + * tie-break logic. + */ + if (unlikely(!is_realtime(second_task))) { + return 1; + } + else if (earlier_deadline(first_task, second_task)) { + /* Is the deadline of the first task earlier? * Then it has higher priority. */ - earlier_deadline(first_task, second_task) || - - /* Do we have a deadline tie? - * Then break by PID. - */ - (get_deadline(first_task) == get_deadline(second_task) && - (first_task->pid < second_task->pid || + return 1; + } + else if (get_deadline(first_task) == get_deadline(second_task)) { + /* Need to tie break */ - /* If the PIDs are the same then the task with the inherited - * priority wins. - */ - (first_task->pid == second_task->pid && - !second->rt_param.inh_task))); + /* Tie break by pid */ + if (first_task->pid < second_task->pid) { + return 1; + } + else if (first_task->pid == second_task->pid) { + /* If the PIDs are the same then the task with the + * inherited priority wins. + */ + if (!second_task->rt_param.inh_task) { + return 1; + } + } + } + return 0; /* fall-through. prio(second_task) > prio(first_task) */ } int edf_ready_order(struct bheap_node* a, struct bheap_node* b) -- cgit v1.2.2 From 077aaecac31331b65442275843932314049a2ceb Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 20 Aug 2012 17:28:55 -0400 Subject: EDF priority tie-breaks. Instead of tie-breaking by PID (which is a static priority tie-break), we can tie-break by other job-level-unique parameters. This is desirable because tasks are equaly affected by tardiness since static priority tie-breaks cause tasks with greater PID values to experience the most tardiness. There are four tie-break methods: 1) Lateness. If two jobs, J_{1,i} and J_{2,j} of tasks T_1 and T_2, respectively, have equal deadlines, we favor the job of the task that had the worst lateness for jobs J_{1,i-1} and J_{2,j-1}. Note: Unlike tardiness, lateness may be less than zero. This occurs when a job finishes before its deadline. 2) Normalized Lateness. The same as #1, except lateness is first normalized by each task's relative deadline. This prevents tasks with short relative deadlines and small execution requirements from always losing tie-breaks. 3) Hash. The job tuple (PID, Job#) is used to generate a hash. Hash values are then compared. A job has ~50% chance of winning a tie-break with respect to another job. Note: Emperical testing shows that some jobs can have +/- ~1.5% advantage in tie-breaks. Linux's built-in hash function is not totally a uniform hash. 4) PIDs. 
PID-based tie-break used in prior versions of Litmus. Conflicts: litmus/edf_common.c --- include/litmus/fpmath.h | 145 ++++++++++++++++++++++++++++++++++++++++++++++ include/litmus/litmus.h | 2 +- include/litmus/rt_param.h | 6 ++ litmus/Kconfig | 46 +++++++++++++++ litmus/edf_common.c | 110 +++++++++++++++++++++++++++++------ litmus/jobs.c | 8 +++ 6 files changed, 297 insertions(+), 20 deletions(-) create mode 100644 include/litmus/fpmath.h diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h new file mode 100644 index 000000000000..04d4bcaeae96 --- /dev/null +++ b/include/litmus/fpmath.h @@ -0,0 +1,145 @@ +#ifndef __FP_MATH_H__ +#define __FP_MATH_H__ + +#ifndef __KERNEL__ +#include +#define abs(x) (((x) < 0) ? -(x) : x) +#endif + +// Use 64-bit because we want to track things at the nanosecond scale. +// This can lead to very large numbers. +typedef int64_t fpbuf_t; +typedef struct +{ + fpbuf_t val; +} fp_t; + +#define FP_SHIFT 10 +#define ROUND_BIT (FP_SHIFT - 1) + +#define _fp(x) ((fp_t) {x}) + +#ifdef __KERNEL__ +static const fp_t LITMUS_FP_ZERO = {.val = 0}; +static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)}; +#endif + +static inline fp_t FP(fpbuf_t x) +{ + return _fp(((fpbuf_t) x) << FP_SHIFT); +} + +/* divide two integers to obtain a fixed point value */ +static inline fp_t _frac(fpbuf_t a, fpbuf_t b) +{ + return _fp(FP(a).val / (b)); +} + +static inline fpbuf_t _point(fp_t x) +{ + return (x.val % (1 << FP_SHIFT)); + +} + +#define fp2str(x) x.val +/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */ +#define _FP_ "%ld/1024" + +static inline fpbuf_t _floor(fp_t x) +{ + return x.val >> FP_SHIFT; +} + +/* FIXME: negative rounding */ +static inline fpbuf_t _round(fp_t x) +{ + return _floor(x) + ((x.val >> ROUND_BIT) & 1); +} + +/* multiply two fixed point values */ +static inline fp_t _mul(fp_t a, fp_t b) +{ + return _fp((a.val * b.val) >> FP_SHIFT); +} + +static inline fp_t _div(fp_t a, fp_t b) +{ +#if !defined(__KERNEL__) && !defined(unlikely) +#define unlikely(x) (x) +#define DO_UNDEF_UNLIKELY +#endif + /* try not to overflow */ + if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) )) + return _fp((a.val / b.val) << FP_SHIFT); + else + return _fp((a.val << FP_SHIFT) / b.val); +#ifdef DO_UNDEF_UNLIKELY +#undef unlikely +#undef DO_UNDEF_UNLIKELY +#endif +} + +static inline fp_t _add(fp_t a, fp_t b) +{ + return _fp(a.val + b.val); +} + +static inline fp_t _sub(fp_t a, fp_t b) +{ + return _fp(a.val - b.val); +} + +static inline fp_t _neg(fp_t x) +{ + return _fp(-x.val); +} + +static inline fp_t _abs(fp_t x) +{ + return _fp(abs(x.val)); +} + +/* works the same as casting float/double to integer */ +static inline fpbuf_t _fp_to_integer(fp_t x) +{ + return _floor(_abs(x)) * ((x.val > 0) ? 
1 : -1); +} + +static inline fp_t _integer_to_fp(fpbuf_t x) +{ + return _frac(x,1); +} + +static inline int _leq(fp_t a, fp_t b) +{ + return a.val <= b.val; +} + +static inline int _geq(fp_t a, fp_t b) +{ + return a.val >= b.val; +} + +static inline int _lt(fp_t a, fp_t b) +{ + return a.val < b.val; +} + +static inline int _gt(fp_t a, fp_t b) +{ + return a.val > b.val; +} + +static inline int _eq(fp_t a, fp_t b) +{ + return a.val == b.val; +} + +static inline fp_t _max(fp_t a, fp_t b) +{ + if (a.val < b.val) + return b; + else + return a; +} +#endif diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index 338245abd6ed..807b7888695a 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -63,7 +63,7 @@ void litmus_exit_task(struct task_struct *tsk); #define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) #define get_deadline(t) (tsk_rt(t)->job_params.deadline) #define get_release(t) (tsk_rt(t)->job_params.release) - +#define get_lateness(t) (tsk_rt(t)->job_params.lateness) #define is_hrt(t) \ (tsk_rt(t)->task_params.cls == RT_CLASS_HARD) diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 89ac0dda7d3d..fac939dbd33a 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -110,6 +110,12 @@ struct rt_job { /* How much service has this job received so far? */ lt_t exec_time; + /* By how much did the prior job miss its deadline by? + * Value differs from tardiness in that lateness may + * be negative (when job finishes before its deadline). + */ + long long lateness; + /* Which job is this. This is used to let user space * specify which job to wait for, which is important if jobs * overrun. If we just call sys_sleep_next_period() then we diff --git a/litmus/Kconfig b/litmus/Kconfig index 68459d4dca41..48ff3e3c657c 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -79,6 +79,52 @@ config SCHED_CPU_AFFINITY Say Yes if unsure. +choice + prompt "EDF Tie-Break Behavior" + default EDF_TIE_BREAK_LATENESS_NORM + help + Allows the configuration of tie-breaking behavior when the deadlines + of two EDF-scheduled tasks are equal. + + config EDF_TIE_BREAK_LATENESS + bool "Lateness-based Tie Break" + help + Break ties between to jobs, A and B, based upon the lateness of their + prior jobs. The job with the greatest lateness has priority. Note that + lateness has a negative value if the prior job finished before its + deadline. + + config EDF_TIE_BREAK_LATENESS_NORM + bool "Normalized Lateness-based Tie Break" + help + Break ties between to jobs, A and B, based upon the lateness, normalized + by relative deadline, their prior jobs. The job with the greatest + normalized lateness has priority. Note that lateness has a negative value + if the prior job finished before its deadline. + + Normalized lateness tie-breaks are likely desireable over non-normalized + tie-breaks if the execution times and/or relative deadlines of tasks in a + task set vary greatly. + + config EDF_TIE_BREAK_HASH + bool "Hash-based Tie Breaks" + help + Break ties between two jobs, A and B, with equal deadlines by using a + uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job + A has ~50% of winning a given tie-break. + + config EDF_PID_TIE_BREAK + bool "PID-based Tie Breaks" + help + Break ties based upon OS-assigned process IDs. Use this option if + required by algorithm's real-time analysis or per-task response-time + jitter must be minimized in overload conditions. + + NOTES: + * This tie-breaking method was default in Litmus 2012.2 and before. 
+ +endchoice + endmenu menu "Tracing" diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 668737f0fbf9..52205df3ea8b 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -14,6 +14,32 @@ #include +#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM +#include +#endif + +#ifdef CONFIG_EDF_TIE_BREAK_HASH +#include +static inline long edf_hash(struct task_struct *t) +{ + /* pid is 32 bits, so normally we would shove that into the + * upper 32-bits and and put the job number in the bottom + * and hash the 64-bit number with hash_64(). Sadly, + * in testing, hash_64() doesn't distribute keys were the + * upper bits are close together (as would be the case with + * pids) and job numbers are equal (as would be the case with + * synchronous task sets with all relative deadlines equal). + * + * A 2006 Linux patch proposed the following solution + * (but for some reason it wasn't accepted...). + * + * At least this workaround works for 32-bit systems as well. + */ + return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32); +} +#endif + + /* edf_higher_prio - returns true if first has a higher EDF priority * than second. Deadline ties are broken by PID. * @@ -63,32 +89,78 @@ int edf_higher_prio(struct task_struct* first, #endif - /* Determine the task with earliest deadline, with - * tie-break logic. - */ - if (unlikely(!is_realtime(second_task))) { - return 1; - } - else if (earlier_deadline(first_task, second_task)) { - /* Is the deadline of the first task earlier? - * Then it has higher priority. - */ + if (earlier_deadline(first_task, second_task)) { return 1; } else if (get_deadline(first_task) == get_deadline(second_task)) { - /* Need to tie break */ - - /* Tie break by pid */ - if (first_task->pid < second_task->pid) { + /* Need to tie break. All methods must set pid_break to 0/1 if + * first_task does not have priority over second_task. + */ + int pid_break; + + +#if defined(CONFIG_EDF_TIE_BREAK_LATENESS) + /* Tie break by lateness. Jobs with greater lateness get + * priority. This should spread tardiness across all tasks, + * especially in task sets where all tasks have the same + * period and relative deadlines. + */ + if (get_lateness(first_task) > get_lateness(second_task)) { return 1; } - else if (first_task->pid == second_task->pid) { - /* If the PIDs are the same then the task with the - * inherited priority wins. - */ - if (!second_task->rt_param.inh_task) { + pid_break = (get_lateness(first_task) == get_lateness(second_task)); + + +#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM) + /* Tie break by lateness, normalized by relative deadline. Jobs with + * greater normalized lateness get priority. + * + * Note: Considered using the algebraically equivalent + * lateness(first)*relative_deadline(second) > + lateness(second)*relative_deadline(first) + * to avoid fixed-point math, but values are prone to overflow if inputs + * are on the order of several seconds, even in 64-bit. + */ + fp_t fnorm = _frac(get_lateness(first_task), + get_rt_relative_deadline(first_task)); + fp_t snorm = _frac(get_lateness(second_task), + get_rt_relative_deadline(second_task)); + if (_gt(fnorm, snorm)) { + return 1; + } + pid_break = _eq(fnorm, snorm); + + +#elif defined(CONFIG_EDF_TIE_BREAK_HASH) + /* Tie break by comparing hashs of (pid, job#) tuple. There should be + * a 50% chance that first_task has a higher priority than second_task. 
+ */ + long fhash = edf_hash(first_task); + long shash = edf_hash(second_task); + if (fhash < shash) { + return 1; + } + pid_break = (fhash == shash); +#else + + + /* CONFIG_EDF_PID_TIE_BREAK */ + pid_break = 1; // fall through to tie-break by pid; +#endif + + /* Tie break by pid */ + if(pid_break) { + if (first_task->pid < second_task->pid) { return 1; } + else if (first_task->pid == second_task->pid) { + /* If the PIDs are the same then the task with the + * inherited priority wins. + */ + if (!second_task->rt_param.inh_task) { + return 1; + } + } } } return 0; /* fall-through. prio(second_task) > prio(first_task) */ diff --git a/litmus/jobs.c b/litmus/jobs.c index bc8246572e54..fb093c03d53d 100644 --- a/litmus/jobs.c +++ b/litmus/jobs.c @@ -23,6 +23,14 @@ static inline void setup_release(struct task_struct *t, lt_t release) void prepare_for_next_period(struct task_struct *t) { BUG_ON(!t); + + /* Record lateness before we set up the next job's + * release and deadline. Lateness may be negative. + */ + t->rt_param.job_params.lateness = + (long long)litmus_clock() - + (long long)t->rt_param.job_params.deadline; + setup_release(t, get_release(t) + get_rt_period(t)); } -- cgit v1.2.2 From 00c173dc87b14b8422cea2aa129a2fc99689a05d Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 31 Aug 2012 11:10:33 -0400 Subject: enable migration tracing and short-cut interrupts --- kernel/softirq.c | 2 +- litmus/gpu_affinity.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index 5ce271675662..b013046e8c36 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -445,7 +445,7 @@ void __tasklet_schedule(struct tasklet_struct *t) #ifdef CONFIG_LITMUS_NVIDIA if(is_nvidia_func(t->func)) { -#if 0 +#if 1 // do nvidia tasklets right away and return if(__do_nv_now(t)) return; diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 9762be1a085e..55bb5e1128ec 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c @@ -58,17 +58,17 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. } else { -// struct migration_info mig_info; + struct migration_info mig_info; sched_trace_prediction_err(t, &(tsk_rt(t)->gpu_migration), &rel_err); -// mig_info.observed = observed; -// mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration); -// mig_info.distance = tsk_rt(t)->gpu_migration; -// -// sched_trace_migration(t, &mig_info); + mig_info.observed = observed; + mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration); + mig_info.distance = tsk_rt(t)->gpu_migration; + + sched_trace_migration(t, &mig_info); } } -- cgit v1.2.2 From 6a225701acf7d79f292eeffcd99d6f00b02c180b Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 7 Sep 2012 23:25:01 -0400 Subject: Infrastructure for Litmus signals. Added signals to Litmus. Specifcally, SIG_BUDGET signals are delivered (when requested by real-time tasks) when a budget is exceeded. Note: pfair not currently supported (but it probably could be). 
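To make the delivery model concrete, below is a minimal user-space sketch of a
task that catches SIG_BUDGET. It assumes only what this patch defines
(SIG_BUDGET = SIGRTMAX - 0 and the PRECISE_SIGNALS budget signal policy); the
sigaction() plumbing is plain POSIX, and the steps that fill in the task's
struct rt_task and admit it as a real-time task are deliberately elided.

    #include <signal.h>
    #include <stdio.h>
    #include <string.h>

    /* Mirrors include/litmus/signal.h; assumed visible to user space. */
    #define SIG_BUDGET (SIGRTMAX - 0)

    static volatile sig_atomic_t budget_overruns;

    static void on_budget_exhausted(int sig)
    {
            (void)sig;
            budget_overruns++;      /* async-signal-safe: just count */
    }

    int main(void)
    {
            struct sigaction sa;

            memset(&sa, 0, sizeof(sa));
            sa.sa_handler = on_budget_exhausted;
            sigemptyset(&sa.sa_mask);
            sigaction(SIG_BUDGET, &sa, NULL);

            /* ... set budget_signal_policy = PRECISE_SIGNALS in the task's
             * struct rt_task and become a real-time task here ... */

            while (!budget_overruns) {
                    /* job body; the kernel delivers SIG_BUDGET once per job
                     * when the budget is exhausted */
            }
            printf("budget exhausted %d time(s)\n", (int)budget_overruns);
            return 0;
    }

If no handler is installed, the default action of a real-time signal
terminates the task, which is why include/litmus/signal.h calls this case out.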
--- include/litmus/budget.h | 20 +++++++++++++++--- include/litmus/rt_param.h | 16 +++++++++++++- include/litmus/signal.h | 47 +++++++++++++++++++++++++++++++++++++++++ litmus/budget.c | 16 ++++++++++++-- litmus/jobs.c | 2 ++ litmus/litmus.c | 10 +++++++++ litmus/sched_cedf.c | 47 +++++++++++++++++++++++++++-------------- litmus/sched_gsn_edf.c | 54 ++++++++++++++++++++++++++++++----------------- litmus/sched_pfp.c | 40 ++++++++++++++++++++++++----------- litmus/sched_psn_edf.c | 41 +++++++++++++++++++++++------------ 10 files changed, 227 insertions(+), 66 deletions(-) create mode 100644 include/litmus/signal.h diff --git a/include/litmus/budget.h b/include/litmus/budget.h index 33344ee8d5f9..763b31c0e9f6 100644 --- a/include/litmus/budget.h +++ b/include/litmus/budget.h @@ -5,6 +5,9 @@ * the next task. */ void update_enforcement_timer(struct task_struct* t); +/* Send SIG_BUDGET to a real-time task. */ +void send_sigbudget(struct task_struct* t); + inline static int budget_exhausted(struct task_struct* t) { return get_exec_time(t) >= get_exec_cost(t); @@ -19,10 +22,21 @@ inline static lt_t budget_remaining(struct task_struct* t) return 0; } -#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT) +#define budget_enforced(t) (\ + tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT) + +#define budget_precisely_tracked(t) (\ + tsk_rt(t)->task_params.budget_policy == PRECISE_ENFORCEMENT || \ + tsk_rt(t)->task_params.budget_signal_policy == PRECISE_SIGNALS) + +#define budget_signalled(t) (\ + tsk_rt(t)->task_params.budget_signal_policy != NO_SIGNALS) + +#define budget_precisely_signalled(t) (\ + tsk_rt(t)->task_params.budget_policy == PRECISE_SIGNALS) -#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \ - == PRECISE_ENFORCEMENT) +#define sigbudget_sent(t) (\ + test_bit(RT_JOB_SIG_BUDGET_SENT, &tsk_rt(t)->job_params.flags)) static inline int requeue_preempted_job(struct task_struct* t) { diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 89ac0dda7d3d..637fe6b84f9d 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -30,9 +30,15 @@ typedef enum { typedef enum { NO_ENFORCEMENT, /* job may overrun unhindered */ QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */ - PRECISE_ENFORCEMENT /* budgets are enforced with hrtimers */ + PRECISE_ENFORCEMENT, /* budgets are enforced with hrtimers */ } budget_policy_t; +typedef enum { + NO_SIGNALS, /* job receives no signals when it exhausts its budget */ + QUANTUM_SIGNALS, /* budget signals are only sent on quantum boundaries */ + PRECISE_SIGNALS, /* budget signals are triggered with hrtimers */ +} budget_signal_policy_t; + /* We use the common priority interpretation "lower index == higher priority", * which is commonly used in fixed-priority schedulability analysis papers. * So, a numerically lower priority value implies higher scheduling priority, @@ -62,6 +68,7 @@ struct rt_task { unsigned int priority; task_class_t cls; budget_policy_t budget_policy; /* ignored by pfair */ + budget_signal_policy_t budget_signal_policy; /* currently ignored by pfair */ }; union np_flag { @@ -118,8 +125,15 @@ struct rt_job { * Increase this sequence number when a job is released. */ unsigned int job_no; + + /* bits: + * 0th: Set if a budget exhaustion signal has already been sent for + * the current job. 
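 *       The bit is cleared by setup_release() (see the litmus/jobs.c hunk
 *       further down in this patch), so at most one budget-exhaustion
 *       signal is delivered per job.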
*/ + unsigned long flags; }; +#define RT_JOB_SIG_BUDGET_SENT 0 + struct pfair_param; /* RT task parameters for scheduling extensions diff --git a/include/litmus/signal.h b/include/litmus/signal.h new file mode 100644 index 000000000000..b3d82b294984 --- /dev/null +++ b/include/litmus/signal.h @@ -0,0 +1,47 @@ +#ifndef LITMUS_SIGNAL_H +#define LITMUS_SIGNAL_H + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +/* Signals used by Litmus to asynchronously communicate events + * to real-time tasks. + * + * Signal values overlap with [SIGRTMIN, SIGRTMAX], so beware of + * application-level conflicts when dealing with COTS user-level + * code. + */ + +/* Sent to a Litmus task when all of the following conditions are true: + * (1) The task has exhausted its budget. + * (2) budget_signal_policy is QUANTUM_SIGNALS or PRECISE_SIGNALS. + * + * Note: If a task does not have a registered handler for SIG_BUDGET, + * the signal will cause the task to terminate (default action). + */ + +/* Assigned values start at SIGRTMAX and decrease, hopefully reducing + * likelihood of user-level conflicts. + */ +#define SIG_BUDGET (SIGRTMAX - 0) + +/* +Future signals could include: + +#define SIG_DEADLINE_MISS (SIGRTMAX - 1) +#define SIG_CRIT_LEVEL_CHANGE (SIGRTMAX - 2) +*/ + +#define SIGLITMUSMIN SIG_BUDGET + +#ifdef __KERNEL__ +#if (SIGLITMUSMIN < SIGRTMIN) +/* no compile-time check in user-space since SIGRTMIN may be a variable. */ +#error "Too many LITMUS^RT signals!" +#endif +#endif + +#endif diff --git a/litmus/budget.c b/litmus/budget.c index f7712be29adb..518174a37a3b 100644 --- a/litmus/budget.c +++ b/litmus/budget.c @@ -1,11 +1,13 @@ #include #include #include +#include #include #include #include +#include struct enforcement_timer { /* The enforcement timer is used to accurately police @@ -64,7 +66,7 @@ static void arm_enforcement_timer(struct enforcement_timer* et, /* Calling this when there is no budget left for the task * makes no sense, unless the task is non-preemptive. */ - BUG_ON(budget_exhausted(t) && (!is_np(t))); + BUG_ON(budget_exhausted(t) && !is_np(t)); /* __hrtimer_start_range_ns() cancels the timer * anyway, so we don't have to check whether it is still armed */ @@ -86,7 +88,7 @@ void update_enforcement_timer(struct task_struct* t) { struct enforcement_timer* et = &__get_cpu_var(budget_timer); - if (t && budget_precisely_enforced(t)) { + if (t && budget_precisely_tracked(t) && !sigbudget_sent(t)) { /* Make sure we call into the scheduler when this budget * expires. */ arm_enforcement_timer(et, t); @@ -96,6 +98,16 @@ void update_enforcement_timer(struct task_struct* t) } } +void send_sigbudget(struct task_struct* t) +{ + if (!test_and_set_bit(RT_JOB_SIG_BUDGET_SENT, &tsk_rt(t)->job_params.flags)) { + /* signal has not yet been sent and we are responsible for sending + * since we just set the sent-bit when it was previously 0. 
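	 * (test_and_set_bit() is atomic, so if the tick path and the
	 * schedule() path race to signal the same exhausted job, only the
	 * first caller sees the bit as zero and actually issues send_sig().)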
*/ + + TRACE_TASK(t, "SIG_BUDGET being sent!\n"); + send_sig(SIG_BUDGET, t, 1); /* '1' denotes signal sent from kernel */ + } +} static int __init init_budget_enforcement(void) { diff --git a/litmus/jobs.c b/litmus/jobs.c index bc8246572e54..4981665a37bf 100644 --- a/litmus/jobs.c +++ b/litmus/jobs.c @@ -13,6 +13,8 @@ static inline void setup_release(struct task_struct *t, lt_t release) t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t); t->rt_param.job_params.exec_time = 0; + clear_bit(RT_JOB_SIG_BUDGET_SENT, &t->rt_param.job_params.flags); + /* update job sequence number */ t->rt_param.job_params.job_no++; diff --git a/litmus/litmus.c b/litmus/litmus.c index 81384327e850..3526749852aa 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -136,6 +136,16 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) pid, tp.budget_policy); goto out_unlock; } + if (tp.budget_signal_policy != NO_SIGNALS && + tp.budget_signal_policy != QUANTUM_SIGNALS && + tp.budget_signal_policy != PRECISE_SIGNALS) + { + printk(KERN_INFO "litmus: real-time task %d rejected " + "because unsupported budget signalling policy " + "specified (%d)\n", + pid, tp.budget_signal_policy); + goto out_unlock; + } target->rt_param.task_params = tp; diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index b0c16e34d2c5..208f067934fc 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -371,21 +371,29 @@ static noinline void job_completion(struct task_struct *t, int forced) */ static void cedf_tick(struct task_struct* t) { - if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { - if (!is_np(t)) { - /* np tasks will be preempted when they become - * preemptable again - */ - litmus_reschedule_local(); - set_will_schedule(); - TRACE("cedf_scheduler_tick: " - "%d is preemptable " - " => FORCE_RESCHED\n", t->pid); - } else if (is_user_np(t)) { - TRACE("cedf_scheduler_tick: " - "%d is non-preemptable, " - "preemption delayed.\n", t->pid); - request_exit_np(t); + if (is_realtime(t) && budget_exhausted(t)) + { + if (budget_signalled(t) && !sigbudget_sent(t)) { + /* signal exhaustion */ + send_sigbudget(t); + } + + if (budget_enforced(t)) { + if (!is_np(t)) { + /* np tasks will be preempted when they become + * preemptable again + */ + litmus_reschedule_local(); + set_will_schedule(); + TRACE("cedf_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } else if (is_user_np(t)) { + TRACE("cedf_scheduler_tick: " + "%d is non-preemptable, " + "preemption delayed.\n", t->pid); + request_exit_np(t); + } } } } @@ -415,7 +423,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) { cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); cedf_domain_t *cluster = entry->cluster; - int out_of_time, sleep, preempt, np, exists, blocks; + int out_of_time, signal_budget, sleep, preempt, np, exists, blocks; struct task_struct* next = NULL; #ifdef CONFIG_RELEASE_MASTER @@ -442,6 +450,10 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) out_of_time = exists && budget_enforced(entry->scheduled) && budget_exhausted(entry->scheduled); + signal_budget = exists && + budget_signalled(entry->scheduled) && + budget_exhausted(entry->scheduled) && + !sigbudget_sent(entry->scheduled); np = exists && is_np(entry->scheduled); sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; preempt = entry->scheduled != entry->linked; @@ -460,6 +472,9 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) TRACE_TASK(prev, 
"will be preempted by %s/%d\n", entry->linked->comm, entry->linked->pid); + /* Send the signal that the budget has been exhausted */ + if (signal_budget) + send_sigbudget(entry->scheduled); /* If a task blocks we have no choice but to reschedule. */ diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index c3344b9d288f..c1f25b56e51e 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -362,20 +362,28 @@ static noinline void job_completion(struct task_struct *t, int forced) */ static void gsnedf_tick(struct task_struct* t) { - if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { - if (!is_np(t)) { - /* np tasks will be preempted when they become - * preemptable again - */ - litmus_reschedule_local(); - TRACE("gsnedf_scheduler_tick: " - "%d is preemptable " - " => FORCE_RESCHED\n", t->pid); - } else if (is_user_np(t)) { - TRACE("gsnedf_scheduler_tick: " - "%d is non-preemptable, " - "preemption delayed.\n", t->pid); - request_exit_np(t); + if (is_realtime(t) && budget_exhausted(t)) + { + if (budget_signalled(t) && !sigbudget_sent(t)) { + /* signal exhaustion */ + send_sigbudget(t); + } + + if (budget_enforced(t)) { + if (!is_np(t)) { + /* np tasks will be preempted when they become + * preemptable again + */ + litmus_reschedule_local(); + TRACE("gsnedf_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } else if (is_user_np(t)) { + TRACE("gsnedf_scheduler_tick: " + "%d is non-preemptable, " + "preemption delayed.\n", t->pid); + request_exit_np(t); + } } } } @@ -404,7 +412,7 @@ static void gsnedf_tick(struct task_struct* t) static struct task_struct* gsnedf_schedule(struct task_struct * prev) { cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); - int out_of_time, sleep, preempt, np, exists, blocks; + int out_of_time, signal_budget, sleep, preempt, np, exists, blocks; struct task_struct* next = NULL; #ifdef CONFIG_RELEASE_MASTER @@ -427,8 +435,13 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) /* (0) Determine state */ exists = entry->scheduled != NULL; blocks = exists && !is_running(entry->scheduled); - out_of_time = exists && budget_enforced(entry->scheduled) - && budget_exhausted(entry->scheduled); + out_of_time = exists && + budget_enforced(entry->scheduled) && + budget_exhausted(entry->scheduled); + signal_budget = exists && + budget_signalled(entry->scheduled) && + budget_exhausted(entry->scheduled) && + !sigbudget_sent(entry->scheduled); np = exists && is_np(entry->scheduled); sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; preempt = entry->scheduled != entry->linked; @@ -439,14 +452,17 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) if (exists) TRACE_TASK(prev, - "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " + "blocks:%d out_of_time:%d signal_budget: %d np:%d sleep:%d preempt:%d " "state:%d sig:%d\n", - blocks, out_of_time, np, sleep, preempt, + blocks, out_of_time, signal_budget, np, sleep, preempt, prev->state, signal_pending(prev)); if (entry->linked && preempt) TRACE_TASK(prev, "will be preempted by %s/%d\n", entry->linked->comm, entry->linked->pid); + /* Send the signal that the budget has been exhausted */ + if (signal_budget) + send_sigbudget(entry->scheduled); /* If a task blocks we have no choice but to reschedule. 
*/ diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c index 62be699629b1..6129eb94d3ea 100644 --- a/litmus/sched_pfp.c +++ b/litmus/sched_pfp.c @@ -135,17 +135,25 @@ static void pfp_tick(struct task_struct *t) */ BUG_ON(is_realtime(t) && t != pfp->scheduled); - if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { - if (!is_np(t)) { - litmus_reschedule_local(); - TRACE("pfp_scheduler_tick: " - "%d is preemptable " - " => FORCE_RESCHED\n", t->pid); - } else if (is_user_np(t)) { - TRACE("pfp_scheduler_tick: " - "%d is non-preemptable, " - "preemption delayed.\n", t->pid); - request_exit_np(t); + if (is_realtime(t) && budget_exhausted(t)) + { + if (budget_signalled(t) && !sigbudget_sent(t)) { + /* signal exhaustion */ + send_sigbudget(t); + } + + if (budget_enforced(t)) { + if (!is_np(t)) { + litmus_reschedule_local(); + TRACE("pfp_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } else if (is_user_np(t)) { + TRACE("pfp_scheduler_tick: " + "%d is non-preemptable, " + "preemption delayed.\n", t->pid); + request_exit_np(t); + } } } } @@ -155,7 +163,7 @@ static struct task_struct* pfp_schedule(struct task_struct * prev) pfp_domain_t* pfp = local_pfp; struct task_struct* next; - int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate; + int out_of_time, signal_budget, sleep, preempt, np, exists, blocks, resched, migrate; raw_spin_lock(&pfp->slock); @@ -172,6 +180,10 @@ static struct task_struct* pfp_schedule(struct task_struct * prev) out_of_time = exists && budget_enforced(pfp->scheduled) && budget_exhausted(pfp->scheduled); + signal_budget = exists && + budget_signalled(pfp->scheduled) && + budget_exhausted(pfp->scheduled) && + !sigbudget_sent(pfp->scheduled); np = exists && is_np(pfp->scheduled); sleep = exists && get_rt_flags(pfp->scheduled) == RT_F_SLEEP; migrate = exists && get_partition(pfp->scheduled) != pfp->cpu; @@ -183,6 +195,10 @@ static struct task_struct* pfp_schedule(struct task_struct * prev) */ resched = preempt; + /* Send the signal that the budget has been exhausted */ + if (signal_budget) + send_sigbudget(pfp->scheduled); + /* If a task blocks we have no choice but to reschedule. 
*/ if (blocks) diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c index b0c8126bd44a..a5fda133bad9 100644 --- a/litmus/sched_psn_edf.c +++ b/litmus/sched_psn_edf.c @@ -169,17 +169,25 @@ static void psnedf_tick(struct task_struct *t) */ BUG_ON(is_realtime(t) && t != pedf->scheduled); - if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { - if (!is_np(t)) { - litmus_reschedule_local(); - TRACE("psnedf_scheduler_tick: " - "%d is preemptable " - " => FORCE_RESCHED\n", t->pid); - } else if (is_user_np(t)) { - TRACE("psnedf_scheduler_tick: " - "%d is non-preemptable, " - "preemption delayed.\n", t->pid); - request_exit_np(t); + if (is_realtime(t) && budget_exhausted(t)) + { + if (budget_signalled(t) && !sigbudget_sent(t)) { + /* signal exhaustion */ + send_sigbudget(t); + } + + if (budget_enforced(t)) { + if (!is_np(t)) { + litmus_reschedule_local(); + TRACE("psnedf_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } else if (is_user_np(t)) { + TRACE("psnedf_scheduler_tick: " + "%d is non-preemptable, " + "preemption delayed.\n", t->pid); + request_exit_np(t); + } } } } @@ -190,8 +198,7 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev) rt_domain_t* edf = &pedf->domain; struct task_struct* next; - int out_of_time, sleep, preempt, - np, exists, blocks, resched; + int out_of_time, signal_budget, sleep, preempt, np, exists, blocks, resched; raw_spin_lock(&pedf->slock); @@ -208,6 +215,10 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev) out_of_time = exists && budget_enforced(pedf->scheduled) && budget_exhausted(pedf->scheduled); + signal_budget = exists && + budget_signalled(pedf->scheduled) && + budget_exhausted(pedf->scheduled) && + !sigbudget_sent(pedf->scheduled); np = exists && is_np(pedf->scheduled); sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP; preempt = edf_preemption_needed(edf, prev); @@ -218,6 +229,10 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev) */ resched = preempt; + /* Send the signal that the budget has been exhausted */ + if (signal_budget) + send_sigbudget(pedf->scheduled); + /* If a task blocks we have no choice but to reschedule. */ if (blocks) -- cgit v1.2.2 From d3c32e91e3fce2a57083a734efae6d9de06ec02f Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sat, 8 Sep 2012 10:26:00 -0400 Subject: Fixed type-os and clarified text in litmus/Kconfig --- litmus/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/litmus/Kconfig b/litmus/Kconfig index 48ff3e3c657c..f2dbfb396883 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -89,7 +89,7 @@ choice config EDF_TIE_BREAK_LATENESS bool "Lateness-based Tie Break" help - Break ties between to jobs, A and B, based upon the lateness of their + Break ties between two jobs, A and B, based upon the lateness of their prior jobs. The job with the greatest lateness has priority. Note that lateness has a negative value if the prior job finished before its deadline. @@ -97,8 +97,8 @@ choice config EDF_TIE_BREAK_LATENESS_NORM bool "Normalized Lateness-based Tie Break" help - Break ties between to jobs, A and B, based upon the lateness, normalized - by relative deadline, their prior jobs. The job with the greatest + Break ties between two jobs, A and B, based upon the lateness, normalized + by relative deadline, of their prior jobs. The job with the greatest normalized lateness has priority. Note that lateness has a negative value if the prior job finished before its deadline. 
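For reference, the comparison this option selects can be reproduced in a few
lines of user-space C. The helpers below mirror include/litmus/fpmath.h
(FP_SHIFT = 10, i.e. values are stored scaled by 1024); the lateness and
relative-deadline values are made-up numbers, not measurements.

    #include <stdio.h>
    #include <stdint.h>

    typedef int64_t fpbuf_t;
    typedef struct { fpbuf_t val; } fp_t;
    #define FP_SHIFT 10

    /* divide two integers to obtain a fixed point value (as in fpmath.h) */
    static fp_t _frac(fpbuf_t a, fpbuf_t b) { return (fp_t){ (a << FP_SHIFT) / b }; }
    static int  _gt(fp_t a, fp_t b)         { return a.val > b.val; }
    static int  _eq(fp_t a, fp_t b)         { return a.val == b.val; }

    int main(void)
    {
            /* Task 1: 3 ms late with a 10 ms relative deadline.
             * Task 2: 5 ms late with a 50 ms relative deadline. */
            fp_t fnorm = _frac(3000000, 10000000);  /* 307/1024, ~0.30 */
            fp_t snorm = _frac(5000000, 50000000);  /* 102/1024, ~0.10 */

            printf("fnorm = %lld/1024, snorm = %lld/1024\n",
                   (long long)fnorm.val, (long long)snorm.val);
            printf("first wins tie-break: %d, pid tie-break needed: %d\n",
                   _gt(fnorm, snorm), _eq(fnorm, snorm));
            return 0;
    }

This is the fnorm/snorm comparison performed by the
CONFIG_EDF_TIE_BREAK_LATENESS_NORM branch of edf_higher_prio() in
litmus/edf_common.c, with equality falling through to the PID tie-break.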
@@ -116,9 +116,9 @@ choice config EDF_PID_TIE_BREAK bool "PID-based Tie Breaks" help - Break ties based upon OS-assigned process IDs. Use this option if + Break ties based upon OS-assigned thread IDs. Use this option if required by algorithm's real-time analysis or per-task response-time - jitter must be minimized in overload conditions. + jitter must be minimized. NOTES: * This tie-breaking method was default in Litmus 2012.2 and before. -- cgit v1.2.2 From 193a19c94a32f2e2a0e973f0a98cf4a098cefa15 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 9 Sep 2012 13:42:13 -0400 Subject: simple average tracking --- include/litmus/gpu_affinity.h | 15 +++++----- include/litmus/rt_param.h | 15 ++++++++-- litmus/gpu_affinity.c | 66 +++++++++++++++++++++++++++++++++++++++---- litmus/litmus.c | 3 +- 4 files changed, 83 insertions(+), 16 deletions(-) diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h index 6b3fb8b28745..d64a15cbf2a5 100644 --- a/include/litmus/gpu_affinity.h +++ b/include/litmus/gpu_affinity.h @@ -31,17 +31,18 @@ static inline lt_t get_gpu_time(struct task_struct* t) static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) { - int i; - fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); - lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates... +// int i; +// fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); +// lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates... + lt_t val = t->rt_param.gpu_migration_est[dist].avg; - WARN_ON(temp < 0); +// WARN_ON(temp < 0); // lower-bound a distant migration to be at least equal to the level // below it. - for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) { - val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); - } +// for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) { +// val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); +// } return ((val > 0) ? val : dist+1); } diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 0198884eab86..a441badd30cc 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -144,6 +144,17 @@ typedef struct feedback_est{ fp_t accum_err; } feedback_est_t; + +#define AVG_EST_WINDOW_SIZE 20 + +typedef struct avg_est{ + lt_t history[AVG_EST_WINDOW_SIZE]; + uint16_t count; + uint16_t idx; + lt_t sum; + lt_t avg; +} avg_est_t; + /* RT task parameters for scheduling extensions * These parameters are inherited during clone and therefore must * be explicitly set up before the task set is launched. @@ -190,12 +201,10 @@ struct rt_param { long unsigned int held_gpus; // bitmap of held GPUs. #ifdef CONFIG_LITMUS_AFFINITY_LOCKING - fp_t gpu_fb_param_a[MIG_LAST+1]; - fp_t gpu_fb_param_b[MIG_LAST+1]; + avg_est_t gpu_migration_est[MIG_LAST+1]; gpu_migration_dist_t gpu_migration; int last_gpu; - feedback_est_t gpu_migration_est[MIG_LAST+1]; // local, near, med, far lt_t accum_gpu_time; lt_t gpu_time_stamp; diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 55bb5e1128ec..2cdf18bc7dd6 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c @@ -7,7 +7,14 @@ #include -#define OBSERVATION_CAP 2*1e9 +#define OBSERVATION_CAP ((lt_t)(2e9)) + +// reason for skew: high outliers are less +// frequent and way out of bounds +#define HI_THRESHOLD 2 +#define LO_THRESHOLD 4 + +#define MIN(a, b) ((a < b) ? 
a : b) static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) { @@ -28,10 +35,59 @@ static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) void update_gpu_estimate(struct task_struct *t, lt_t observed) { - feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); + //feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); + avg_est_t *est; + struct migration_info mig_info; BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST); + est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); + + if (unlikely(observed > OBSERVATION_CAP)) { + TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n", + observed, + OBSERVATION_CAP); + return; + } + +#if 0 + // filter out values that are HI_THRESHOLDx or (1/LO_THRESHOLD)x out + // of range of the average, but only filter if enough samples + // have been taken. + if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) { + if (unlikely(observed < est->avg/LO_THRESHOLD)) { + TRACE_TASK(t, "Observation is too small: %llu\n", + observed); + return; + } + else if (unlikely(observed > est->avg*HI_THRESHOLD)) { + TRACE_TASK(t, "Observation is too large: %llu\n", + observed); + return; + } + } +#endif + + if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) { + ++est->count; + } + else { + est->sum -= est->history[est->idx]; + } + + mig_info.observed = observed; + mig_info.estimated = est->avg; + mig_info.distance = tsk_rt(t)->gpu_migration; + sched_trace_migration(t, &mig_info); + + + est->history[est->idx] = observed; + est->sum += observed; + est->avg = est->sum/est->count; + est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE; + + +#if 0 if(unlikely(fb->est.val == 0)) { // kludge-- cap observed values to prevent whacky estimations. // whacky stuff happens during the first few jobs. 
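The ring-buffer bookkeeping above is easier to follow in isolation. The sketch
below is a stand-alone, user-space model of the avg_est_t update
(AVG_EST_WINDOW_SIZE = 20 as in rt_param.h); the observation values are
invented and the kernel's outlier filtering is omitted.

    #include <stdio.h>
    #include <stdint.h>

    #define AVG_EST_WINDOW_SIZE 20
    typedef uint64_t lt_t;

    struct avg_est {
            lt_t history[AVG_EST_WINDOW_SIZE];
            uint16_t count, idx;
            lt_t sum, avg;
    };

    static void observe(struct avg_est *est, lt_t observed)
    {
            if (est->count < AVG_EST_WINDOW_SIZE)
                    ++est->count;
            else
                    est->sum -= est->history[est->idx]; /* evict oldest sample */

            est->history[est->idx] = observed;
            est->sum += observed;
            est->avg = est->sum / est->count;  /* mean of last <= 20 samples */
            est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;
    }

    int main(void)
    {
            struct avg_est est = { {0} };
            lt_t samples[] = { 900000, 1100000, 1000000, 1050000 };
            unsigned i;

            for (i = 0; i < sizeof(samples)/sizeof(samples[0]); i++) {
                    observe(&est, samples[i]);
                    printf("obs=%llu avg=%llu\n",
                           (unsigned long long)samples[i],
                           (unsigned long long)est.avg);
            }
            return 0;
    }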
@@ -71,12 +127,12 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) sched_trace_migration(t, &mig_info); } } +#endif - TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n", + TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n", tsk_rt(t)->gpu_migration, observed, - _fp_to_integer(fb->est), - _point(fb->est)); + est->avg); } gpu_migration_dist_t gpu_migration_distance(int a, int b) diff --git a/litmus/litmus.c b/litmus/litmus.c index d1f836c8af6e..91ec65894379 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -321,6 +321,7 @@ void init_gpu_affinity_state(struct task_struct* p) //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); +#if 0 // emperical; p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); @@ -333,7 +334,7 @@ void init_gpu_affinity_state(struct task_struct* p) p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000); p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000); - +#endif p->rt_param.gpu_migration = MIG_NONE; p->rt_param.last_gpu = -1; } -- cgit v1.2.2 From 901fdd9c22790039a76c1d3ee01828a2f124f6f3 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 10 Sep 2012 11:27:08 -0400 Subject: standard devation-based gpu affinity predictor --- include/litmus/rt_param.h | 1 + litmus/gpu_affinity.c | 67 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index a441badd30cc..04239c747f06 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -152,6 +152,7 @@ typedef struct avg_est{ uint16_t count; uint16_t idx; lt_t sum; + lt_t std; lt_t avg; } avg_est_t; diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 2cdf18bc7dd6..896f3248b8a2 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c @@ -11,8 +11,10 @@ // reason for skew: high outliers are less // frequent and way out of bounds -#define HI_THRESHOLD 2 -#define LO_THRESHOLD 4 +//#define HI_THRESHOLD 2 +//#define LO_THRESHOLD 4 + +#define NUM_STDEV 2 #define MIN(a, b) ((a < b) ? a : b) @@ -33,6 +35,44 @@ static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) return relative_err; } +lt_t varience(lt_t nums[], const lt_t avg, const uint16_t count) +{ + /* brute force: takes about as much time as incremental running methods when + * count < 50 (on Bonham). Brute force also less prone to overflow. + */ + lt_t sqdeviations = 0; + uint16_t i; + for(i = 0; i < count; ++i) + { + lt_t temp = (int64_t)nums[i] - (int64_t)avg; + sqdeviations += temp * temp; + } + return sqdeviations/count; +} + +lt_t isqrt(lt_t n) +{ + /* integer square root using babylonian method + * (algo taken from wikipedia */ + lt_t res = 0; + lt_t bit = ((lt_t)1) << (sizeof(n)*8-2); + while (bit > n) { + bit >>= 2; + } + + while (bit != 0) { + if (n >= res + bit) { + n -= res + bit; + res = (res >> 1) + bit; + } + else { + res >>= 1; + } + bit >>= 2; + } + return res; +} + void update_gpu_estimate(struct task_struct *t, lt_t observed) { //feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); @@ -65,8 +105,28 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) observed); return; } - } #endif + // filter values outside NUM_STDEVx the standard deviation, + // but only filter if enough samples have been taken. 
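	// Worked example with illustrative numbers: if avg = 1,000,000 ns and
	// std = 150,000 ns, then with NUM_STDEV = 2 the admitted range is
	// [700,000 ns, 1,300,000 ns].  The MIN(range, est->avg) term below
	// clamps the lower bound at zero so the unsigned subtraction cannot
	// wrap around.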
+ if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) { + lt_t lower, upper; + + lt_t range = est->std*NUM_STDEV; + lower = est->avg - MIN(range, est->avg); // no underflow. + + if (unlikely(observed < lower)) { + TRACE_TASK(t, "Observation is too small: %llu\n", observed); + return; + } + + upper = est->avg + range; + if (unlikely(observed > upper)) { + TRACE_TASK(t, "Observation is too large: %llu\n", observed); + return; + } + } + + if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) { ++est->count; @@ -84,6 +144,7 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) est->history[est->idx] = observed; est->sum += observed; est->avg = est->sum/est->count; + est->std = isqrt(varience(est->history, est->avg, est->count)); est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE; -- cgit v1.2.2 From a916d9b2feaeb5934e1f8ba30fde74193a60e8d1 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Tue, 11 Sep 2012 15:25:29 -0400 Subject: Fix hang from bug in edf_common.c --- include/litmus/fdso.h | 22 +++++++++++----------- litmus/edf_common.c | 36 +++++++++++++++++++++++++++++++----- litmus/fdso.c | 10 ++++++---- litmus/preempt.c | 5 +++-- 4 files changed, 51 insertions(+), 22 deletions(-) diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h index 35be59b970ee..1469c0fd0460 100644 --- a/include/litmus/fdso.h +++ b/include/litmus/fdso.h @@ -20,20 +20,20 @@ typedef enum { FMLP_SEM = 0, SRP_SEM = 1, - RSM_MUTEX = 2, - IKGLP_SEM = 3, - KFMLP_SEM = 4, + MPCP_SEM = 2, + MPCP_VS_SEM = 3, + DPCP_SEM = 4, - IKGLP_SIMPLE_GPU_AFF_OBS = 5, - IKGLP_GPU_AFF_OBS = 6, - KFMLP_SIMPLE_GPU_AFF_OBS = 7, - KFMLP_GPU_AFF_OBS = 8, + PCP_SEM = 5, - MPCP_SEM = 9, - MPCP_VS_SEM = 10, - DPCP_SEM = 11, + RSM_MUTEX = 6, + IKGLP_SEM = 7, + KFMLP_SEM = 8, - PCP_SEM = 12, + IKGLP_SIMPLE_GPU_AFF_OBS = 9, + IKGLP_GPU_AFF_OBS = 10, + KFMLP_SIMPLE_GPU_AFF_OBS = 11, + KFMLP_GPU_AFF_OBS = 12, MAX_OBJ_TYPE = 12 } obj_type_t; diff --git a/litmus/edf_common.c b/litmus/edf_common.c index a1cdc10ea6f1..39ce1816ee04 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -185,11 +185,37 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } #endif - /* If the PIDs are the same then the task with the - * inherited priority wins. - */ - if (!second_task->rt_param.inh_task) { - return 1; + /* Something could be wrong if you get this far. */ + if (unlikely(first->rt_param.inh_task == + second->rt_param.inh_task)) { + /* Both tasks have the same inherited priority. + * Likely in a bug-condition. + */ + if (likely(first->pid < second->pid)) { + return 1; + } + else if (first->pid == second->pid) { + WARN_ON(1); + } + } + else { + /* At least one task must inherit */ + BUG_ON(!first->rt_param.inh_task && + !second->rt_param.inh_task); + + /* The task with the inherited priority wins. */ + if (!second->rt_param.inh_task) { + TRACE_CUR("unusual comparison: " + "first = %s/%d first_task = %s/%d " + "second = %s/%d second_task = %s/%d\n", + first->comm, first->pid, + (first->rt_param.inh_task) ? first->rt_param.inh_task->comm : "(nil)", + (first->rt_param.inh_task) ? first->rt_param.inh_task->pid : 0, + second->comm, second->pid, + (second->rt_param.inh_task) ? second->rt_param.inh_task->comm : "(nil)", + (second->rt_param.inh_task) ? 
second->rt_param.inh_task->pid : 0); + return 1; + } } } } diff --git a/litmus/fdso.c b/litmus/fdso.c index bac6a35fa17d..2411d16ba486 100644 --- a/litmus/fdso.c +++ b/litmus/fdso.c @@ -27,6 +27,12 @@ extern struct fdso_ops generic_affinity_ops; static const struct fdso_ops* fdso_ops[] = { &generic_lock_ops, /* FMLP_SEM */ &generic_lock_ops, /* SRP_SEM */ + + &generic_lock_ops, /* MPCP_SEM */ + &generic_lock_ops, /* MPCP_VS_SEM */ + &generic_lock_ops, /* DPCP_SEM */ + &generic_lock_ops, /* PCP_SEM */ + &generic_lock_ops, /* RSM_MUTEX */ &generic_lock_ops, /* IKGLP_SEM */ &generic_lock_ops, /* KFMLP_SEM */ @@ -36,10 +42,6 @@ static const struct fdso_ops* fdso_ops[] = { &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */ &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */ #endif - &generic_lock_ops, /* MPCP_SEM */ - &generic_lock_ops, /* MPCP_VS_SEM */ - &generic_lock_ops, /* DPCP_SEM */ - &generic_lock_ops, /* PCP_SEM */ }; static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) diff --git a/litmus/preempt.c b/litmus/preempt.c index 28368d5bc046..a2cae3648e15 100644 --- a/litmus/preempt.c +++ b/litmus/preempt.c @@ -26,10 +26,11 @@ void sched_state_will_schedule(struct task_struct* tsk) set_sched_state(PICKED_WRONG_TASK); else set_sched_state(WILL_SCHEDULE); - } else + } else { /* Litmus tasks should never be subject to a remote * set_tsk_need_resched(). */ - BUG_ON(is_realtime(tsk)); + //BUG_ON(is_realtime(tsk)); + } #ifdef CONFIG_PREEMPT_STATE_TRACE TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", -- cgit v1.2.2 From 55e04c94b925b0790c2ae0a79f16e939e9bb2846 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Tue, 11 Sep 2012 18:16:55 -0400 Subject: changed gpu filtering to 1.5 stdev. --- litmus/gpu_affinity.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 896f3248b8a2..7d73105b4181 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c @@ -14,7 +14,8 @@ //#define HI_THRESHOLD 2 //#define LO_THRESHOLD 4 -#define NUM_STDEV 2 +#define NUM_STDEV_NUM 1 +#define NUM_STDEV_DENOM 2 #define MIN(a, b) ((a < b) ? a : b) @@ -111,7 +112,7 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) { lt_t lower, upper; - lt_t range = est->std*NUM_STDEV; + lt_t range = (est->std*NUM_STDEV_NUM)/NUM_STDEV_DENOM; lower = est->avg - MIN(range, est->avg); // no underflow. if (unlikely(observed < lower)) { -- cgit v1.2.2 From fd3aa01f176cf12b1625f4f46ba01f3340bb57ed Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Tue, 11 Sep 2012 19:36:11 -0400 Subject: blarg --- include/litmus/rt_param.h | 5 +++ include/litmus/unistd_32.h | 4 +- include/litmus/unistd_64.h | 5 ++- litmus/litmus.c | 109 ++++++++++++++++++++++++++++++++++++--------- 4 files changed, 99 insertions(+), 24 deletions(-) diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 89ac0dda7d3d..21430623a940 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -157,6 +157,11 @@ struct rt_param { */ struct task_struct* inh_task; + + struct task_struct* hp_group; + unsigned int is_slave:1; + + #ifdef CONFIG_NP_SECTION /* For the FMLP under PSN-EDF, it is required to make the task * non-preemptive from kernel space. 
In order not to interfere with diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index 94264c27d9ac..bcb8f1183b4f 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h @@ -18,4 +18,6 @@ #define __NR_release_ts __LSC(10) #define __NR_null_call __LSC(11) -#define NR_litmus_syscalls 12 +#define __NR_slave_non_rt_threads _LSC(12) + +#define NR_litmus_syscalls 13 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index d5ced0d2642c..5f56d5947343 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -30,4 +30,7 @@ __SYSCALL(__NR_release_ts, sys_release_ts) #define __NR_null_call __LSC(11) __SYSCALL(__NR_null_call, sys_null_call) -#define NR_litmus_syscalls 12 +#define __NR_slave_non_rt_threads __LSC(12) +__SYSCALL(__NR_slave_non_rt_threads, sys_slave_non_rt_threads) + +#define NR_litmus_syscalls 13 diff --git a/litmus/litmus.c b/litmus/litmus.c index 81384327e850..2300281b6b30 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -290,6 +290,60 @@ asmlinkage long sys_null_call(cycles_t __user *ts) return ret; } + + + + + + + + + +long __litmus_admit_task(struct task_struct* tsk); + +asmlinkage long sys_slave_non_rt_threads(void) +{ + long retval = 0; + struct task_struct *leader = current->group_leader; + struct task_struct *t; + struct task_struct *hp = NULL; + + read_lock_irq(&tasklist_lock); + + is_realtime(target) + + t = leader; + do { + TRACE_CUR("threads in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); + + if (tsk_rt(t)->heap_node == NULL) { + retval = __litmus_admit_task(t); + + if (retval != 0) break; + + /* hasn't been admitted into rt. make it a slave. */ + tsk_rt(t)->slave = 1; + } + else if (is_realtime(t)) + if (litmus->compare(t, hp)) { + hp = t; + } + } + + t = next_thread(t); + } while(t != leader); + + if (hp) { + /* set up inheritance */ + + } + + read_unlock_irq(&tasklist_lock); + + return 0; +} + + /* p is a real-time task. Re-init its state as a best-effort task. 
*/ static void reinit_litmus_state(struct task_struct* p, int restore) { @@ -318,32 +372,11 @@ static void reinit_litmus_state(struct task_struct* p, int restore) } } -long litmus_admit_task(struct task_struct* tsk) +long __litmus_admit_task(struct task_struct* tsk) { long retval = 0; unsigned long flags; - BUG_ON(is_realtime(tsk)); - - if (get_rt_relative_deadline(tsk) == 0 || - get_exec_cost(tsk) > - min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) { - TRACE_TASK(tsk, - "litmus admit: invalid task parameters " - "(e = %lu, p = %lu, d = %lu)\n", - get_exec_cost(tsk), get_rt_period(tsk), - get_rt_relative_deadline(tsk)); - retval = -EINVAL; - goto out; - } - - if (!cpu_online(get_partition(tsk))) { - TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", - get_partition(tsk)); - retval = -EINVAL; - goto out; - } - INIT_LIST_HEAD(&tsk_rt(tsk)->list); /* avoid scheduler plugin changing underneath us */ @@ -375,6 +408,38 @@ long litmus_admit_task(struct task_struct* tsk) out_unlock: raw_spin_unlock_irqrestore(&task_transition_lock, flags); + + return retval; +} + +long litmus_admit_task(struct task_struct* tsk) +{ + long retval = 0; + unsigned long flags; + + BUG_ON(is_realtime(tsk)); + + if (get_rt_relative_deadline(tsk) == 0 || + get_exec_cost(tsk) > + min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) { + TRACE_TASK(tsk, + "litmus admit: invalid task parameters " + "(e = %lu, p = %lu, d = %lu)\n", + get_exec_cost(tsk), get_rt_period(tsk), + get_rt_relative_deadline(tsk)); + retval = -EINVAL; + goto out; + } + + if (!cpu_online(get_partition(tsk))) { + TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", + get_partition(tsk)); + retval = -EINVAL; + goto out; + } + + retval = __litmus_admit_task(tsk); + out: return retval; } -- cgit v1.2.2 From 4ad6ba08f0dab67bbd89a26b27f1cc86e3c45c13 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 14 Sep 2012 08:34:36 -0400 Subject: checkpoint for aux_tasks. can still deadlock --- include/linux/sched.h | 4 +- include/litmus/aux_tasks.h | 33 ++++ include/litmus/litmus.h | 1 + include/litmus/rt_param.h | 17 +- include/litmus/sched_plugin.h | 7 + litmus/Makefile | 2 +- litmus/aux_tasks.c | 387 ++++++++++++++++++++++++++++++++++++++++++ litmus/edf_common.c | 22 ++- litmus/litmus.c | 111 ++++++------ litmus/nvidia_info.c | 48 ++++++ litmus/rt_domain.c | 13 +- litmus/sched_cedf.c | 12 +- litmus/sched_gsn_edf.c | 161 ++++++++++++++++-- litmus/sched_plugin.c | 16 ++ 14 files changed, 746 insertions(+), 88 deletions(-) create mode 100644 include/litmus/aux_tasks.h create mode 100644 litmus/aux_tasks.c diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c990d13ae35..5d1c041be809 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1532,8 +1532,10 @@ struct task_struct { #endif struct prop_local_single dirties; - /* LITMUS RT parameters and state */ + /*** LITMUS RT parameters and state ***/ struct rt_param rt_param; + struct aux_data aux_data; + /*****/ /* references to PI semaphores, etc. 
*/ struct od_table_entry *od_table; diff --git a/include/litmus/aux_tasks.h b/include/litmus/aux_tasks.h new file mode 100644 index 000000000000..8e50ac85b082 --- /dev/null +++ b/include/litmus/aux_tasks.h @@ -0,0 +1,33 @@ +#ifndef LITMUS_AUX_taskS +#define LITMUS_AUX_taskS + +struct task_struct; + +#define MAGIC_AUX_TASK_PERIOD ~((lt_t)0) + +/* admit an aux task with default parameters */ +//int admit_aux_task(struct task_struct *t); + +/* call on an aux task when it exits real-time */ +int exit_aux_task(struct task_struct *t); + +/* call when an aux_owner becomes real-time */ +long enable_aux_task_owner(struct task_struct *t); + +/* call when an aux_owner exits real-time */ +long disable_aux_task_owner(struct task_struct *t); + + +/* collectivelly make all aux tasks in the process of leader inherit from hp */ +//int aux_tasks_increase_priority(struct task_struct *leader, struct task_struct *hp); + +/* collectivelly make all aux tasks in the process of leader inherit from hp */ +//int aux_tasks_decrease_priority(struct task_struct *leader, struct task_struct *hp); + +/* call when an aux_owner increases its priority */ +int aux_task_owner_increase_priority(struct task_struct *t); + +/* call when an aux_owner decreases its priority */ +int aux_task_owner_decrease_priority(struct task_struct *t); + +#endif \ No newline at end of file diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index 1d70ab713571..f9829167294d 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -44,6 +44,7 @@ void litmus_exit_task(struct task_struct *tsk); ((t)->rt_param.transition_pending) #define tsk_rt(t) (&(t)->rt_param) +#define tsk_aux(t) (&(t)->aux_data) /* Realtime utility macros */ #define get_rt_flags(t) (tsk_rt(t)->flags) diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 02b750a9570b..2a6c70f1dd37 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -285,9 +285,13 @@ struct rt_param { #endif - struct task_struct* hp_group; - unsigned int is_slave:1; - unsigned int has_slaves:1; +#ifdef CONFIG_LITMUS_LOCKING + unsigned int is_aux_task:1; + unsigned int has_aux_tasks:1; + + struct list_head aux_task_node; + struct binheap_node aux_task_owner_node; +#endif #ifdef CONFIG_NP_SECTION @@ -354,6 +358,13 @@ struct rt_param { struct control_page * ctrl_page; }; +struct aux_data +{ + struct list_head aux_tasks; + struct binheap aux_task_owners; + unsigned int initialized:1; +}; + /* Possible RT flags */ #define RT_F_RUNNING 0x00000000 #define RT_F_SLEEP 0x00000001 diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index 24a6858b4b0b..bd75e7c09a10 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -70,6 +70,10 @@ typedef long (*allocate_affinity_observer_t) ( typedef void (*increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh); typedef void (*decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh); + +typedef int (*__increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh); +typedef int (*__decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh); + typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh, raw_spinlock_t *to_unlock, unsigned long irqflags); typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh, @@ -146,6 +150,9 @@ struct sched_plugin { allocate_lock_t allocate_lock; increase_prio_t increase_prio; decrease_prio_t decrease_prio; + + __increase_prio_t 
__increase_prio; + __decrease_prio_t __decrease_prio; #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING nested_increase_prio_t nested_increase_prio; diff --git a/litmus/Makefile b/litmus/Makefile index 59c018560ee9..f2dd7be7ae4a 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -31,7 +31,7 @@ obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o -obj-$(CONFIG_LITMUS_LOCKING) += kfmlp_lock.o +obj-$(CONFIG_LITMUS_LOCKING) += aux_tasks.o kfmlp_lock.o obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c new file mode 100644 index 000000000000..c197a95fc3a1 --- /dev/null +++ b/litmus/aux_tasks.c @@ -0,0 +1,387 @@ +#ifdef CONFIG_LITMUS_LOCKING + +#include +#include +#include +#include +#include + +static int admit_aux_task(struct task_struct *t) +{ + int retval = 0; + struct task_struct *leader = t->group_leader; + + struct rt_task tp = { + .exec_cost = 0, + .period = MAGIC_AUX_TASK_PERIOD, + .relative_deadline = MAGIC_AUX_TASK_PERIOD, + .phase = 0, + .cpu = task_cpu(leader), /* take CPU of group leader */ + .budget_policy = NO_ENFORCEMENT, + .cls = RT_CLASS_BEST_EFFORT + }; + + struct sched_param param = { .sched_priority = 0}; + + tsk_rt(t)->task_params = tp; + retval = sched_setscheduler_nocheck(t, SCHED_LITMUS, ¶m); + + return retval; +} + +int exit_aux_task(struct task_struct *t) +{ + int retval = 0; + struct task_struct *leader = t->group_leader; + + BUG_ON(!tsk_rt(t)->is_aux_task); + + TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + + list_del(&tsk_rt(t)->aux_task_node); + + tsk_rt(t)->is_aux_task = 0; + + if (tsk_rt(t)->inh_task) { + litmus->decrease_prio(t, NULL); + } + + return retval; +} + +static int aux_tasks_increase_priority(struct task_struct *leader, struct task_struct *hp) +{ + int retval = 0; + struct list_head *pos; + + TRACE_CUR("Increasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); + + list_for_each(pos, &tsk_aux(leader)->aux_tasks) { + struct task_struct *aux = + container_of(list_entry(pos, struct rt_param, aux_task_node), + struct task_struct, rt_param); + + if (!is_realtime(aux)) { +#if 0 + /* currently can't do this here because of scheduler deadlock on itself */ + TRACE_CUR("aux_tasks_increase_priorityting aux task: %s/%d\n", aux->comm, aux->pid); + retval = admit_aux_task(aux); + + if (retval != 0) { + TRACE_CUR("failed to admit aux task %s/%d\n", aux->comm, aux->pid); + goto out; + } +#endif + TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid); + } + + // aux tasks don't touch rt locks, so no nested call needed. 
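	// (Recovering each task above takes two steps: list_entry() maps the
	// list_head pointer to the rt_param that embeds aux_task_node, and
	// the outer container_of() maps that rt_param to its enclosing
	// task_struct.)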
+ TRACE_CUR("increasing %s/%d.\n", aux->comm, aux->pid); + retval = litmus->__increase_prio(aux, hp); + } + + //out: + return retval; +} + +static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_struct *hp) +{ + int retval = 0; + struct list_head *pos; + + TRACE_CUR("Decreasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); + + list_for_each(pos, &tsk_aux(leader)->aux_tasks) { + struct task_struct *aux = + container_of(list_entry(pos, struct rt_param, aux_task_node), + struct task_struct, rt_param); + + if (!is_realtime(aux)) { +#if 0 + /* currently can't do this here because of scheduler deadlock on itself */ + TRACE_CUR("aux_tasks_increase_priorityting aux task: %s/%d\n", aux->comm, aux->pid); + retval = admit_aux_task(aux); + + if (retval != 0) + goto out; + + if (hp) { + // aux tasks don't touch rt locks, so no nested call needed. + TRACE_CUR("decreasing (actually increasing) %s/%d.\n", aux->comm, aux->pid); + retval = litmus->__increase_prio(aux, hp); + } +#endif + + TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid); + } + else { + TRACE_CUR("decreasing %s/%d.\n", aux->comm, aux->pid); + retval = litmus->__decrease_prio(aux, hp); + } + } + + //out: + return retval; +} + +int aux_task_owner_increase_priority(struct task_struct *t) +{ + int retval = 0; + struct task_struct *leader; + struct task_struct *hp = NULL; + + BUG_ON(!tsk_rt(t)->has_aux_tasks); + BUG_ON(!is_realtime(t)); + BUG_ON(!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)); + + leader = t->group_leader; + + TRACE_CUR("task %s/%d in group %s/%d increasing priority.\n", t->comm, t->pid, leader->comm, leader->pid); + + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + + if (hp == t) { + goto out; // already hp, nothing to do. + } + + binheap_decrease(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); + + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + + if (hp == t) { + TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + retval = aux_tasks_increase_priority(leader, + (tsk_rt(hp)->inh_task) ? tsk_rt(hp)->inh_task : hp); + } + +out: + return retval; +} + +int aux_task_owner_decrease_priority(struct task_struct *t) +{ + int retval = 0; + struct task_struct *leader; + struct task_struct *hp = NULL; + struct task_struct *new_hp = NULL; + + BUG_ON(!tsk_rt(t)->has_aux_tasks); + BUG_ON(!is_realtime(t)); + BUG_ON(!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)); + + leader = t->group_leader; + + TRACE_CUR("task %s/%d in group %s/%d decresing priority.\n", t->comm, t->pid, leader->comm, leader->pid); + + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); + binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, + struct rt_param, aux_task_owner_node); + new_hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + + if (hp == t && new_hp != t) { + TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + retval = aux_tasks_decrease_priority(leader, + (tsk_rt(new_hp)->inh_task) ? 
tsk_rt(new_hp)->inh_task : new_hp); + } + + return retval; +} + + + +long enable_aux_task_owner(struct task_struct *t) +{ + long retval = 0; + struct task_struct *leader = t->group_leader; + struct task_struct *hp; + + if (!tsk_rt(t)->has_aux_tasks) { + TRACE_CUR("task %s/%d is not an aux owner\n", t->comm, t->pid); + return -1; + } + + BUG_ON(!is_realtime(t)); + + if (binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { + TRACE_CUR("task %s/%d is already active\n", t->comm, t->pid); + goto out; + } + + binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, + struct rt_param, aux_task_owner_node); + + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + if (hp == t) { + /* we're the new hp */ + TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + + retval = aux_tasks_increase_priority(leader, + (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + } + + +out: + return retval; +} + +long disable_aux_task_owner(struct task_struct *t) +{ + long retval = 0; + struct task_struct *leader = t->group_leader; + struct task_struct *hp; + struct task_struct *new_hp = NULL; + + if (!tsk_rt(t)->has_aux_tasks) { + TRACE_CUR("task %s/%d is not an aux owner\n", t->comm, t->pid); + return -1; + } + + BUG_ON(!is_realtime(t)); + + if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { + TRACE_CUR("task %s/%d is already not active\n", t->comm, t->pid); + goto out; + } + + TRACE_CUR("task %s/%d exiting from group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); + + if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { + new_hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + } + + if (hp == t && new_hp != t) { + struct task_struct *to_inh = NULL; + + TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + + if (new_hp) { + to_inh = (tsk_rt(new_hp)->inh_task) ? 
tsk_rt(new_hp)->inh_task : new_hp; + } + + retval = aux_tasks_decrease_priority(leader, to_inh); + } + +out: + return retval; +} + + +static int aux_task_owner_max_priority_order(struct binheap_node *a, + struct binheap_node *b) +{ + struct task_struct *d_a = container_of(binheap_entry(a, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + + BUG_ON(!d_a); + BUG_ON(!d_b); + + return litmus->compare(d_a, d_b); +} + + +asmlinkage long sys_slave_non_rt_threads(void) +{ + long retval = 0; + struct task_struct *leader; + struct task_struct *t; + + read_lock_irq(&tasklist_lock); + + leader = current->group_leader; + +#if 0 + t = leader; + do { + if (tsk_rt(t)->has_aux_tasks || tsk_rt(t)->is_aux_task) { + printk("slave_non_rt_tasks may only be called once per process.\n"); + retval = -EINVAL; + goto out_unlock; + } + } while (t != leader); +#endif + + if (!tsk_aux(leader)->initialized) { + INIT_LIST_HEAD(&tsk_aux(leader)->aux_tasks); + INIT_BINHEAP_HANDLE(&tsk_aux(leader)->aux_task_owners, aux_task_owner_max_priority_order); + tsk_aux(leader)->initialized = 1; + } + + t = leader; + do { + /* doesn't hurt to initialize them both */ + INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); + INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); + + TRACE_CUR("Checking task in %s/%d: %s/%d = (p = %llu):\n", + leader->comm, leader->pid, t->comm, t->pid, + tsk_rt(t)->task_params.period); + + /* inspect heap_node to see if it is an rt task */ + if (tsk_rt(t)->task_params.period == 0 || + tsk_rt(t)->task_params.period == MAGIC_AUX_TASK_PERIOD) { + if (!tsk_rt(t)->is_aux_task) { + TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); + /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ + tsk_rt(t)->is_aux_task = 1; + list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); + + (void)admit_aux_task(t); + } + else { + TRACE_CUR("AUX task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); + } + } + else { + if (!tsk_rt(t)->has_aux_tasks) { + TRACE_CUR("task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); + tsk_rt(t)->has_aux_tasks = 1; + if (is_realtime(t)) { + binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, + struct rt_param, aux_task_owner_node); + } + } + else { + TRACE_CUR("task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); + } + } + + t = next_thread(t); + } while(t != leader); + + + if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { + struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + TRACE_CUR("found hp in group: %s/%d\n", hp->comm, hp->pid); + retval = aux_tasks_increase_priority(leader, + (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + } + + //out_unlock: + read_unlock_irq(&tasklist_lock); + + return retval; +} + +#else + +asmlinkage long sys_slave_non_rt_tasks(void) +{ + printk("Unsupported. 
Recompile with CONFIG_LITMUS_LOCKING.\n"); + return -EINVAL; +} + +#endif diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 39ce1816ee04..9b439299e5fc 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -74,6 +74,23 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) } #ifdef CONFIG_LITMUS_LOCKING + /* aux threads with no inheritance have lowest priority; however, do a PID + * tie break if both threads are aux threads with no inheritance. + */ + if (unlikely(first->rt_param.is_aux_task && !first->rt_param.inh_task)) { + if (second->rt_param.is_aux_task && !second->rt_param.inh_task) { + /* pid break */ + if (first->pid < second->pid) { + return 1; + } + } + return 0; + } + if (unlikely(second->rt_param.is_aux_task && !second->rt_param.inh_task)) { + /* no need for pid break -- case already tested */ + return 1; + } + /* Check for EFFECTIVE priorities. Change task * used for comparison in such a case. */ @@ -191,7 +208,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) /* Both tasks have the same inherited priority. * Likely in a bug-condition. */ - if (likely(first->pid < second->pid)) { + if (first->pid < second->pid) { return 1; } else if (first->pid == second->pid) { @@ -205,6 +222,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) /* The task with the inherited priority wins. */ if (!second->rt_param.inh_task) { + /* + * common with aux tasks. TRACE_CUR("unusual comparison: " "first = %s/%d first_task = %s/%d " "second = %s/%d second_task = %s/%d\n", @@ -214,6 +233,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) second->comm, second->pid, (second->rt_param.inh_task) ? second->rt_param.inh_task->comm : "(nil)", (second->rt_param.inh_task) ? second->rt_param.inh_task->pid : 0); + */ return 1; } } diff --git a/litmus/litmus.c b/litmus/litmus.c index 83e8ef3f42af..1b4182ac3337 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -25,6 +25,10 @@ #include #endif +#ifdef CONFIG_LITMUS_LOCKING +#include +#endif + /* Number of RT tasks that exist in the system */ atomic_t rt_task_count = ATOMIC_INIT(0); static DEFINE_RAW_SPINLOCK(task_transition_lock); @@ -327,60 +331,6 @@ asmlinkage long sys_null_call(cycles_t __user *ts) return ret; } - -long __litmus_admit_task(struct task_struct* tsk); - -asmlinkage long sys_slave_non_rt_threads(void) -{ - long retval = 0; - struct task_struct *leader = current->group_leader; - struct task_struct *t; - struct task_struct *hp = NULL; - - read_lock_irq(&tasklist_lock); - - t = leader; - do { - TRACE_CUR("threads in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); - - if (tsk_rt(t)->heap_node == NULL) { - retval = __litmus_admit_task(t); - - if (retval != 0) break; - - /* hasn't been admitted into rt. make it a slave. 
*/ - tsk_rt(t)->slave = 1; - } - else { - tsk_rt(t)->has_slaves = 1; - - if (is_realtime(t) && litmus->compare(t, hp)) { - hp = t; - } - } - - t = next_thread(t); - } while(t != leader); - - if (hp) { - TRACE_CUR("found hp in group: %s/%d\n", hp->comm, hp->pid); - - /* set up inheritance */ - leader->hp_group = hp; - - t = leader; - do { - if (tsk_rt(t)->slave) { - litmus->increase_prio(t); - } - } while(t != leader); - } - - read_unlock_irq(&tasklist_lock); - - return 0; -} - #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) void init_gpu_affinity_state(struct task_struct* p) { @@ -412,11 +362,13 @@ static void reinit_litmus_state(struct task_struct* p, int restore) { struct rt_task user_config = {}; void* ctrl_page = NULL; - + #ifdef CONFIG_LITMUS_NESTED_LOCKING binheap_order_t prio_order = NULL; #endif + TRACE_TASK(p, "reinit_litmus_state: restore = %d\n", restore); + if (restore) { /* Safe user-space provided configuration data. * and allocated page. */ @@ -428,10 +380,12 @@ static void reinit_litmus_state(struct task_struct* p, int restore) prio_order = p->rt_param.hp_blocked_tasks.compare; #endif +#ifdef CONFIG_LITMUS_LOCKING /* We probably should not be inheriting any task's priority * at this point in time. */ WARN_ON(p->rt_param.inh_task); +#endif #ifdef CONFIG_LITMUS_NESTED_LOCKING WARN_ON(p->rt_param.blocked_lock); @@ -459,6 +413,13 @@ static void reinit_litmus_state(struct task_struct* p, int restore) /* Cleanup everything else. */ memset(&p->rt_param, 0, sizeof(p->rt_param)); +#ifdef CONFIG_LITMUS_LOCKING + /* also clear out the aux_data. the !restore case is only called on + * fork (initial thread creation). */ + if (!restore) + memset(&p->aux_data, 0, sizeof(p->aux_data)); +#endif + /* Restore preserved fields. */ if (restore) { p->rt_param.task_params = user_config; @@ -475,7 +436,12 @@ static void reinit_litmus_state(struct task_struct* p, int restore) #endif } + +#ifdef CONFIG_LITMUS_LOCKING +long __litmus_admit_task(struct task_struct* tsk, int clear_aux) +#else long __litmus_admit_task(struct task_struct* tsk) +#endif { long retval = 0; unsigned long flags; @@ -520,6 +486,14 @@ long __litmus_admit_task(struct task_struct* tsk) atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD); #endif +#ifdef CONFIG_LITMUS_LOCKING + /* turns out our aux thread isn't really an aux thread. 
*/ + if (clear_aux && tsk_rt(tsk)->is_aux_task) { + exit_aux_task(tsk); + tsk_rt(tsk)->has_aux_tasks = 1; + } +#endif + retval = litmus->admit_task(tsk); if (!retval) { @@ -537,8 +511,7 @@ out_unlock: long litmus_admit_task(struct task_struct* tsk) { long retval = 0; - unsigned long flags; - + BUG_ON(is_realtime(tsk)); if (get_rt_relative_deadline(tsk) == 0 || @@ -560,8 +533,12 @@ long litmus_admit_task(struct task_struct* tsk) goto out; } +#ifdef CONFIG_LITMUS_LOCKING + retval = __litmus_admit_task(tsk, (tsk_rt(tsk)->task_params.period != MAGIC_AUX_TASK_PERIOD)); +#else retval = __litmus_admit_task(tsk); - +#endif + out: return retval; } @@ -574,7 +551,7 @@ void litmus_exit_task(struct task_struct* tsk) litmus->task_exit(tsk); BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); - bheap_node_free(tsk_rt(tsk)->heap_node); + bheap_node_free(tsk_rt(tsk)->heap_node); release_heap_free(tsk_rt(tsk)->rel_heap); atomic_dec(&rt_task_count); @@ -647,14 +624,22 @@ out: */ void litmus_fork(struct task_struct* p) { + reinit_litmus_state(p, 0); + if (is_realtime(p)) { + TRACE_TASK(p, "fork, is real-time\n"); /* clean out any litmus related state, don't preserve anything */ - reinit_litmus_state(p, 0); + //reinit_litmus_state(p, 0); /* Don't let the child be a real-time task. */ p->sched_reset_on_fork = 1; - } else + } else { /* non-rt tasks might have ctrl_page set */ tsk_rt(p)->ctrl_page = NULL; + + /* still don't inherit any parental parameters */ + //memset(&p->rt_param, 0, sizeof(p->rt_param)); + //memset(&p->aux_data, 0, sizeof(p->aux_data)); + } /* od tables are never inherited across a fork */ p->od_table = NULL; @@ -751,6 +736,10 @@ static int __init _init_litmus(void) init_topology(); #endif +#ifdef CONFIG_LITMUS_NVIDIA + //init_nvidia_info(); +#endif + return 0; } diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 4b86a50d3bd1..b6ead58802f6 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -244,9 +244,56 @@ void dump_nvidia_info(const struct tasklet_struct *t) #endif } + + static struct module* nvidia_mod = NULL; + + +#if 0 +static int nvidia_ready_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + mutex_lock(&module_mutex); + nvidia_mod = find_module("nvidia"); + mutex_unlock(&module_mutex); + + if(nvidia_mod != NULL) + { + TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__, + (void*)(nvidia_mod->module_core), + (void*)(nvidia_mod->module_core) + nvidia_mod->core_size); + init_nv_device_reg(); + return(0); + } + else + { + TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); + } +} + +static int nvidia_going_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + nvidia_mod = NULL; + mb(); + + return 0; +} + +static struct notifier_block nvidia_ready = { + .notifier_call = nvidia_ready_module_notify, + .priority = 1, +}; + +static struct notifier_block nvidia_going = { + .notifier_call = nvidia_going_module_notify, + .priority = 1, +}; +#endif + int init_nvidia_info(void) { +#if 1 mutex_lock(&module_mutex); nvidia_mod = find_module("nvidia"); mutex_unlock(&module_mutex); @@ -263,6 +310,7 @@ int init_nvidia_info(void) TRACE("%s : Could not find NVIDIA module! 
Loaded?\n", __FUNCTION__); return(-1); } +#endif } void shutdown_nvidia_info(void) diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c index d0b796611bea..d4f030728d3c 100644 --- a/litmus/rt_domain.c +++ b/litmus/rt_domain.c @@ -300,10 +300,15 @@ void rt_domain_init(rt_domain_t *rt, */ void __add_ready(rt_domain_t* rt, struct task_struct *new) { - TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu " - "to ready queue at %llu\n", - new->comm, new->pid, - get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new), + TRACE("rt: adding %s/%d (%llu, %llu, %llu) " + "[inh_task: %s/%d (%llu, %llu %llu)] " + "rel=%llu to ready queue at %llu\n", + new->comm, new->pid, get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new), + (tsk_rt(new)->inh_task) ? tsk_rt(new)->inh_task->comm : "(nil)", + (tsk_rt(new)->inh_task) ? tsk_rt(new)->inh_task->pid : 0, + (tsk_rt(new)->inh_task) ? get_exec_cost(tsk_rt(new)->inh_task) : 0, + (tsk_rt(new)->inh_task) ? get_rt_period(tsk_rt(new)->inh_task) : 0, + (tsk_rt(new)->inh_task) ? get_rt_relative_deadline(tsk_rt(new)->inh_task) : 0, get_release(new), litmus_clock()); BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index d98de4579394..f030f027b486 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -1086,9 +1086,10 @@ static long cedf_admit_task(struct task_struct* tsk) /* called with IRQs off */ -static void __increase_priority_inheritance(struct task_struct* t, +static int __increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { + int success = 1; int linked_on; int check_preempt = 0; @@ -1166,8 +1167,10 @@ static void __increase_priority_inheritance(struct task_struct* t, (prio_inh) ? prio_inh->comm : "nil", (prio_inh) ? prio_inh->pid : -1); WARN_ON(!prio_inh); + success = 0; } #endif + return success; } /* called with IRQs off */ @@ -1204,9 +1207,10 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str } /* called with IRQs off */ -static void __decrease_priority_inheritance(struct task_struct* t, +static int __decrease_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { + int success = 1; #ifdef CONFIG_LITMUS_NESTED_LOCKING if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { #endif @@ -1254,8 +1258,10 @@ static void __decrease_priority_inheritance(struct task_struct* t, effective_priority(t)->comm, effective_priority(t)->pid, (prio_inh) ? prio_inh->comm : "nil", (prio_inh) ? 
prio_inh->pid : -1); + success = 0; } #endif + return success; } static void decrease_priority_inheritance(struct task_struct* t, @@ -1812,6 +1818,8 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { .allocate_lock = cedf_allocate_lock, .increase_prio = increase_priority_inheritance, .decrease_prio = decrease_priority_inheritance, + .__increase_prio = __increase_priority_inheritance, + .__decrease_prio = __decrease_priority_inheritance, #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING .nested_increase_prio = nested_increase_priority_inheritance, diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 83b2f04b1532..5b8ca6698423 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -29,6 +29,7 @@ #ifdef CONFIG_LITMUS_LOCKING #include +#include #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING @@ -295,11 +296,37 @@ static noinline void requeue(struct task_struct* task) /* sanity check before insertion */ BUG_ON(is_queued(task)); - if (is_released(task, litmus_clock())) - __add_ready(&gsnedf, task); + if (is_released(task, litmus_clock())) { + + if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) { + /* aux_task probably transitioned to real-time while it was blocked */ + TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid); + } + else { + __add_ready(&gsnedf, task); + +#if 0 + if (tsk_rt(task)->has_aux_tasks) { + + TRACE_CUR("%s/%d is ready and has aux tasks.\n", task->comm, task->pid); + /* allow it's prio inheritance to act on aux threads */ + enable_aux_task_owner(task); + } +#endif + } + } else { /* it has got to wait */ add_release(&gsnedf, task); + +#if 0 + if (tsk_rt(task)->has_aux_tasks) { + + TRACE_CUR("%s/%d is waiting for release and has aux tasks.\n", task->comm, task->pid); + /* prevent prio inheritance from acting while it's not ready */ + disable_aux_task_owner(task); + } +#endif } } @@ -366,10 +393,45 @@ static noinline void gsnedf_job_arrival(struct task_struct* task) static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) { unsigned long flags; + //struct bheap_node* node; raw_spin_lock_irqsave(&gsnedf_lock, flags); +#if 0 + node = tasks->head; + while(node) { + struct task_struct *task = bheap2task(node); + + if (tsk_rt(task)->has_aux_tasks) { + + TRACE_CUR("%s/%d is ready and has aux tasks.\n", task->comm, task->pid); + + /* allow it's prio inheritance to act on aux threads */ + enable_aux_task_owner(task); + } + + /* pre-order sub-tree traversal */ + if (node->child) { + /* go down */ + node = node->child; + } + else if(node->parent && node->parent->next) { + /* go up a level and across */ + node = node->parent->next; + } + else if(!node->parent && node->next) { + /* go to the next binomial tree */ + node = node->next; + } + else { + /* the end! 
*/ + node = NULL; + } + } +#endif + __merge_ready(rt, tasks); + check_for_preemptions(); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); @@ -387,11 +449,12 @@ static noinline void job_completion(struct task_struct *t, int forced) #endif TRACE_TASK(t, "job_completion().\n"); - + /* set flags */ set_rt_flags(t, RT_F_SLEEP); /* prepare for next period */ prepare_for_next_period(t); + if (is_released(t, litmus_clock())) sched_trace_task_release(t); /* unlink */ @@ -902,8 +965,7 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) else if (exists && !next) TRACE("becomes idle at %llu.\n", litmus_clock()); #endif - - + return next; } @@ -997,13 +1059,18 @@ static void gsnedf_task_wake_up(struct task_struct *task) set_rt_flags(task, RT_F_RUNNING); #endif + if (tsk_rt(task)->has_aux_tasks) { + + TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); + disable_aux_task_owner(task); + } + gsnedf_job_arrival(task); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); } static void gsnedf_task_block(struct task_struct *t) { - // TODO: is this called on preemption?? unsigned long flags; TRACE_TASK(t, "block at %llu\n", litmus_clock()); @@ -1013,6 +1080,12 @@ static void gsnedf_task_block(struct task_struct *t) unlink(t); + if (tsk_rt(t)->has_aux_tasks) { + + TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); + enable_aux_task_owner(t); + } + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); BUG_ON(!is_realtime(t)); @@ -1027,8 +1100,22 @@ static void gsnedf_task_exit(struct task_struct * t) gsnedf_change_prio_pai_tasklet(t, NULL); #endif +#ifdef CONFIG_LITMUS_LOCKING + if (tsk_rt(t)->is_aux_task) { + exit_aux_task(t); /* cannot be called with gsnedf_lock held */ + } +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); + +#ifdef CONFIG_LITMUS_LOCKING + /* make sure we clean up on our way out */ + if(tsk_rt(t)->has_aux_tasks) { + disable_aux_task_owner(t); /* must be called witl gsnedf_lock held */ + } +#endif + unlink(t); if (tsk_rt(t)->scheduled_on != NO_CPU) { gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; @@ -1037,7 +1124,7 @@ static void gsnedf_task_exit(struct task_struct * t) raw_spin_unlock_irqrestore(&gsnedf_lock, flags); BUG_ON(!is_realtime(t)); - TRACE_TASK(t, "RIP\n"); + TRACE_TASK(t, "RIP\n"); } @@ -1061,12 +1148,20 @@ static long gsnedf_admit_task(struct task_struct* tsk) #include /* called with IRQs off */ -static void __increase_priority_inheritance(struct task_struct* t, +static int __increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { + int success = 1; int linked_on; int check_preempt = 0; + if (prio_inh && prio_inh == effective_priority(t)) { + /* relationship already established. */ + TRACE_TASK(t, "already has effective priority of %s/%d\n", + prio_inh->comm, prio_inh->pid); + goto out; + } + #ifdef CONFIG_LITMUS_NESTED_LOCKING /* this sanity check allows for weaker locking in protocols */ /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */ @@ -1126,28 +1221,40 @@ static void __increase_priority_inheritance(struct task_struct* t, &gsnedf.ready_queue); check_for_preemptions(); } + + + /* propagate to aux tasks */ + if (tsk_rt(t)->has_aux_tasks) { + aux_task_owner_increase_priority(t); + } } #ifdef CONFIG_LITMUS_NESTED_LOCKING } else { TRACE_TASK(t, "Spurious invalid priority increase. 
" - "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", t->comm, t->pid, effective_priority(t)->comm, effective_priority(t)->pid, (prio_inh) ? prio_inh->comm : "nil", (prio_inh) ? prio_inh->pid : -1); WARN_ON(!prio_inh); + success = 0; } #endif + +out: + return success; } /* called with IRQs off */ static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { + int success; + raw_spin_lock(&gsnedf_lock); - __increase_priority_inheritance(t, prio_inh); + success = __increase_priority_inheritance(t, prio_inh); #ifdef CONFIG_LITMUS_SOFTIRQD if(tsk_rt(t)->cur_klitirqd != NULL) @@ -1160,7 +1267,7 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str #endif raw_spin_unlock(&gsnedf_lock); - + #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) if(tsk_rt(t)->held_gpus) { int i; @@ -1175,9 +1282,19 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str /* called with IRQs off */ -static void __decrease_priority_inheritance(struct task_struct* t, +static int __decrease_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { + int success = 1; + + if (prio_inh == tsk_rt(t)->inh_task) { + /* relationship already established. */ + TRACE_TASK(t, "already inherits priority from %s/%d\n", + (prio_inh) ? prio_inh->comm : "(nil)", + (prio_inh) ? prio_inh->pid : 0); + goto out; + } + #ifdef CONFIG_LITMUS_NESTED_LOCKING if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { #endif @@ -1214,6 +1331,11 @@ static void __decrease_priority_inheritance(struct task_struct* t, } raw_spin_unlock(&gsnedf.release_lock); } + + /* propagate to aux tasks */ + if (tsk_rt(t)->has_aux_tasks) { + aux_task_owner_decrease_priority(t); + } #ifdef CONFIG_LITMUS_NESTED_LOCKING } else { @@ -1224,16 +1346,23 @@ static void __decrease_priority_inheritance(struct task_struct* t, effective_priority(t)->comm, effective_priority(t)->pid, (prio_inh) ? prio_inh->comm : "nil", (prio_inh) ? 
prio_inh->pid : -1); + success = 0; } #endif + +out: + return success; } static void decrease_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { + int success; + raw_spin_lock(&gsnedf_lock); - __decrease_priority_inheritance(t, prio_inh); - + + success = __decrease_priority_inheritance(t, prio_inh); + #ifdef CONFIG_LITMUS_SOFTIRQD if(tsk_rt(t)->cur_klitirqd != NULL) { @@ -1245,7 +1374,7 @@ static void decrease_priority_inheritance(struct task_struct* t, #endif raw_spin_unlock(&gsnedf_lock); - + #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) if(tsk_rt(t)->held_gpus) { int i; @@ -1828,6 +1957,8 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { .allocate_lock = gsnedf_allocate_lock, .increase_prio = increase_priority_inheritance, .decrease_prio = decrease_priority_inheritance, + .__increase_prio = __increase_priority_inheritance, + .__decrease_prio = __decrease_priority_inheritance, #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING .nested_increase_prio = nested_increase_priority_inheritance, diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 245e41c25a5d..d24c9167cff8 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -137,6 +137,18 @@ static void litmus_dummy_increase_prio(struct task_struct* t, struct task_struct static void litmus_dummy_decrease_prio(struct task_struct* t, struct task_struct* prio_inh) { } + +static int litmus_dummy___increase_prio(struct task_struct* t, struct task_struct* prio_inh) +{ + TRACE_CUR("WARNING: Dummy litmus_dummy___increase_prio called!\n"); + return 0; +} + +static int litmus_dummy___decrease_prio(struct task_struct* t, struct task_struct* prio_inh) +{ + TRACE_CUR("WARNING: Dummy litmus_dummy___decrease_prio called!\n"); + return 0; +} #endif #ifdef CONFIG_LITMUS_SOFTIRQD @@ -227,6 +239,8 @@ struct sched_plugin linux_sched_plugin = { .allocate_lock = litmus_dummy_allocate_lock, .increase_prio = litmus_dummy_increase_prio, .decrease_prio = litmus_dummy_decrease_prio, + .__increase_prio = litmus_dummy___increase_prio, + .__decrease_prio = litmus_dummy___decrease_prio, #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING .nested_increase_prio = litmus_dummy_nested_increase_prio, @@ -289,6 +303,8 @@ int register_sched_plugin(struct sched_plugin* plugin) CHECK(allocate_lock); CHECK(increase_prio); CHECK(decrease_prio); + CHECK(__increase_prio); + CHECK(__decrease_prio); #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING CHECK(nested_increase_prio); -- cgit v1.2.2 From 4e8f9b7c2e9134ca31feb91dee3609a95df6de56 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 16 Sep 2012 17:44:37 -0400 Subject: Implement real-time aux threads. G-EDF only. 
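The core scheduling change in this patch is the aux-thread ordering in
edf_common.c: an aux thread that is not inheriting a priority always loses to
any other real-time task; two such aux threads are ordered by a hash of
(pid, job#) so that quantum-driven job completions rotate which one wins; and
aux threads inheriting from the same owner are tie-broken the same way. The
sketch below is a user-space model of that ordering, not the kernel code; the
struct fields are simplified stand-ins and hash32() only approximates the
kernel's hash_32().

#include <stdint.h>
#include <stdio.h>

struct tsk {
    int      pid;
    uint32_t job_no;        /* bumped whenever the 1 ms aux budget expires  */
    int      is_aux;        /* aux helper thread?                           */
    const struct tsk *inh;  /* task whose priority is inherited, or NULL    */
    uint64_t deadline;      /* effective (possibly inherited) deadline      */
};

/* rough stand-in for the kernel's hash_32(): multiply by a golden-ratio prime */
static uint32_t hash32(uint32_t v) { return v * 0x9e370001u; }

static int aux_tie_break(const struct tsk *a, const struct tsk *b)
{
    uint32_t ha = hash32(hash32(a->job_no) ^ (uint32_t)a->pid);
    uint32_t hb = hash32(hash32(b->job_no) ^ (uint32_t)b->pid);
    if (ha != hb)
        return ha < hb;        /* pseudo-random but stable within a job */
    return a->pid < b->pid;    /* deterministic fallback                */
}

/* non-zero if a should be scheduled ahead of b */
static int edf_higher_prio(const struct tsk *a, const struct tsk *b)
{
    int a_lo = a->is_aux && !a->inh;   /* aux with nothing to inherit */
    int b_lo = b->is_aux && !b->inh;

    if (a_lo && b_lo) return aux_tie_break(a, b);  /* both are background aux */
    if (a_lo) return 0;                            /* a always loses          */
    if (b_lo) return 1;                            /* b always loses          */

    /* aux threads inheriting from the same owner: rotate between them too */
    if (a->is_aux && b->is_aux && a->inh && a->inh == b->inh)
        return aux_tie_break(a, b);

    return a->deadline < b->deadline;              /* ordinary EDF            */
}

int main(void)
{
    struct tsk owner = { .pid = 50, .deadline = 1000 };
    struct tsk aux1  = { .pid = 51, .is_aux = 1 };
    struct tsk aux2  = { .pid = 52, .is_aux = 1, .inh = &owner, .deadline = 1000 };

    printf("owner beats idle aux:      %d\n", edf_higher_prio(&owner, &aux1)); /* 1 */
    printf("inheriting aux beats idle: %d\n", edf_higher_prio(&aux2, &aux1));  /* 1 */
    return 0;
}
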
--- include/litmus/aux_tasks.h | 4 +- include/litmus/litmus.h | 4 +- include/litmus/rt_param.h | 4 +- include/litmus/sched_plugin.h | 2 +- kernel/sched.c | 24 ++++- litmus/aux_tasks.c | 243 +++++++++++++++++++++++------------------- litmus/edf_common.c | 83 +++++++++++---- litmus/litmus.c | 44 +++----- litmus/preempt.c | 25 ++++- litmus/sched_gsn_edf.c | 169 ++++++++++++++--------------- litmus/sched_litmus.c | 4 +- litmus/sched_plugin.c | 22 +++- 12 files changed, 357 insertions(+), 271 deletions(-) diff --git a/include/litmus/aux_tasks.h b/include/litmus/aux_tasks.h index 8e50ac85b082..3bb6b26fef09 100644 --- a/include/litmus/aux_tasks.h +++ b/include/litmus/aux_tasks.h @@ -3,8 +3,6 @@ struct task_struct; -#define MAGIC_AUX_TASK_PERIOD ~((lt_t)0) - /* admit an aux task with default parameters */ //int admit_aux_task(struct task_struct *t); @@ -30,4 +28,4 @@ int aux_task_owner_increase_priority(struct task_struct *t); /* call when an aux_owner decreases its priority */ int aux_task_owner_decrease_priority(struct task_struct *t); -#endif \ No newline at end of file +#endif diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index f9829167294d..db2987a24686 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -184,8 +184,10 @@ static inline int request_exit_np_atomic(struct task_struct *t) * retry loop here since tasks might exploit that to * keep the kernel busy indefinitely. */ } - } else + } + else { return 0; + } } #else diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 2a6c70f1dd37..c45ba23d7650 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -284,11 +284,11 @@ struct rt_param { struct litmus_lock* blocked_lock; #endif - + #ifdef CONFIG_LITMUS_LOCKING unsigned int is_aux_task:1; unsigned int has_aux_tasks:1; - + struct list_head aux_task_node; struct binheap_node aux_task_owner_node; #endif diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index bd75e7c09a10..65736b2a9199 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -150,7 +150,7 @@ struct sched_plugin { allocate_lock_t allocate_lock; increase_prio_t increase_prio; decrease_prio_t decrease_prio; - + __increase_prio_t __increase_prio; __decrease_prio_t __decrease_prio; #endif diff --git a/kernel/sched.c b/kernel/sched.c index 9e8d8698323b..0e4b3d40cd29 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2703,8 +2703,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) unsigned long flags; int cpu, success = 0; - if (is_realtime(p)) + if (is_realtime(p)) { + //WARN_ON(1); TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state); + } smp_wmb(); raw_spin_lock_irqsave(&p->pi_lock, flags); @@ -3169,6 +3171,12 @@ asmlinkage void schedule_tail(struct task_struct *prev) { struct rq *rq = this_rq(); + sched_trace_task_switch_to(current); + + if (sched_state_validate_switch()) { + WARN_ON(1); + } + finish_task_switch(rq, prev); /* @@ -4416,8 +4424,16 @@ litmus_need_resched_nonpreemptible: post_schedule(rq); - if (sched_state_validate_switch()) + if (sched_state_validate_switch()) { + TRACE_CUR("cpu %d: have to redo scheduling decision!\n", cpu); goto litmus_need_resched_nonpreemptible; + } + else if (current->policy == SCHED_LITMUS) { + TRACE_CUR("cpu %d: valid switch to rt task %s/%d.\n", cpu, current->comm, current->pid); + } + else { +// TRACE_CUR("cpu %d: switch: %s/%d\n", cpu, current->comm, current->pid); + } preempt_enable_no_resched(); @@ -4430,8 +4446,8 @@ 
litmus_need_resched_nonpreemptible: #ifdef CONFIG_LITMUS_PAI_SOFTIRQD litmus->run_tasklets(prev); -#endif - +#endif + srp_ceiling_block(); } EXPORT_SYMBOL(schedule); diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index c197a95fc3a1..5057137bbbea 100644 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -10,22 +10,37 @@ static int admit_aux_task(struct task_struct *t) { int retval = 0; struct task_struct *leader = t->group_leader; - + + + /* budget enforcement increments job numbers. job numbers are used in + * tie-breaking of aux_tasks. method helps ensure: + * 1) aux threads with no inherited priority can starve another (they share + * the CPUs equally. + * 2) aux threads that inherit the same priority cannot starve each other. + * + * Assuming aux threads are well-behavied (they do very little work and + * suspend), risk of starvation should not be an issue, but this is a + * fail-safe. + */ struct rt_task tp = { - .exec_cost = 0, - .period = MAGIC_AUX_TASK_PERIOD, - .relative_deadline = MAGIC_AUX_TASK_PERIOD, + //.period = MAGIC_AUX_TASK_PERIOD, + //.relative_deadline = MAGIC_AUX_TASK_PERIOD, + .period = 1000000, /* has to wait 1 ms before it can run again once it has exhausted budget */ + .relative_deadline = 1000000, + .exec_cost = 1000000, /* allow full utilization */ .phase = 0, .cpu = task_cpu(leader), /* take CPU of group leader */ - .budget_policy = NO_ENFORCEMENT, + //.budget_policy = NO_ENFORCEMENT, + .budget_policy = QUANTUM_ENFORCEMENT, + .budget_signal_policy = NO_SIGNALS, .cls = RT_CLASS_BEST_EFFORT }; - + struct sched_param param = { .sched_priority = 0}; - + tsk_rt(t)->task_params = tp; retval = sched_setscheduler_nocheck(t, SCHED_LITMUS, ¶m); - + return retval; } @@ -33,19 +48,19 @@ int exit_aux_task(struct task_struct *t) { int retval = 0; struct task_struct *leader = t->group_leader; - + BUG_ON(!tsk_rt(t)->is_aux_task); - + TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); - + list_del(&tsk_rt(t)->aux_task_node); - + tsk_rt(t)->is_aux_task = 0; - + if (tsk_rt(t)->inh_task) { litmus->decrease_prio(t, NULL); } - + return retval; } @@ -53,34 +68,23 @@ static int aux_tasks_increase_priority(struct task_struct *leader, struct task_s { int retval = 0; struct list_head *pos; - + TRACE_CUR("Increasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); - + list_for_each(pos, &tsk_aux(leader)->aux_tasks) { struct task_struct *aux = container_of(list_entry(pos, struct rt_param, aux_task_node), struct task_struct, rt_param); if (!is_realtime(aux)) { -#if 0 - /* currently can't do this here because of scheduler deadlock on itself */ - TRACE_CUR("aux_tasks_increase_priorityting aux task: %s/%d\n", aux->comm, aux->pid); - retval = admit_aux_task(aux); - - if (retval != 0) { - TRACE_CUR("failed to admit aux task %s/%d\n", aux->comm, aux->pid); - goto out; - } -#endif TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid); } - + // aux tasks don't touch rt locks, so no nested call needed. 
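/*
 * For reference, the list walk above stores only the embedded
 * rt_param.aux_task_node of each aux thread on the leader's list, so the
 * owning task_struct has to be recovered with two container_of() steps.
 * A self-contained user-space model of that idiom follows; the struct
 * layouts are simplified stand-ins, not the kernel definitions.
 */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };     /* as in <linux/list.h> */
struct rt_param  { struct list_head aux_task_node; };     /* embedded list node   */
struct task_struct { int pid; struct rt_param rt_param; };

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

/* mirrors: container_of(list_entry(pos, struct rt_param, aux_task_node),
 *                       struct task_struct, rt_param)                     */
static struct task_struct *node_to_task(struct list_head *pos)
{
    struct rt_param *rp = container_of(pos, struct rt_param, aux_task_node);
    return container_of(rp, struct task_struct, rt_param);
}

int main(void)
{
    struct task_struct t = { .pid = 1234 };
    struct list_head *pos = &t.rt_param.aux_task_node;   /* what the walk sees */

    printf("recovered pid: %d\n", node_to_task(pos)->pid);   /* prints 1234 */
    return 0;
}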
TRACE_CUR("increasing %s/%d.\n", aux->comm, aux->pid); retval = litmus->__increase_prio(aux, hp); } - - //out: + return retval; } @@ -88,30 +92,15 @@ static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_s { int retval = 0; struct list_head *pos; - + TRACE_CUR("Decreasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); - + list_for_each(pos, &tsk_aux(leader)->aux_tasks) { struct task_struct *aux = container_of(list_entry(pos, struct rt_param, aux_task_node), struct task_struct, rt_param); - + if (!is_realtime(aux)) { -#if 0 - /* currently can't do this here because of scheduler deadlock on itself */ - TRACE_CUR("aux_tasks_increase_priorityting aux task: %s/%d\n", aux->comm, aux->pid); - retval = admit_aux_task(aux); - - if (retval != 0) - goto out; - - if (hp) { - // aux tasks don't touch rt locks, so no nested call needed. - TRACE_CUR("decreasing (actually increasing) %s/%d.\n", aux->comm, aux->pid); - retval = litmus->__increase_prio(aux, hp); - } -#endif - TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid); } else { @@ -119,8 +108,7 @@ static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_s retval = litmus->__decrease_prio(aux, hp); } } - - //out: + return retval; } @@ -133,20 +121,20 @@ int aux_task_owner_increase_priority(struct task_struct *t) BUG_ON(!tsk_rt(t)->has_aux_tasks); BUG_ON(!is_realtime(t)); BUG_ON(!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)); - + leader = t->group_leader; - + TRACE_CUR("task %s/%d in group %s/%d increasing priority.\n", t->comm, t->pid, leader->comm, leader->pid); hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - + if (hp == t) { goto out; // already hp, nothing to do. } - + binheap_decrease(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); - + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); @@ -155,7 +143,7 @@ int aux_task_owner_increase_priority(struct task_struct *t) retval = aux_tasks_increase_priority(leader, (tsk_rt(hp)->inh_task) ? 
tsk_rt(hp)->inh_task : hp); } - + out: return retval; } @@ -166,15 +154,15 @@ int aux_task_owner_decrease_priority(struct task_struct *t) struct task_struct *leader; struct task_struct *hp = NULL; struct task_struct *new_hp = NULL; - + BUG_ON(!tsk_rt(t)->has_aux_tasks); BUG_ON(!is_realtime(t)); BUG_ON(!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)); - + leader = t->group_leader; - + TRACE_CUR("task %s/%d in group %s/%d decresing priority.\n", t->comm, t->pid, leader->comm, leader->pid); - + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); @@ -182,7 +170,7 @@ int aux_task_owner_decrease_priority(struct task_struct *t) struct rt_param, aux_task_owner_node); new_hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - + if (hp == t && new_hp != t) { TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); retval = aux_tasks_decrease_priority(leader, @@ -204,28 +192,28 @@ long enable_aux_task_owner(struct task_struct *t) TRACE_CUR("task %s/%d is not an aux owner\n", t->comm, t->pid); return -1; } - + BUG_ON(!is_realtime(t)); - + if (binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { TRACE_CUR("task %s/%d is already active\n", t->comm, t->pid); goto out; } - + binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node); - + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); if (hp == t) { /* we're the new hp */ TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); - + retval = aux_tasks_increase_priority(leader, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); } - + out: return retval; } @@ -236,42 +224,42 @@ long disable_aux_task_owner(struct task_struct *t) struct task_struct *leader = t->group_leader; struct task_struct *hp; struct task_struct *new_hp = NULL; - + if (!tsk_rt(t)->has_aux_tasks) { TRACE_CUR("task %s/%d is not an aux owner\n", t->comm, t->pid); return -1; } - + BUG_ON(!is_realtime(t)); - + if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { TRACE_CUR("task %s/%d is already not active\n", t->comm, t->pid); goto out; } - + TRACE_CUR("task %s/%d exiting from group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); - + hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); - + if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { new_hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); } - + if (hp == t && new_hp != t) { struct task_struct *to_inh = NULL; - + TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); - + if (new_hp) { to_inh = (tsk_rt(new_hp)->inh_task) ? 
tsk_rt(new_hp)->inh_task : new_hp; } - + retval = aux_tasks_decrease_priority(leader, to_inh); } - + out: return retval; } @@ -284,60 +272,47 @@ static int aux_task_owner_max_priority_order(struct binheap_node *a, struct task_struct, rt_param); struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - + BUG_ON(!d_a); BUG_ON(!d_b); - + return litmus->compare(d_a, d_b); } -asmlinkage long sys_slave_non_rt_threads(void) +static long __do_enable_slave_non_rt_threads(void) { long retval = 0; struct task_struct *leader; struct task_struct *t; - read_lock_irq(&tasklist_lock); - leader = current->group_leader; - -#if 0 - t = leader; - do { - if (tsk_rt(t)->has_aux_tasks || tsk_rt(t)->is_aux_task) { - printk("slave_non_rt_tasks may only be called once per process.\n"); - retval = -EINVAL; - goto out_unlock; - } - } while (t != leader); -#endif - + if (!tsk_aux(leader)->initialized) { INIT_LIST_HEAD(&tsk_aux(leader)->aux_tasks); INIT_BINHEAP_HANDLE(&tsk_aux(leader)->aux_task_owners, aux_task_owner_max_priority_order); tsk_aux(leader)->initialized = 1; } - + t = leader; do { /* doesn't hurt to initialize them both */ INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); - + TRACE_CUR("Checking task in %s/%d: %s/%d = (p = %llu):\n", leader->comm, leader->pid, t->comm, t->pid, tsk_rt(t)->task_params.period); - + /* inspect heap_node to see if it is an rt task */ - if (tsk_rt(t)->task_params.period == 0 || - tsk_rt(t)->task_params.period == MAGIC_AUX_TASK_PERIOD) { + if (tsk_rt(t)->task_params.period == 0) { //|| + // tsk_rt(t)->task_params.period == MAGIC_AUX_TASK_PERIOD) { if (!tsk_rt(t)->is_aux_task) { TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ tsk_rt(t)->is_aux_task = 1; list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); - + (void)admit_aux_task(t); } else { @@ -348,10 +323,6 @@ asmlinkage long sys_slave_non_rt_threads(void) if (!tsk_rt(t)->has_aux_tasks) { TRACE_CUR("task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); tsk_rt(t)->has_aux_tasks = 1; - if (is_realtime(t)) { - binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, - struct rt_param, aux_task_owner_node); - } } else { TRACE_CUR("task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); @@ -361,16 +332,72 @@ asmlinkage long sys_slave_non_rt_threads(void) t = next_thread(t); } while(t != leader); - + if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); TRACE_CUR("found hp in group: %s/%d\n", hp->comm, hp->pid); retval = aux_tasks_increase_priority(leader, - (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + } + + return retval; +} + +static long __do_disable_slave_non_rt_threads(void) +{ + long retval = 0; + struct task_struct *leader; + struct task_struct *t; + + leader = current->group_leader; + + t = leader; + do { + if (tsk_rt(t)->is_aux_task) { + + TRACE_CUR("%s/%d is an aux task.\n", t->comm, t->pid); + + if (is_realtime(t)) { + long temp_retval; + struct sched_param param = { .sched_priority = 0}; + + TRACE_CUR("%s/%d is real-time. 
Changing policy to SCHED_NORMAL.\n", t->comm, t->pid); + + temp_retval = sched_setscheduler_nocheck(t, SCHED_NORMAL, ¶m); + + if (temp_retval != 0) { + TRACE_CUR("error changing policy of %s/%d to SCHED_NORMAL\n", t->comm, t->pid); + if (retval == 0) { + retval = temp_retval; + } + else { + TRACE_CUR("prior error (%d) masks new error (%d)\n", retval, temp_retval); + } + } + } + + tsk_rt(t)->is_aux_task = 0; + } + t = next_thread(t); + } while(t != leader); + + return retval; +} + +asmlinkage long sys_slave_non_rt_threads(int enable) +{ + long retval; + + read_lock_irq(&tasklist_lock); + + if (enable) { + retval = __do_enable_slave_non_rt_threads(); + } + else { + retval = __do_disable_slave_non_rt_threads(); } - //out_unlock: read_unlock_irq(&tasklist_lock); return retval; @@ -378,7 +405,7 @@ asmlinkage long sys_slave_non_rt_threads(void) #else -asmlinkage long sys_slave_non_rt_tasks(void) +asmlinkage long sys_slave_non_rt_tasks(int enable) { printk("Unsupported. Recompile with CONFIG_LITMUS_LOCKING.\n"); return -EINVAL; diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 9b439299e5fc..ca06f6ec103e 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -22,7 +22,7 @@ #include #endif -#ifdef CONFIG_EDF_TIE_BREAK_HASH +//#ifdef CONFIG_EDF_TIE_BREAK_HASH #include static inline long edf_hash(struct task_struct *t) { @@ -41,7 +41,22 @@ static inline long edf_hash(struct task_struct *t) */ return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32); } -#endif +//#endif + +int aux_tie_break(struct task_struct *first, struct task_struct *second) +{ + long fhash = edf_hash(first); + long shash = edf_hash(second); + if (fhash < shash) { + TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, 1); + return 1; + } + else if(fhash == shash) { + TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, (first->pid < second->pid)); + return first->pid < second->pid; + } + return 0; +} /* edf_higher_prio - returns true if first has a higher EDF priority @@ -60,6 +75,11 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) struct task_struct *first_task = first; struct task_struct *second_task = second; + int first_lo_aux; + int second_lo_aux; + int first_hi_aux; + int second_hi_aux; + /* There is no point in comparing a task to itself. */ if (first && first == second) { TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid); @@ -74,23 +94,34 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) } #ifdef CONFIG_LITMUS_LOCKING - /* aux threads with no inheritance have lowest priority; however, do a PID - * tie break if both threads are aux threads with no inheritance. 
- */ - if (unlikely(first->rt_param.is_aux_task && !first->rt_param.inh_task)) { - if (second->rt_param.is_aux_task && !second->rt_param.inh_task) { - /* pid break */ - if (first->pid < second->pid) { - return 1; - } - } + + first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; + second_lo_aux = second->rt_param.is_aux_task && !second->rt_param.inh_task; + + if (first_lo_aux && !second_lo_aux) { + TRACE_CUR("%s/%d >> %s/%d --- 0\n", first->comm, first->pid, second->comm, second->pid); return 0; } - if (unlikely(second->rt_param.is_aux_task && !second->rt_param.inh_task)) { - /* no need for pid break -- case already tested */ + else if (second_lo_aux && !first_lo_aux) { + TRACE_CUR("%s/%d >> %s/%d --- 1\n", first->comm, first->pid, second->comm, second->pid); return 1; } - + else if (first_lo_aux && second_lo_aux) { + int aux_lo_tie_break = aux_tie_break(first, second); + TRACE_CUR("low aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_lo_tie_break); + return aux_lo_tie_break; + } + + first_hi_aux = first->rt_param.is_aux_task && first->rt_param.inh_task; + second_hi_aux = second->rt_param.is_aux_task && second->rt_param.inh_task; + + if (first_hi_aux && second_hi_aux && first->rt_param.inh_task == second->rt_param.inh_task) { + int aux_hi_tie_break = aux_tie_break(first, second); + TRACE_CUR("hi aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_hi_tie_break); + return aux_hi_tie_break; + } + + /* Check for EFFECTIVE priorities. Change task * used for comparison in such a case. */ @@ -149,7 +180,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) */ if (get_lateness(first_task) > get_lateness(second_task)) { return 1; - } + } pid_break = (get_lateness(first_task) == get_lateness(second_task)); @@ -171,8 +202,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } pid_break = _eq(fnorm, snorm); - - + + #elif defined(CONFIG_EDF_TIE_BREAK_HASH) /* Tie break by comparing hashs of (pid, job#) tuple. There should be * a 50% chance that first_task has a higher priority than second_task. @@ -184,8 +215,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) } pid_break = (fhash == shash); #else - - + + /* CONFIG_EDF_PID_TIE_BREAK */ pid_break = 1; // fall through to tie-break by pid; #endif @@ -197,11 +228,17 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) } else if (first_task->pid == second_task->pid) { #ifdef CONFIG_LITMUS_SOFTIRQD - if (first_task->rt_param.is_proxy_thread < + if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread) { return 1; } #endif + if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { + TRACE_CUR("AUX BREAK!\n"); + return 1; + } + + /* Something could be wrong if you get this far. */ if (unlikely(first->rt_param.inh_task == second->rt_param.inh_task)) { @@ -220,8 +257,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) BUG_ON(!first->rt_param.inh_task && !second->rt_param.inh_task); - /* The task with the inherited priority wins. */ - if (!second->rt_param.inh_task) { + /* The task withOUT the inherited priority wins. */ + if (second->rt_param.inh_task) { /* * common with aux tasks. 
TRACE_CUR("unusual comparison: " diff --git a/litmus/litmus.c b/litmus/litmus.c index 1b4182ac3337..e2bf2a7ad01b 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -338,7 +338,7 @@ void init_gpu_affinity_state(struct task_struct* p) //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); -#if 0 +#if 0 // emperical; p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); @@ -362,13 +362,13 @@ static void reinit_litmus_state(struct task_struct* p, int restore) { struct rt_task user_config = {}; void* ctrl_page = NULL; - + #ifdef CONFIG_LITMUS_NESTED_LOCKING binheap_order_t prio_order = NULL; #endif TRACE_TASK(p, "reinit_litmus_state: restore = %d\n", restore); - + if (restore) { /* Safe user-space provided configuration data. * and allocated page. */ @@ -419,7 +419,7 @@ static void reinit_litmus_state(struct task_struct* p, int restore) if (!restore) memset(&p->aux_data, 0, sizeof(p->aux_data)); #endif - + /* Restore preserved fields. */ if (restore) { p->rt_param.task_params = user_config; @@ -437,11 +437,8 @@ static void reinit_litmus_state(struct task_struct* p, int restore) } -#ifdef CONFIG_LITMUS_LOCKING -long __litmus_admit_task(struct task_struct* tsk, int clear_aux) -#else + long __litmus_admit_task(struct task_struct* tsk) -#endif { long retval = 0; unsigned long flags; @@ -486,14 +483,6 @@ long __litmus_admit_task(struct task_struct* tsk) atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD); #endif -#ifdef CONFIG_LITMUS_LOCKING - /* turns out our aux thread isn't really an aux thread. */ - if (clear_aux && tsk_rt(tsk)->is_aux_task) { - exit_aux_task(tsk); - tsk_rt(tsk)->has_aux_tasks = 1; - } -#endif - retval = litmus->admit_task(tsk); if (!retval) { @@ -511,7 +500,7 @@ out_unlock: long litmus_admit_task(struct task_struct* tsk) { long retval = 0; - + BUG_ON(is_realtime(tsk)); if (get_rt_relative_deadline(tsk) == 0 || @@ -533,12 +522,8 @@ long litmus_admit_task(struct task_struct* tsk) goto out; } -#ifdef CONFIG_LITMUS_LOCKING - retval = __litmus_admit_task(tsk, (tsk_rt(tsk)->task_params.period != MAGIC_AUX_TASK_PERIOD)); -#else retval = __litmus_admit_task(tsk); -#endif - + out: return retval; } @@ -624,18 +609,21 @@ out: */ void litmus_fork(struct task_struct* p) { - reinit_litmus_state(p, 0); - if (is_realtime(p)) { TRACE_TASK(p, "fork, is real-time\n"); + /* clean out any litmus related state, don't preserve anything */ - //reinit_litmus_state(p, 0); + reinit_litmus_state(p, 0); + /* Don't let the child be a real-time task. */ p->sched_reset_on_fork = 1; + } else { /* non-rt tasks might have ctrl_page set */ tsk_rt(p)->ctrl_page = NULL; - + + reinit_litmus_state(p, 0); + /* still don't inherit any parental parameters */ //memset(&p->rt_param, 0, sizeof(p->rt_param)); //memset(&p->aux_data, 0, sizeof(p->aux_data)); @@ -736,10 +724,6 @@ static int __init _init_litmus(void) init_topology(); #endif -#ifdef CONFIG_LITMUS_NVIDIA - //init_nvidia_info(); -#endif - return 0; } diff --git a/litmus/preempt.c b/litmus/preempt.c index a2cae3648e15..c9ccc80c1df9 100644 --- a/litmus/preempt.c +++ b/litmus/preempt.c @@ -74,25 +74,37 @@ void litmus_reschedule(int cpu) * is not aware of the need to reschedule at this point. */ /* is a context switch in progress? 
*/ - if (cpu_is_in_sched_state(cpu, TASK_PICKED)) + if (cpu_is_in_sched_state(cpu, TASK_PICKED)) { picked_transition_ok = sched_state_transition_on( cpu, TASK_PICKED, PICKED_WRONG_TASK); + TRACE_CUR("cpu %d: picked_transition_ok = %d\n", cpu, picked_transition_ok); + } + else { + TRACE_CUR("cpu %d: picked_transition_ok = 0 (static)\n", cpu); + } + if (!picked_transition_ok && cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) { /* We either raced with the end of the context switch, or the * CPU was in TASK_SCHEDULED anyway. */ scheduled_transition_ok = sched_state_transition_on( cpu, TASK_SCHEDULED, SHOULD_SCHEDULE); + TRACE_CUR("cpu %d: scheduled_transition_ok = %d\n", cpu, scheduled_transition_ok); + } + else { + TRACE_CUR("cpu %d: scheduled_transition_ok = 0 (static)\n", cpu); } /* If the CPU was in state TASK_SCHEDULED, then we need to cause the * scheduler to be invoked. */ if (scheduled_transition_ok) { - if (smp_processor_id() == cpu) + if (smp_processor_id() == cpu) { set_tsk_need_resched(current); - else + } + else { smp_send_reschedule(cpu); + } } TRACE_STATE("%s picked-ok:%d sched-ok:%d\n", @@ -103,11 +115,16 @@ void litmus_reschedule(int cpu) void litmus_reschedule_local(void) { - if (is_in_sched_state(TASK_PICKED)) + if (is_in_sched_state(TASK_PICKED)) { set_sched_state(PICKED_WRONG_TASK); + + TRACE_CUR("cpu %d: transitioned to PICKED_WRONG_TASK\n", smp_processor_id()); + } else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) { set_sched_state(WILL_SCHEDULE); set_tsk_need_resched(current); + + TRACE_CUR("cpu %d: transitioned to WILL_SCHEDULE\n", smp_processor_id()); } } diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 5b8ca6698423..270e06c20bbf 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -167,6 +167,7 @@ struct tasklet_head gsnedf_pending_tasklets; * TRACE() log. #define WANT_ALL_SCHED_EVENTS */ +//#define WANT_ALL_SCHED_EVENTS static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b) { @@ -209,8 +210,17 @@ static noinline void link_task_to_cpu(struct task_struct* linked, struct task_struct* tmp; int on_cpu; + //int print = (linked != NULL || entry->linked != NULL); + BUG_ON(linked && !is_realtime(linked)); + /* + if (print) { + TRACE_CUR("linked = %s/%d\n", (linked) ? linked->comm : "(nil)", (linked)? linked->pid : 0); + TRACE_CUR("entry->linked = %s/%d\n", (entry->linked) ? entry->linked->comm : "(nil)", (entry->linked)? entry->linked->pid : 0); + } + */ + /* Currently linked task is set to be unlinked. */ if (entry->linked) { entry->linked->rt_param.linked_on = NO_CPU; @@ -246,12 +256,18 @@ static noinline void link_task_to_cpu(struct task_struct* linked, linked->rt_param.linked_on = entry->cpu; } entry->linked = linked; -#ifdef WANT_ALL_SCHED_EVENTS - if (linked) - TRACE_TASK(linked, "linked to %d.\n", entry->cpu); - else - TRACE("NULL linked to %d.\n", entry->cpu); -#endif + + /* + if (print) { + //#ifdef WANT_ALL_SCHED_EVENTS + if (linked) + TRACE_TASK(linked, "linked to %d.\n", entry->cpu); + else + TRACE("NULL linked to %d.\n", entry->cpu); + //#endif + } + */ + update_cpu_position(entry); } @@ -297,36 +313,19 @@ static noinline void requeue(struct task_struct* task) BUG_ON(is_queued(task)); if (is_released(task, litmus_clock())) { - + if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) { /* aux_task probably transitioned to real-time while it was blocked */ TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid); + unlink(task); /* really needed? 
*/ } else { __add_ready(&gsnedf, task); - -#if 0 - if (tsk_rt(task)->has_aux_tasks) { - - TRACE_CUR("%s/%d is ready and has aux tasks.\n", task->comm, task->pid); - /* allow it's prio inheritance to act on aux threads */ - enable_aux_task_owner(task); - } -#endif } } else { /* it has got to wait */ add_release(&gsnedf, task); - -#if 0 - if (tsk_rt(task)->has_aux_tasks) { - - TRACE_CUR("%s/%d is waiting for release and has aux tasks.\n", task->comm, task->pid); - /* prevent prio inheritance from acting while it's not ready */ - disable_aux_task_owner(task); - } -#endif } } @@ -368,7 +367,8 @@ static void check_for_preemptions(void) &per_cpu(gsnedf_cpu_entries, task_cpu(task))); if (affinity) last = affinity; - else if (requeue_preempted_job(last->linked)) + + if (requeue_preempted_job(last->linked)) requeue(last->linked); } #else @@ -393,45 +393,11 @@ static noinline void gsnedf_job_arrival(struct task_struct* task) static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) { unsigned long flags; - //struct bheap_node* node; raw_spin_lock_irqsave(&gsnedf_lock, flags); -#if 0 - node = tasks->head; - while(node) { - struct task_struct *task = bheap2task(node); - - if (tsk_rt(task)->has_aux_tasks) { - - TRACE_CUR("%s/%d is ready and has aux tasks.\n", task->comm, task->pid); - - /* allow it's prio inheritance to act on aux threads */ - enable_aux_task_owner(task); - } - - /* pre-order sub-tree traversal */ - if (node->child) { - /* go down */ - node = node->child; - } - else if(node->parent && node->parent->next) { - /* go up a level and across */ - node = node->parent->next; - } - else if(!node->parent && node->next) { - /* go to the next binomial tree */ - node = node->next; - } - else { - /* the end! */ - node = NULL; - } - } -#endif - __merge_ready(rt, tasks); - + check_for_preemptions(); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); @@ -449,12 +415,12 @@ static noinline void job_completion(struct task_struct *t, int forced) #endif TRACE_TASK(t, "job_completion().\n"); - + /* set flags */ set_rt_flags(t, RT_F_SLEEP); /* prepare for next period */ prepare_for_next_period(t); - + if (is_released(t, litmus_clock())) sched_trace_task_release(t); /* unlink */ @@ -497,6 +463,10 @@ static void gsnedf_tick(struct task_struct* t) } } } + + if(is_realtime(t)) { + TRACE_TASK(t, "tick %llu\n", litmus_clock()); + } } @@ -838,6 +808,8 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) int out_of_time, signal_budget, sleep, preempt, np, exists, blocks; struct task_struct* next = NULL; + //int completion = 0; + #ifdef CONFIG_RELEASE_MASTER /* Bail out early if we are the release master. * The release master never schedules any real-time tasks. @@ -873,22 +845,22 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); #endif - /* - if (exists) + if (exists) { TRACE_TASK(prev, "blocks:%d out_of_time:%d signal_budget: %d np:%d sleep:%d preempt:%d " "state:%d sig:%d\n", blocks, out_of_time, signal_budget, np, sleep, preempt, prev->state, signal_pending(prev)); - */ + } if (entry->linked && preempt) TRACE_TASK(prev, "will be preempted by %s/%d\n", entry->linked->comm, entry->linked->pid); /* Send the signal that the budget has been exhausted */ - if (signal_budget) + if (signal_budget) { send_sigbudget(entry->scheduled); + } /* If a task blocks we have no choice but to reschedule. */ @@ -919,8 +891,10 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) * this. 
Don't do a job completion if we block (can't have timers running * for blocked jobs). */ - if (!np && (out_of_time || sleep) && !blocks) + if (!np && (out_of_time || sleep) && !blocks) { job_completion(entry->scheduled, !sleep); + //completion = 1; + } /* Link pending task if we became unlinked. */ @@ -953,8 +927,21 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) next = prev; } +#if 0 + if (completion) { + TRACE_CUR("switching away from a completion\n"); + } +#endif + sched_state_task_picked(); +#if 0 + if (next && is_realtime(next) && tsk_rt(next)->is_aux_task && !tsk_rt(next)->inh_task) { + TRACE_TASK(next, "is aux with no inheritance. preventing it from actually running.\n"); + next = NULL; + } +#endif + raw_spin_unlock(&gsnedf_lock); #ifdef WANT_ALL_SCHED_EVENTS @@ -965,7 +952,7 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) else if (exists && !next) TRACE("becomes idle at %llu.\n", litmus_clock()); #endif - + return next; } @@ -991,7 +978,7 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) unsigned long flags; cpu_entry_t* entry; - TRACE("gsn edf: task new %d\n", t->pid); + TRACE("gsn edf: task new = %d on_rq = %d running = %d\n", t->pid, on_rq, running); raw_spin_lock_irqsave(&gsnedf_lock, flags); @@ -1060,11 +1047,11 @@ static void gsnedf_task_wake_up(struct task_struct *task) #endif if (tsk_rt(task)->has_aux_tasks) { - + TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); disable_aux_task_owner(task); } - + gsnedf_job_arrival(task); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); } @@ -1081,11 +1068,11 @@ static void gsnedf_task_block(struct task_struct *t) unlink(t); if (tsk_rt(t)->has_aux_tasks) { - + TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); enable_aux_task_owner(t); } - + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); BUG_ON(!is_realtime(t)); @@ -1105,17 +1092,17 @@ static void gsnedf_task_exit(struct task_struct * t) exit_aux_task(t); /* cannot be called with gsnedf_lock held */ } #endif - + /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); - + #ifdef CONFIG_LITMUS_LOCKING /* make sure we clean up on our way out */ if(tsk_rt(t)->has_aux_tasks) { disable_aux_task_owner(t); /* must be called witl gsnedf_lock held */ } #endif - + unlink(t); if (tsk_rt(t)->scheduled_on != NO_CPU) { gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; @@ -1161,7 +1148,7 @@ static int __increase_priority_inheritance(struct task_struct* t, prio_inh->comm, prio_inh->pid); goto out; } - + #ifdef CONFIG_LITMUS_NESTED_LOCKING /* this sanity check allows for weaker locking in protocols */ /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) 
*/ @@ -1221,8 +1208,8 @@ static int __increase_priority_inheritance(struct task_struct* t, &gsnedf.ready_queue); check_for_preemptions(); } - - + + /* propagate to aux tasks */ if (tsk_rt(t)->has_aux_tasks) { aux_task_owner_increase_priority(t); @@ -1242,7 +1229,7 @@ static int __increase_priority_inheritance(struct task_struct* t, success = 0; } #endif - + out: return success; } @@ -1251,7 +1238,7 @@ out: static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { int success; - + raw_spin_lock(&gsnedf_lock); success = __increase_priority_inheritance(t, prio_inh); @@ -1267,7 +1254,7 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str #endif raw_spin_unlock(&gsnedf_lock); - + #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) if(tsk_rt(t)->held_gpus) { int i; @@ -1286,7 +1273,7 @@ static int __decrease_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { int success = 1; - + if (prio_inh == tsk_rt(t)->inh_task) { /* relationship already established. */ TRACE_TASK(t, "already inherits priority from %s/%d\n", @@ -1294,7 +1281,7 @@ static int __decrease_priority_inheritance(struct task_struct* t, (prio_inh) ? prio_inh->pid : 0); goto out; } - + #ifdef CONFIG_LITMUS_NESTED_LOCKING if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { #endif @@ -1331,7 +1318,7 @@ static int __decrease_priority_inheritance(struct task_struct* t, } raw_spin_unlock(&gsnedf.release_lock); } - + /* propagate to aux tasks */ if (tsk_rt(t)->has_aux_tasks) { aux_task_owner_decrease_priority(t); @@ -1349,7 +1336,7 @@ static int __decrease_priority_inheritance(struct task_struct* t, success = 0; } #endif - + out: return success; } @@ -1358,11 +1345,11 @@ static void decrease_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { int success; - + raw_spin_lock(&gsnedf_lock); - + success = __decrease_priority_inheritance(t, prio_inh); - + #ifdef CONFIG_LITMUS_SOFTIRQD if(tsk_rt(t)->cur_klitirqd != NULL) { @@ -1374,7 +1361,7 @@ static void decrease_priority_inheritance(struct task_struct* t, #endif raw_spin_unlock(&gsnedf_lock); - + #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) if(tsk_rt(t)->held_gpus) { int i; diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c index 9a6fe487718e..62854b576796 100644 --- a/litmus/sched_litmus.c +++ b/litmus/sched_litmus.c @@ -177,8 +177,10 @@ static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, litmus->task_wake_up(p); rq->litmus.nr_running++; - } else + } else { TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); + //WARN_ON(1); + } } static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index d24c9167cff8..f9423861eb1f 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -31,11 +31,19 @@ void preempt_if_preemptable(struct task_struct* t, int cpu) int reschedule = 0; - if (!t) + TRACE_CUR("preempt_if_preemptable: %s/%d\n", + (t) ? t->comm : "(nil)", + (t) ? 
t->pid : 0); + + if (!t) { + TRACE_CUR("unconditionally reshcedule\n"); /* move non-real-time task out of the way */ reschedule = 1; + } else { if (smp_processor_id() == cpu) { + TRACE_CUR("preempt local cpu.\n"); + /* local CPU case */ /* check if we need to poke userspace */ if (is_user_np(t)) @@ -47,14 +55,22 @@ void preempt_if_preemptable(struct task_struct* t, int cpu) * currently-executing task */ reschedule = 1; } else { + int is_knp = is_kernel_np(t); + int reqexit = request_exit_np_atomic(t); + TRACE_CUR("preempt remote cpu: isknp = %d reqexit = %d\n", is_knp, reqexit); + /* Remote CPU case. Only notify if it's not a kernel * NP section and if we didn't set the userspace * flag. */ - reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); + //reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); + reschedule = !(is_knp || reqexit); } } - if (likely(reschedule)) + + if (likely(reschedule)) { + TRACE_CUR("calling litmus_reschedule()\n"); litmus_reschedule(cpu); + } } -- cgit v1.2.2 From c58a74c8ad2d2b1b01be12afb9bac58dfef0d16a Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 16 Sep 2012 18:29:36 -0400 Subject: Added CONFIG_REALTIME_AUX_TASKS option Auxillary task features were enabled by CONFIG_LITMUS_LOCKING. Made auxillary tasks a seperate feature that depends upon CONFIG_LITMUS_LOCKING. --- include/linux/sched.h | 2 ++ include/litmus/rt_param.h | 4 +++- include/litmus/unistd_32.h | 2 +- include/litmus/unistd_64.h | 6 +++--- litmus/Kconfig | 31 +++++++++++++++++++++++++++++++ litmus/Makefile | 3 ++- litmus/aux_tasks.c | 23 ++++++++++------------- litmus/edf_common.c | 29 ++++++++++++++++++----------- litmus/litmus.c | 8 ++------ litmus/sched_gsn_edf.c | 26 ++++++++++++++++++-------- 10 files changed, 90 insertions(+), 44 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5d1c041be809..d580959f9f5c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1534,7 +1534,9 @@ struct task_struct { /*** LITMUS RT parameters and state ***/ struct rt_param rt_param; +#ifdef CONFIG_REALTIME_AUX_TASKS struct aux_data aux_data; +#endif /*****/ /* references to PI semaphores, etc. 
*/ diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index c45ba23d7650..8b9e14c461dc 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -285,7 +285,7 @@ struct rt_param { #endif -#ifdef CONFIG_LITMUS_LOCKING +#ifdef CONFIG_REALTIME_AUX_TASKS unsigned int is_aux_task:1; unsigned int has_aux_tasks:1; @@ -358,12 +358,14 @@ struct rt_param { struct control_page * ctrl_page; }; +#ifdef CONFIG_REALTIME_AUX_TASKS struct aux_data { struct list_head aux_tasks; struct binheap aux_task_owners; unsigned int initialized:1; }; +#endif /* Possible RT flags */ #define RT_F_RUNNING 0x00000000 diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index 4fd92956d13f..c86b743408ed 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h @@ -21,6 +21,6 @@ #define __NR_litmus_dgl_unlock __LSC(13) #define __NR_register_nv_device __LSC(14) -#define __NR_slave_non_rt_threads _LSC(15) +#define __NR_set_aux_tasks _LSC(15) #define NR_litmus_syscalls 16 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index abb45c181e8e..3825bc129dbd 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -33,10 +33,10 @@ __SYSCALL(__NR_null_call, sys_null_call) __SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock) #define __NR_litmus_dgl_unlock __LSC(13) __SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock) -#define __NR_register_nv_device __LSC(14) +#define __NR_register_nv_device __LSC(14) __SYSCALL(__NR_register_nv_device, sys_register_nv_device) -#define __NR_slave_non_rt_threads __LSC(15) -__SYSCALL(__NR_slave_non_rt_threads, sys_slave_non_rt_threads) +#define __NR_set_aux_tasks __LSC(15) +__SYSCALL(__NR_set_aux_tasks, sys_set_aux_tasks) #define NR_litmus_syscalls 16 diff --git a/litmus/Kconfig b/litmus/Kconfig index 95e0671e2aec..c5dbc4a176ae 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -34,6 +34,37 @@ config RELEASE_MASTER (http://www.cs.unc.edu/~anderson/papers.html). Currently only supported by GSN-EDF. +config REALTIME_AUX_TASKS + bool "Real-Time Auxillary Tasks" + depends on LITMUS_LOCKING + default n + help + Adds a system call that forces all non-real-time threads in a process + to become auxillary real-time tasks. These tasks inherit the priority of + the highest-prio *BLOCKED* real-time task (non-auxillary) in the process. + This allows the integration of COTS code that has background helper threads + used primarily for message passing and synchronization. If these + background threads are NOT real-time scheduled, then unbounded priority + inversions may occur if a real-time task blocks on a non-real-time thread. + + Beware of the following pitfalls: + 1) Auxillary threads should not be CPU intensive. They should mostly + block on mutexes and condition variables. Violating this will + likely prevent meaningful analysis. + 2) Since there may be more than one auxillary thread per process, + priority inversions may occur with respect to single-threaded + task models if/when one of threads are scheduled simultanously + with another of the same identity. + 3) Busy-wait deadlock is likely between normal real-time tasks and + auxillary tasks synchronize using _preemptive_ spinlocks that do + not use priority inheritance. + + These pitfalls are mitgated by the fact that auxillary tasks only + inherit priorities from blocked tasks (Blocking signifies that the + blocked task _may_ be waiting on an auxillary task to perform some + work.). 
Futher, auxillary tasks without an inherited priority are + _always_ scheduled with a priority less than any normal real-time task!! + endmenu menu "Real-Time Synchronization" diff --git a/litmus/Makefile b/litmus/Makefile index f2dd7be7ae4a..67d8b8ee72bc 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -18,6 +18,7 @@ obj-y = sched_plugin.o litmus.o \ bheap.o \ binheap.o \ ctrldev.o \ + aux_tasks.o \ sched_gsn_edf.o \ sched_psn_edf.o \ sched_pfp.o @@ -31,7 +32,7 @@ obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o -obj-$(CONFIG_LITMUS_LOCKING) += aux_tasks.o kfmlp_lock.o +obj-$(CONFIG_LITMUS_LOCKING) += kfmlp_lock.o obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index 5057137bbbea..b0617accdf7f 100644 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -1,8 +1,8 @@ -#ifdef CONFIG_LITMUS_LOCKING - #include #include #include + +#ifdef CONFIG_REALTIME_AUX_TASKS #include #include @@ -23,14 +23,11 @@ static int admit_aux_task(struct task_struct *t) * fail-safe. */ struct rt_task tp = { - //.period = MAGIC_AUX_TASK_PERIOD, - //.relative_deadline = MAGIC_AUX_TASK_PERIOD, - .period = 1000000, /* has to wait 1 ms before it can run again once it has exhausted budget */ + .period = 1000000, /* 1ms */ .relative_deadline = 1000000, .exec_cost = 1000000, /* allow full utilization */ .phase = 0, .cpu = task_cpu(leader), /* take CPU of group leader */ - //.budget_policy = NO_ENFORCEMENT, .budget_policy = QUANTUM_ENFORCEMENT, .budget_signal_policy = NO_SIGNALS, .cls = RT_CLASS_BEST_EFFORT @@ -280,7 +277,7 @@ static int aux_task_owner_max_priority_order(struct binheap_node *a, } -static long __do_enable_slave_non_rt_threads(void) +static long __do_enable_aux_tasks(void) { long retval = 0; struct task_struct *leader; @@ -344,7 +341,7 @@ static long __do_enable_slave_non_rt_threads(void) return retval; } -static long __do_disable_slave_non_rt_threads(void) +static long __do_disable_aux_tasks(void) { long retval = 0; struct task_struct *leader; @@ -385,17 +382,17 @@ static long __do_disable_slave_non_rt_threads(void) return retval; } -asmlinkage long sys_slave_non_rt_threads(int enable) +asmlinkage long sys_set_aux_tasks(int enable) { long retval; read_lock_irq(&tasklist_lock); if (enable) { - retval = __do_enable_slave_non_rt_threads(); + retval = __do_enable_aux_tasks(); } else { - retval = __do_disable_slave_non_rt_threads(); + retval = __do_disable_aux_tasks(); } read_unlock_irq(&tasklist_lock); @@ -405,9 +402,9 @@ asmlinkage long sys_slave_non_rt_threads(int enable) #else -asmlinkage long sys_slave_non_rt_tasks(int enable) +asmlinkage long sys_set_aux_tasks(int enable) { - printk("Unsupported. Recompile with CONFIG_LITMUS_LOCKING.\n"); + printk("Unsupported. 
Recompile with CONFIG_REALTIME_AUX_TASKS.\n"); return -EINVAL; } diff --git a/litmus/edf_common.c b/litmus/edf_common.c index ca06f6ec103e..7e0d3a5d0c4d 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -22,7 +22,7 @@ #include #endif -//#ifdef CONFIG_EDF_TIE_BREAK_HASH +#if defined(CONFIG_EDF_TIE_BREAK_HASH) || defined(CONFIG_REALTIME_AUX_TASKS) #include static inline long edf_hash(struct task_struct *t) { @@ -41,8 +41,9 @@ static inline long edf_hash(struct task_struct *t) */ return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32); } -//#endif +#endif +#ifdef CONFIG_REALTIME_AUX_TASKS int aux_tie_break(struct task_struct *first, struct task_struct *second) { long fhash = edf_hash(first); @@ -57,6 +58,7 @@ int aux_tie_break(struct task_struct *first, struct task_struct *second) } return 0; } +#endif /* edf_higher_prio - returns true if first has a higher EDF priority @@ -75,11 +77,6 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) struct task_struct *first_task = first; struct task_struct *second_task = second; - int first_lo_aux; - int second_lo_aux; - int first_hi_aux; - int second_hi_aux; - /* There is no point in comparing a task to itself. */ if (first && first == second) { TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid); @@ -93,8 +90,14 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return first && !second; } -#ifdef CONFIG_LITMUS_LOCKING +#ifdef CONFIG_REALTIME_AUX_TASKS + { + /* statically prioritize all auxillary tasks that have no inheritance + * below all other regular real-time tasks. + */ + int first_lo_aux, second_lo_aux; + int first_hi_aux, second_hi_aux; first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; second_lo_aux = second->rt_param.is_aux_task && !second->rt_param.inh_task; @@ -120,8 +123,10 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) TRACE_CUR("hi aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_hi_tie_break); return aux_hi_tie_break; } + } +#endif - +#ifdef CONFIG_LITMUS_LOCKING /* Check for EFFECTIVE priorities. Change task * used for comparison in such a case. */ @@ -233,11 +238,13 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } #endif + +#ifdef CONFIG_REALTIME_AUX_TASKS + /* is this dead code? */ if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { - TRACE_CUR("AUX BREAK!\n"); return 1; } - +#endif /* Something could be wrong if you get this far. */ if (unlikely(first->rt_param.inh_task == diff --git a/litmus/litmus.c b/litmus/litmus.c index e2bf2a7ad01b..d368202ab8c3 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -25,7 +25,7 @@ #include #endif -#ifdef CONFIG_LITMUS_LOCKING +#ifdef CONFIG_REALTIME_AUX_TASKS #include #endif @@ -413,7 +413,7 @@ static void reinit_litmus_state(struct task_struct* p, int restore) /* Cleanup everything else. */ memset(&p->rt_param, 0, sizeof(p->rt_param)); -#ifdef CONFIG_LITMUS_LOCKING +#ifdef CONFIG_REALTIME_AUX_TASKS /* also clear out the aux_data. the !restore case is only called on * fork (initial thread creation). 
*/ if (!restore) @@ -623,10 +623,6 @@ void litmus_fork(struct task_struct* p) tsk_rt(p)->ctrl_page = NULL; reinit_litmus_state(p, 0); - - /* still don't inherit any parental parameters */ - //memset(&p->rt_param, 0, sizeof(p->rt_param)); - //memset(&p->aux_data, 0, sizeof(p->aux_data)); } /* od tables are never inherited across a fork */ diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 270e06c20bbf..5fc330f14a0e 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -29,7 +29,6 @@ #ifdef CONFIG_LITMUS_LOCKING #include -#include #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING @@ -41,6 +40,10 @@ #include #endif +#ifdef CONFIG_REALTIME_AUX_TASKS +#include +#endif + #ifdef CONFIG_LITMUS_SOFTIRQD #include #endif @@ -313,15 +316,15 @@ static noinline void requeue(struct task_struct* task) BUG_ON(is_queued(task)); if (is_released(task, litmus_clock())) { - +#ifdef CONFIG_REALTIME_AUX_TASKS if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) { /* aux_task probably transitioned to real-time while it was blocked */ TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid); unlink(task); /* really needed? */ } - else { + else +#endif __add_ready(&gsnedf, task); - } } else { /* it has got to wait */ add_release(&gsnedf, task); @@ -1046,11 +1049,12 @@ static void gsnedf_task_wake_up(struct task_struct *task) set_rt_flags(task, RT_F_RUNNING); #endif +#ifdef CONFIG_REALTIME_AUX_TASKS if (tsk_rt(task)->has_aux_tasks) { - TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); disable_aux_task_owner(task); } +#endif gsnedf_job_arrival(task); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); @@ -1067,11 +1071,13 @@ static void gsnedf_task_block(struct task_struct *t) unlink(t); +#ifdef CONFIG_REALTIME_AUX_TASKS if (tsk_rt(t)->has_aux_tasks) { TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); enable_aux_task_owner(t); } +#endif raw_spin_unlock_irqrestore(&gsnedf_lock, flags); @@ -1087,7 +1093,7 @@ static void gsnedf_task_exit(struct task_struct * t) gsnedf_change_prio_pai_tasklet(t, NULL); #endif -#ifdef CONFIG_LITMUS_LOCKING +#ifdef CONFIG_REALTIME_AUX_TASKS if (tsk_rt(t)->is_aux_task) { exit_aux_task(t); /* cannot be called with gsnedf_lock held */ } @@ -1096,7 +1102,7 @@ static void gsnedf_task_exit(struct task_struct * t) /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); -#ifdef CONFIG_LITMUS_LOCKING +#ifdef CONFIG_REALTIME_AUX_TASKS /* make sure we clean up on our way out */ if(tsk_rt(t)->has_aux_tasks) { disable_aux_task_owner(t); /* must be called witl gsnedf_lock held */ @@ -1209,11 +1215,12 @@ static int __increase_priority_inheritance(struct task_struct* t, check_for_preemptions(); } - +#ifdef CONFIG_REALTIME_AUX_TASKS /* propagate to aux tasks */ if (tsk_rt(t)->has_aux_tasks) { aux_task_owner_increase_priority(t); } +#endif } #ifdef CONFIG_LITMUS_NESTED_LOCKING } @@ -1319,10 +1326,13 @@ static int __decrease_priority_inheritance(struct task_struct* t, raw_spin_unlock(&gsnedf.release_lock); } +#ifdef CONFIG_REALTIME_AUX_TASKS /* propagate to aux tasks */ if (tsk_rt(t)->has_aux_tasks) { aux_task_owner_decrease_priority(t); } +#endif + #ifdef CONFIG_LITMUS_NESTED_LOCKING } else { -- cgit v1.2.2 From 6b3b85da89aee11ed47369833470b9282dd5994f Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 16 Sep 2012 18:45:05 -0400 Subject: C-EDF support for auxiliary tasks. Extended auxiliary task support to C-EDF. Modeled after G-EDF.
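For reference, the G-EDF pattern being ported to C-EDF reduces to the sketch below. The hook names are illustrative placeholders (not functions added by this patch); enable_aux_task_owner(), disable_aux_task_owner(), and tsk_rt() are the real helpers invoked from the plugin callbacks in the diff that follows:

    #ifdef CONFIG_REALTIME_AUX_TASKS
    /* from the plugin's task_block callback, with the domain lock held:
     * the task may now be waiting on one of its aux threads, so let the
     * aux threads inherit its priority */
    static void example_on_block(struct task_struct *t)
    {
            if (tsk_rt(t)->has_aux_tasks)
                    enable_aux_task_owner(t);
    }

    /* from the plugin's task_wake_up callback: the task is runnable
     * again, so its aux threads must stop inheriting from it */
    static void example_on_wake_up(struct task_struct *t)
    {
            if (tsk_rt(t)->has_aux_tasks)
                    disable_aux_task_owner(t);
    }
    #endif

The task_exit path follows the same idea: exit_aux_task() for an aux thread itself, disable_aux_task_owner() for an owner, as in the hunks below.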
--- litmus/sched_cedf.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++--- litmus/sched_gsn_edf.c | 6 ++-- 2 files changed, 84 insertions(+), 7 deletions(-) diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index f030f027b486..f5c9807090a1 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -58,6 +58,10 @@ #include #endif +#ifdef CONFIG_REALTIME_AUX_TASKS +#include +#endif + /* to configure the cluster size */ #include @@ -313,7 +317,15 @@ static noinline void requeue(struct task_struct* task) BUG_ON(is_queued(task)); if (is_released(task, litmus_clock())) - __add_ready(&cluster->domain, task); +#ifdef CONFIG_REALTIME_AUX_TASKS + if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) { + /* aux_task probably transitioned to real-time while it was blocked */ + TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid); + unlink(task); /* really needed? */ + } + else +#endif + __add_ready(&cluster->domain, task); else { /* it has got to wait */ add_release(&cluster->domain, task); @@ -1019,9 +1031,14 @@ static void cedf_task_wake_up(struct task_struct *task) set_rt_flags(task, RT_F_RUNNING); // periodic model #endif - if(tsk_rt(task)->linked_on == NO_CPU) - cedf_job_arrival(task); +#ifdef CONFIG_REALTIME_AUX_TASKS + if (tsk_rt(task)->has_aux_tasks) { + TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); + disable_aux_task_owner(task); + } +#endif + cedf_job_arrival(task); raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); } @@ -1036,7 +1053,17 @@ static void cedf_task_block(struct task_struct *t) /* unlink if necessary */ raw_spin_lock_irqsave(&cluster->cluster_lock, flags); + unlink(t); + +#ifdef CONFIG_REALTIME_AUX_TASKS + if (tsk_rt(t)->has_aux_tasks) { + + TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); + enable_aux_task_owner(t); + } +#endif + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); BUG_ON(!is_realtime(t)); @@ -1052,8 +1079,22 @@ static void cedf_task_exit(struct task_struct * t) cedf_change_prio_pai_tasklet(t, NULL); #endif +#ifdef CONFIG_REALTIME_AUX_TASKS + if (tsk_rt(t)->is_aux_task) { + exit_aux_task(t); /* cannot be called with gsnedf_lock held */ + } +#endif + /* unlink if necessary */ raw_spin_lock_irqsave(&cluster->cluster_lock, flags); + +#ifdef CONFIG_REALTIME_AUX_TASKS + /* make sure we clean up on our way out */ + if(tsk_rt(t)->has_aux_tasks) { + disable_aux_task_owner(t); /* must be called witl gsnedf_lock held */ + } +#endif + unlink(t); if (tsk_rt(t)->scheduled_on != NO_CPU) { cpu_entry_t *cpu; @@ -1092,8 +1133,16 @@ static int __increase_priority_inheritance(struct task_struct* t, int success = 1; int linked_on; int check_preempt = 0; + cedf_domain_t* cluster; - cedf_domain_t* cluster = task_cpu_cluster(t); + if (prio_inh && prio_inh == effective_priority(t)) { + /* relationship already established. 
*/ + TRACE_TASK(t, "already has effective priority of %s/%d\n", + prio_inh->comm, prio_inh->pid); + goto out; + } + + cluster = task_cpu_cluster(t); #ifdef CONFIG_LITMUS_NESTED_LOCKING /* this sanity check allows for weaker locking in protocols */ @@ -1155,6 +1204,13 @@ static int __increase_priority_inheritance(struct task_struct* t, &cluster->domain.ready_queue); check_for_preemptions(cluster); } + +#ifdef CONFIG_REALTIME_AUX_TASKS + /* propagate to aux tasks */ + if (tsk_rt(t)->has_aux_tasks) { + aux_task_owner_increase_priority(t); + } +#endif } #ifdef CONFIG_LITMUS_NESTED_LOCKING } @@ -1170,6 +1226,8 @@ static int __increase_priority_inheritance(struct task_struct* t, success = 0; } #endif + +out: return success; } @@ -1211,6 +1269,15 @@ static int __decrease_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) { int success = 1; + + if (prio_inh == tsk_rt(t)->inh_task) { + /* relationship already established. */ + TRACE_TASK(t, "already inherits priority from %s/%d\n", + (prio_inh) ? prio_inh->comm : "(nil)", + (prio_inh) ? prio_inh->pid : 0); + goto out; + } + #ifdef CONFIG_LITMUS_NESTED_LOCKING if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { #endif @@ -1248,6 +1315,14 @@ static int __decrease_priority_inheritance(struct task_struct* t, } raw_spin_unlock(&cluster->domain.release_lock); } + +#ifdef CONFIG_REALTIME_AUX_TASKS + /* propagate to aux tasks */ + if (tsk_rt(t)->has_aux_tasks) { + aux_task_owner_decrease_priority(t); + } +#endif + #ifdef CONFIG_LITMUS_NESTED_LOCKING } else { @@ -1261,6 +1336,8 @@ static int __decrease_priority_inheritance(struct task_struct* t, success = 0; } #endif + +out: return success; } diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 5fc330f14a0e..ed9b4697a5a2 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -170,7 +170,6 @@ struct tasklet_head gsnedf_pending_tasklets; * TRACE() log. #define WANT_ALL_SCHED_EVENTS */ -//#define WANT_ALL_SCHED_EVENTS static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b) { @@ -370,8 +369,7 @@ static void check_for_preemptions(void) &per_cpu(gsnedf_cpu_entries, task_cpu(task))); if (affinity) last = affinity; - - if (requeue_preempted_job(last->linked)) + else if (requeue_preempted_job(last->linked)) requeue(last->linked); } #else @@ -467,9 +465,11 @@ static void gsnedf_tick(struct task_struct* t) } } + /* if(is_realtime(t)) { TRACE_TASK(t, "tick %llu\n", litmus_clock()); } + */ } -- cgit v1.2.2 From 2d1fe1a20a9f2784ec4172429f31c228274ed8ac Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 16 Sep 2012 20:05:32 -0400 Subject: Add support for CUDA 5.0 (release candidate) --- litmus/Kconfig | 10 ++++++++-- litmus/nvidia_info.c | 7 ++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/litmus/Kconfig b/litmus/Kconfig index c5dbc4a176ae..b28fe2c09acd 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -435,11 +435,17 @@ config NV_MAX_SIMULT_USERS choice prompt "CUDA/Driver Version Support" - default CUDA_4_0 + default CUDA_5_0 depends on LITMUS_NVIDIA help Select the version of CUDA/driver to support. - + +config CUDA_5_0 + bool "CUDA 5.0" + depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS + help + Support CUDA 5.0 RCx (dev. 
driver version: x86_64-304.33) + config CUDA_4_0 bool "CUDA 4.0" depends on LITMUS_NVIDIA diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index b6ead58802f6..d04c6efa5f05 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -31,6 +31,9 @@ typedef union typedef struct { NvU64 address; +#ifdef CONFIG_CUDA_5_0 + NvU64 strapped_size; +#endif NvU64 size; NvU32 offset; NvU32 *map; @@ -42,7 +45,9 @@ typedef struct void *priv; /* private data */ void *os_state; /* os-specific device state */ +#ifndef CONFIG_CUDA_5_0 int rmInitialized; +#endif int flags; /* PCI config info */ @@ -101,7 +106,7 @@ typedef struct litmus_nv_linux_state_s { void *pci_cfgchk_sp; void *isr_bh_sp; -#ifdef CONFIG_CUDA_4_0 +#if defined(CONFIG_CUDA_4_0) || defined(CONFIG_CUDA_5_0) char registry_keys[512]; #endif -- cgit v1.2.2 From 0fb745065f08796fe4f17acb9b9edacc1e374842 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 17 Sep 2012 09:57:37 -0400 Subject: Do processor state transitions in schedule_tail(). Fixes a bug in Litmus where processor scheduling states could become corrupted. Corruption can occur when a just-forked thread is externally forced to be scheduled by SCHED_LITMUS before this just-forked thread can complete post-fork processing. Specifically, before schedule_tail() has completed. --- kernel/sched.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 2229d0deec4b..65aba7ec564d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3163,16 +3163,26 @@ static inline void post_schedule(struct rq *rq) asmlinkage void schedule_tail(struct task_struct *prev) __releases(rq->lock) { - struct rq *rq = this_rq(); - + struct rq *rq; + + preempt_disable(); + + rq = this_rq(); finish_task_switch(rq, prev); + sched_trace_task_switch_to(current); + /* * FIXME: do we need to worry about rq being invalidated by the * task_switch? */ post_schedule(rq); + if (sched_state_validate_switch()) + litmus_reschedule_local(); + + preempt_enable(); + #ifdef __ARCH_WANT_UNLOCKED_CTXSW /* In this case, finish_task_switch does not reenable preemption */ preempt_enable(); -- cgit v1.2.2 From ba54b1096870fba6e3bbb99aafc713e76b747353 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 17 Sep 2012 19:31:04 -0400 Subject: Fixed three bugs with aux threads and nested locks Fixes three bugs with nested locks: 1) List of aux threads could become corrupted. -- moved modifications to be within scheduler lock. 2) Fixed bad EDF comparison ordering that could lead to schedule thrashing in an infinite loop. 3) Prevent aux threads from inheriting a priority from a task that is blocked on a real-time litmus lock. (since the aux threads can't possibly hold these locks, we don't have to worry about inheritance.)
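Item 3 is implemented by having the Litmus lock slow paths block through the new suspend_for_lock() helper (added in litmus/locking.c) instead of calling schedule() directly. A minimal sketch of the intended call site is below; example_lock_slowpath_tail() is a placeholder, and the real call sites are the *_lock() functions touched by this patch:

    static void example_lock_slowpath_tail(void)
    {
            /* caller has already enqueued current on the lock's wait
             * queue and marked it blocked */

            TS_LOCK_SUSPEND;

            /* block without exposing our priority to our own aux
             * threads: while we are blocked on a litmus lock,
             * inheritance already flows to the lock holder, so
             * suspend_for_lock() temporarily sets
             * tsk_rt(current)->hide_from_aux_tasks around schedule() */
            suspend_for_lock();

            TS_LOCK_RESUME;

            /* on return, the previous holder has handed the lock to us */
    }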
--- include/litmus/locking.h | 3 +++ include/litmus/rt_param.h | 1 + litmus/aux_tasks.c | 58 ++++++++++++++++++++++++++++++----------------- litmus/edf_common.c | 10 ++++++++ litmus/ikglp_lock.c | 2 +- litmus/kfmlp_lock.c | 2 +- litmus/locking.c | 29 ++++++++++++++++++++++++ litmus/rsm_lock.c | 2 +- litmus/sched_cedf.c | 17 ++++++-------- litmus/sched_gsn_edf.c | 19 +++++++--------- 10 files changed, 98 insertions(+), 45 deletions(-) diff --git a/include/litmus/locking.h b/include/litmus/locking.h index 296bbf6f7af0..4a5f198a0407 100644 --- a/include/litmus/locking.h +++ b/include/litmus/locking.h @@ -156,5 +156,8 @@ struct litmus_lock_ops { #endif +void suspend_for_lock(void); + + #endif diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 8b9e14c461dc..44f85a366574 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -288,6 +288,7 @@ struct rt_param { #ifdef CONFIG_REALTIME_AUX_TASKS unsigned int is_aux_task:1; unsigned int has_aux_tasks:1; + unsigned int hide_from_aux_tasks:1; struct list_head aux_task_node; struct binheap_node aux_task_owner_node; diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index b0617accdf7f..efda7dc0bd76 100644 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -55,7 +55,7 @@ int exit_aux_task(struct task_struct *t) tsk_rt(t)->is_aux_task = 0; if (tsk_rt(t)->inh_task) { - litmus->decrease_prio(t, NULL); + litmus->__decrease_prio(t, NULL); } return retval; @@ -114,31 +114,37 @@ int aux_task_owner_increase_priority(struct task_struct *t) int retval = 0; struct task_struct *leader; struct task_struct *hp = NULL; + struct task_struct *hp_eff = NULL; - BUG_ON(!tsk_rt(t)->has_aux_tasks); BUG_ON(!is_realtime(t)); - BUG_ON(!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)); + BUG_ON(!tsk_rt(t)->has_aux_tasks); leader = t->group_leader; + if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { + WARN_ON(!is_running(t)); + TRACE_CUR("aux tasks may not inherit from %s/%d in group %s/%d\n", + t->comm, t->pid, leader->comm, leader->pid); + goto out; + } + TRACE_CUR("task %s/%d in group %s/%d increasing priority.\n", t->comm, t->pid, leader->comm, leader->pid); hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); + hp_eff = effective_priority(hp); - if (hp == t) { - goto out; // already hp, nothing to do. + if (hp != t) { /* our position in the heap may have changed. hp is already at the root. */ + binheap_decrease(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); } - binheap_decrease(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); - hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - if (hp == t) { + if (effective_priority(hp) != hp_eff) { /* the eff. prio. of hp has changed */ + hp_eff = effective_priority(hp); TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); - retval = aux_tasks_increase_priority(leader, - (tsk_rt(hp)->inh_task) ? 
tsk_rt(hp)->inh_task : hp); + retval = aux_tasks_increase_priority(leader, hp_eff); } out: @@ -150,30 +156,41 @@ int aux_task_owner_decrease_priority(struct task_struct *t) int retval = 0; struct task_struct *leader; struct task_struct *hp = NULL; - struct task_struct *new_hp = NULL; + struct task_struct *hp_eff = NULL; - BUG_ON(!tsk_rt(t)->has_aux_tasks); BUG_ON(!is_realtime(t)); - BUG_ON(!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)); + BUG_ON(!tsk_rt(t)->has_aux_tasks); leader = t->group_leader; + if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { + WARN_ON(!is_running(t)); + TRACE_CUR("aux tasks may not inherit from %s/%d in group %s/%d\n", + t->comm, t->pid, leader->comm, leader->pid); + goto out; + } + TRACE_CUR("task %s/%d in group %s/%d decresing priority.\n", t->comm, t->pid, leader->comm, leader->pid); hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); + hp_eff = effective_priority(hp); binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node); - new_hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), - struct task_struct, rt_param); - if (hp == t && new_hp != t) { - TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); - retval = aux_tasks_decrease_priority(leader, - (tsk_rt(new_hp)->inh_task) ? tsk_rt(new_hp)->inh_task : new_hp); + if (hp == t) { /* t was originally the hp */ + struct task_struct *new_hp = + container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + if (effective_priority(new_hp) != hp_eff) { /* eff prio. of hp has changed */ + hp_eff = effective_priority(new_hp); + TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + retval = aux_tasks_decrease_priority(leader, hp_eff); + } } +out: return retval; } @@ -302,8 +319,7 @@ static long __do_enable_aux_tasks(void) tsk_rt(t)->task_params.period); /* inspect heap_node to see if it is an rt task */ - if (tsk_rt(t)->task_params.period == 0) { //|| - // tsk_rt(t)->task_params.period == MAGIC_AUX_TASK_PERIOD) { + if (tsk_rt(t)->task_params.period == 0) { if (!tsk_rt(t)->is_aux_task) { TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 7e0d3a5d0c4d..f4881452373d 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -237,6 +237,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) second_task->rt_param.is_proxy_thread) { return 1; } + else if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) { #endif #ifdef CONFIG_REALTIME_AUX_TASKS @@ -244,6 +245,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { return 1; } + else if (tsk_rt(first)->is_aux_task == tsk_rt(second)->is_aux_task) { #endif /* Something could be wrong if you get this far. 
*/ @@ -281,6 +283,14 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } } + +#ifdef CONFIG_REALTIME_AUX_TASKS + } +#endif +#ifdef CONFIG_LITMUS_SOFTIRQD + } +#endif + } } } diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c index 83b708ab85cb..bd7bfc0f48ac 100644 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c @@ -963,7 +963,7 @@ int ikglp_lock(struct litmus_lock* l) TS_LOCK_SUSPEND; - schedule(); + suspend_for_lock(); TS_LOCK_RESUME; diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c index bff857ed8d4e..ab472330095d 100644 --- a/litmus/kfmlp_lock.c +++ b/litmus/kfmlp_lock.c @@ -267,7 +267,7 @@ int kfmlp_lock(struct litmus_lock* l) * when we wake up; we are guaranteed to have the lock since * there is only one wake up per release (or steal). */ - schedule(); + suspend_for_lock(); if(my_queue->owner == t) { diff --git a/litmus/locking.c b/litmus/locking.c index 12a23eb715cc..16c936ba8139 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -540,6 +540,35 @@ out: return passed; } + +void suspend_for_lock(void) +{ +#ifdef CONFIG_REALTIME_AUX_TASKS + unsigned int restore = 0; + struct task_struct *t = current; + unsigned int hide; + + if (tsk_rt(t)->has_aux_tasks) { + /* hide from aux tasks so they can't inherit our priority when we block + * for a litmus lock. inheritance is already going to a litmus lock + * holder. */ + hide = tsk_rt(t)->hide_from_aux_tasks; + restore = 1; + tsk_rt(t)->hide_from_aux_tasks = 1; + } +#endif + + schedule(); + +#ifdef CONFIG_REALTIME_AUX_TASKS + if (restore) { + /* restore our state */ + tsk_rt(t)->hide_from_aux_tasks = hide; + } +#endif +} + + #else // CONFIG_LITMUS_LOCKING struct fdso_ops generic_lock_ops = {}; diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c index 75ed87c5ed48..3dfd8ae9d221 100644 --- a/litmus/rsm_lock.c +++ b/litmus/rsm_lock.c @@ -289,7 +289,7 @@ int rsm_mutex_lock(struct litmus_lock* l) * there is only one wake up per release. 
*/ - schedule(); + suspend_for_lock(); TS_LOCK_RESUME; diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index f5c9807090a1..6746d4d6033e 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -1032,7 +1032,7 @@ static void cedf_task_wake_up(struct task_struct *task) #endif #ifdef CONFIG_REALTIME_AUX_TASKS - if (tsk_rt(task)->has_aux_tasks) { + if (tsk_rt(task)->has_aux_tasks && !tsk_rt(task)->hide_from_aux_tasks) { TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); disable_aux_task_owner(task); } @@ -1057,7 +1057,7 @@ static void cedf_task_block(struct task_struct *t) unlink(t); #ifdef CONFIG_REALTIME_AUX_TASKS - if (tsk_rt(t)->has_aux_tasks) { + if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) { TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); enable_aux_task_owner(t); @@ -1079,19 +1079,16 @@ static void cedf_task_exit(struct task_struct * t) cedf_change_prio_pai_tasklet(t, NULL); #endif -#ifdef CONFIG_REALTIME_AUX_TASKS - if (tsk_rt(t)->is_aux_task) { - exit_aux_task(t); /* cannot be called with gsnedf_lock held */ - } -#endif - /* unlink if necessary */ raw_spin_lock_irqsave(&cluster->cluster_lock, flags); #ifdef CONFIG_REALTIME_AUX_TASKS /* make sure we clean up on our way out */ - if(tsk_rt(t)->has_aux_tasks) { - disable_aux_task_owner(t); /* must be called witl gsnedf_lock held */ + if (unlikely(tsk_rt(t)->is_aux_task)) { + exit_aux_task(t); + } + else if(tsk_rt(t)->has_aux_tasks) { + disable_aux_task_owner(t); } #endif diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index ed9b4697a5a2..04b189e54b03 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -1050,7 +1050,7 @@ static void gsnedf_task_wake_up(struct task_struct *task) #endif #ifdef CONFIG_REALTIME_AUX_TASKS - if (tsk_rt(task)->has_aux_tasks) { + if (tsk_rt(task)->has_aux_tasks && !tsk_rt(task)->hide_from_aux_tasks) { TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); disable_aux_task_owner(task); } @@ -1072,7 +1072,7 @@ static void gsnedf_task_block(struct task_struct *t) unlink(t); #ifdef CONFIG_REALTIME_AUX_TASKS - if (tsk_rt(t)->has_aux_tasks) { + if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) { TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); enable_aux_task_owner(t); @@ -1093,19 +1093,16 @@ static void gsnedf_task_exit(struct task_struct * t) gsnedf_change_prio_pai_tasklet(t, NULL); #endif -#ifdef CONFIG_REALTIME_AUX_TASKS - if (tsk_rt(t)->is_aux_task) { - exit_aux_task(t); /* cannot be called with gsnedf_lock held */ - } -#endif - /* unlink if necessary */ raw_spin_lock_irqsave(&gsnedf_lock, flags); #ifdef CONFIG_REALTIME_AUX_TASKS /* make sure we clean up on our way out */ - if(tsk_rt(t)->has_aux_tasks) { - disable_aux_task_owner(t); /* must be called witl gsnedf_lock held */ + if (unlikely(tsk_rt(t)->is_aux_task)) { + exit_aux_task(t); + } + else if(tsk_rt(t)->has_aux_tasks) { + disable_aux_task_owner(t); } #endif @@ -1664,7 +1661,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l) * there is only one wake up per release. */ - schedule(); + suspend_for_lock(); TS_LOCK_RESUME; -- cgit v1.2.2 From 33cb64c787070d6b60a02ea40064d717d3b9dc07 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 17 Sep 2012 19:42:28 -0400 Subject: Description of refined aux task inheritance. 
--- litmus/Kconfig | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/litmus/Kconfig b/litmus/Kconfig index b28fe2c09acd..a32f42898148 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -41,11 +41,12 @@ config REALTIME_AUX_TASKS help Adds a system call that forces all non-real-time threads in a process to become auxillary real-time tasks. These tasks inherit the priority of - the highest-prio *BLOCKED* real-time task (non-auxillary) in the process. - This allows the integration of COTS code that has background helper threads - used primarily for message passing and synchronization. If these - background threads are NOT real-time scheduled, then unbounded priority - inversions may occur if a real-time task blocks on a non-real-time thread. + the highest-prio *BLOCKED* (but NOT blocked on a Litmus lock) real-time + task (non-auxillary) in the process. This allows the integration of COTS + code that has background helper threads used primarily for message passing + and synchronization. If these background threads are NOT real-time scheduled, + then unbounded priority inversions may occur if a real-time task blocks on + a non-real-time thread. Beware of the following pitfalls: 1) Auxillary threads should not be CPU intensive. They should mostly @@ -65,6 +66,14 @@ config REALTIME_AUX_TASKS work.). Futher, auxillary tasks without an inherited priority are _always_ scheduled with a priority less than any normal real-time task!! + NOTE: Aux tasks do not _directly_ inherit a priority from rt tasks that + are blocked on Litmus locks. Aux task should be COTS code that know nothing + of Litmus, so they won't hold Litmus locks. Nothing the aux task can do can + _directly_ unblock the rt task blocked on a Litmus lock. However, the lock + holder that blocks the rt task CAN block on I/O and contribute its priority + to the aux tasks. Aux tasks may still _indirectly_ inherit the priority of + the blocked rt task via the lock holder. 
+ endmenu menu "Real-Time Synchronization" -- cgit v1.2.2 From 5dbf603e3ca76a3903b83aacc6bb19e6aa0d924a Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 11 Nov 2012 11:43:03 -0500 Subject: boost aux tasks unconditionally --- litmus/aux_tasks.c | 26 +++++++++++++++++++++++++- litmus/edf_common.c | 24 ++++++++++++++++++++---- litmus/litmus.c | 4 ++-- litmus/locking.c | 4 ++++ 4 files changed, 51 insertions(+), 7 deletions(-) diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index efda7dc0bd76..9572d960b46a 100644 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -50,6 +50,7 @@ int exit_aux_task(struct task_struct *t) TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); +#if 0 list_del(&tsk_rt(t)->aux_task_node); tsk_rt(t)->is_aux_task = 0; @@ -57,6 +58,7 @@ int exit_aux_task(struct task_struct *t) if (tsk_rt(t)->inh_task) { litmus->__decrease_prio(t, NULL); } +#endif return retval; } @@ -64,6 +66,8 @@ int exit_aux_task(struct task_struct *t) static int aux_tasks_increase_priority(struct task_struct *leader, struct task_struct *hp) { int retval = 0; + +#if 0 struct list_head *pos; TRACE_CUR("Increasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); @@ -81,6 +85,7 @@ static int aux_tasks_increase_priority(struct task_struct *leader, struct task_s TRACE_CUR("increasing %s/%d.\n", aux->comm, aux->pid); retval = litmus->__increase_prio(aux, hp); } +#endif return retval; } @@ -88,6 +93,8 @@ static int aux_tasks_increase_priority(struct task_struct *leader, struct task_s static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_struct *hp) { int retval = 0; + +#if 0 struct list_head *pos; TRACE_CUR("Decreasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); @@ -105,6 +112,7 @@ static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_s retval = litmus->__decrease_prio(aux, hp); } } +#endif return retval; } @@ -112,6 +120,8 @@ static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_s int aux_task_owner_increase_priority(struct task_struct *t) { int retval = 0; + +#if 0 struct task_struct *leader; struct task_struct *hp = NULL; struct task_struct *hp_eff = NULL; @@ -146,6 +156,7 @@ int aux_task_owner_increase_priority(struct task_struct *t) TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); retval = aux_tasks_increase_priority(leader, hp_eff); } +#endif out: return retval; @@ -154,6 +165,8 @@ out: int aux_task_owner_decrease_priority(struct task_struct *t) { int retval = 0; + +#if 0 struct task_struct *leader; struct task_struct *hp = NULL; struct task_struct *hp_eff = NULL; @@ -189,6 +202,7 @@ int aux_task_owner_decrease_priority(struct task_struct *t) retval = aux_tasks_decrease_priority(leader, hp_eff); } } +#endif out: return retval; @@ -199,6 +213,8 @@ out: long enable_aux_task_owner(struct task_struct *t) { long retval = 0; + +#if 0 struct task_struct *leader = t->group_leader; struct task_struct *hp; @@ -226,7 +242,7 @@ long enable_aux_task_owner(struct task_struct *t) retval = aux_tasks_increase_priority(leader, (tsk_rt(hp)->inh_task)? 
tsk_rt(hp)->inh_task : hp); } - +#endif out: return retval; @@ -235,6 +251,8 @@ out: long disable_aux_task_owner(struct task_struct *t) { long retval = 0; + +#if 0 struct task_struct *leader = t->group_leader; struct task_struct *hp; struct task_struct *new_hp = NULL; @@ -273,6 +291,7 @@ long disable_aux_task_owner(struct task_struct *t) retval = aux_tasks_decrease_priority(leader, to_inh); } +#endif out: return retval; @@ -324,7 +343,10 @@ static long __do_enable_aux_tasks(void) TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ tsk_rt(t)->is_aux_task = 1; + +#if 0 list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); +#endif (void)admit_aux_task(t); } @@ -346,6 +368,7 @@ static long __do_enable_aux_tasks(void) } while(t != leader); +#if 0 if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); @@ -353,6 +376,7 @@ static long __do_enable_aux_tasks(void) retval = aux_tasks_increase_priority(leader, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); } +#endif return retval; } diff --git a/litmus/edf_common.c b/litmus/edf_common.c index f4881452373d..916b1b4309b7 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -92,10 +92,20 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) #ifdef CONFIG_REALTIME_AUX_TASKS { - /* statically prioritize all auxillary tasks that have no inheritance - * below all other regular real-time tasks. - */ + /* run aux tasks at max priority */ + if (first->rt_param.is_aux_task != second->rt_param.is_aux_task) + { + return (first->rt_param.is_aux_task > second->rt_param.is_aux_task); + } + else if(first->rt_param.is_aux_task && second->rt_param.is_aux_task) + { + first = first->group_leader; + second = second->group_leader; + } + + +#if 0 int first_lo_aux, second_lo_aux; int first_hi_aux, second_hi_aux; first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; @@ -123,6 +133,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) TRACE_CUR("hi aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_hi_tie_break); return aux_hi_tie_break; } +#endif + } #endif @@ -240,12 +252,14 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) else if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) { #endif +#if 0 #ifdef CONFIG_REALTIME_AUX_TASKS /* is this dead code? */ if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { return 1; } else if (tsk_rt(first)->is_aux_task == tsk_rt(second)->is_aux_task) { +#endif #endif /* Something could be wrong if you get this far. 
*/ @@ -283,10 +297,12 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } } - +#if 0 #ifdef CONFIG_REALTIME_AUX_TASKS } #endif +#endif + #ifdef CONFIG_LITMUS_SOFTIRQD } #endif diff --git a/litmus/litmus.c b/litmus/litmus.c index d368202ab8c3..953a591fad5f 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -388,8 +388,8 @@ static void reinit_litmus_state(struct task_struct* p, int restore) #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING - WARN_ON(p->rt_param.blocked_lock); - WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks)); +// WARN_ON(p->rt_param.blocked_lock); +// WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks)); #endif #ifdef CONFIG_LITMUS_SOFTIRQD diff --git a/litmus/locking.c b/litmus/locking.c index 16c936ba8139..22f46df4308a 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -544,6 +544,7 @@ out: void suspend_for_lock(void) { #ifdef CONFIG_REALTIME_AUX_TASKS +#if 0 unsigned int restore = 0; struct task_struct *t = current; unsigned int hide; @@ -556,16 +557,19 @@ void suspend_for_lock(void) restore = 1; tsk_rt(t)->hide_from_aux_tasks = 1; } +#endif #endif schedule(); #ifdef CONFIG_REALTIME_AUX_TASKS +#if 0 if (restore) { /* restore our state */ tsk_rt(t)->hide_from_aux_tasks = hide; } #endif +#endif } -- cgit v1.2.2 From dede6a6b8ce09f48295d8ba4635480c98ef85284 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 11 Nov 2012 13:10:43 -0500 Subject: improve ikglp heuristics --- include/litmus/ikglp_lock.h | 4 ++ include/litmus/rt_param.h | 5 ++ litmus/ikglp_lock.c | 117 +++++++++++++++++++++++++++++++++++++++++--- litmus/litmus.c | 5 ++ 4 files changed, 123 insertions(+), 8 deletions(-) diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h index 0b89c8135360..9d0cd3d1904e 100644 --- a/include/litmus/ikglp_lock.h +++ b/include/litmus/ikglp_lock.h @@ -114,6 +114,7 @@ struct ikglp_queue_info struct fifo_queue* q; lt_t estimated_len; int *nr_cur_users; + int *nr_aff_users; }; struct ikglp_affinity_ops @@ -128,6 +129,8 @@ struct ikglp_affinity_ops void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica acquired void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica freed int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq); // convert a replica # to a GPU (includes offsets and simult user folding) + + int (*notify_exit)(struct ikglp_affinity* aff, struct task_struct* t); }; struct ikglp_affinity @@ -136,6 +139,7 @@ struct ikglp_affinity struct ikglp_affinity_ops *ops; struct ikglp_queue_info *q_info; int *nr_cur_users_on_rsrc; + int *nr_aff_on_rsrc; int offset; int nr_simult; int nr_rsrc; diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 44f85a366574..cb7c3ac64339 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -188,6 +188,8 @@ typedef struct feedback_est{ #define AVG_EST_WINDOW_SIZE 20 +typedef int (*notify_rsrc_exit_t)(struct task_struct* tsk); + typedef struct avg_est{ lt_t history[AVG_EST_WINDOW_SIZE]; uint16_t count; @@ -248,6 +250,9 @@ struct rt_param { gpu_migration_dist_t gpu_migration; int last_gpu; + notify_rsrc_exit_t rsrc_exit_cb; + void* rsrc_exit_cb_args; + lt_t accum_gpu_time; lt_t gpu_time_stamp; diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c index bd7bfc0f48ac..9c57bc24e8bd 100644 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c @@ -1896,7 +1896,18 @@ int ikglp_aff_obs_close(struct affinity_observer* obs) void 
ikglp_aff_obs_free(struct affinity_observer* obs) { struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs); + + // make sure the thread destroying this semaphore will not + // call the exit callback on a destroyed lock. + struct task_struct *t = current; + if (is_realtime(t) && tsk_rt(t)->rsrc_exit_cb_args == ikglp_aff) + { + tsk_rt(t)->rsrc_exit_cb = NULL; + tsk_rt(t)->rsrc_exit_cb_args = NULL; + } + kfree(ikglp_aff->nr_cur_users_on_rsrc); + kfree(ikglp_aff->nr_aff_on_rsrc); kfree(ikglp_aff->q_info); kfree(ikglp_aff); } @@ -1960,6 +1971,14 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* return(NULL); } + ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); + if(!ikglp_aff->nr_aff_on_rsrc) { + kfree(ikglp_aff->nr_cur_users_on_rsrc); + kfree(ikglp_aff->q_info); + kfree(ikglp_aff); + return(NULL); + } + affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs); ikglp_aff->ops = ikglp_ops; @@ -1974,6 +1993,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ikglp_aff->relax_max_fifo_len); memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); + memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); for(i = 0; i < sem->nr_replicas; ++i) { ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; @@ -1982,6 +2002,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* // multiple q_info's will point to the same resource (aka GPU) if // aff_args.nr_simult_users > 1 ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)]; + ikglp_aff->q_info[i].nr_aff_users = &ikglp_aff->nr_aff_on_rsrc[__replica_to_gpu(ikglp_aff,i)]; } // attach observer to the lock @@ -2035,7 +2056,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); lt_t min_len; - int min_nr_users; + int min_nr_users, min_nr_aff_users; struct ikglp_queue_info *shortest; struct fifo_queue *to_enqueue; int i; @@ -2044,11 +2065,20 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t int max_fifo_len = (aff->relax_max_fifo_len) ? sem->m : sem->max_fifo_len; - // simply pick the shortest queue if, we have no affinity, or we have - // affinity with the shortest + // if we have no affinity, find the GPU with the least number of users + // with active affinity if(unlikely(tsk_rt(t)->last_gpu < 0)) { - affinity_gpu = aff->offset; // first gpu - TRACE_CUR("no affinity\n"); + int temp_min = aff->nr_aff_on_rsrc[0]; + affinity_gpu = aff->offset; + + for(i = 1; i < aff->nr_rsrc; ++i) { + if(aff->nr_aff_on_rsrc[i] < temp_min) { + affinity_gpu = aff->offset + i; + } + } + + TRACE_CUR("no affinity. defaulting to %d with %d aff users.\n", + affinity_gpu, temp_min); } else { affinity_gpu = tsk_rt(t)->last_gpu; @@ -2066,6 +2096,8 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); min_nr_users = *(shortest->nr_cur_users); + min_nr_aff_users = *(shortest->nr_aff_users); + TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", get_gpu_estimate(t, MIG_LOCAL), @@ -2088,14 +2120,21 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t // // tie-break on the shortest number of simult users. this only kicks in // when there are more than 1 empty queues. 
+ + // TODO: Make "est_len < min_len" a fuzzy function that allows + // queues "close enough" in length to be considered equal. + if((shortest->q->count >= max_fifo_len) || /* 'shortest' is full and i-th queue is not */ (est_len < min_len) || /* i-th queue has shortest length */ ((est_len == min_len) && /* equal lengths, but one has fewer over-all users */ - (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { + ((*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) || + ((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && + (*(aff->q_info[i].nr_cur_users) < min_nr_users))))) { shortest = &aff->q_info[i]; min_len = est_len; min_nr_users = *(aff->q_info[i].nr_cur_users); + min_nr_aff_users = *(aff->q_info[i].nr_aff_users); } TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", @@ -2612,6 +2651,51 @@ void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, // } } +int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t) +{ + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); + unsigned long flags = 0, real_flags; + int aff_rsrc; +#ifdef CONFIG_LITMUS_DGL_SUPPORT + raw_spinlock_t *dgl_lock; + + dgl_lock = litmus->get_dgl_spinlock(t); +#endif + + if (tsk_rt(t)->last_gpu < 0) + return 0; + + raw_spin_lock_irqsave(&sem->real_lock, real_flags); + lock_global_irqsave(dgl_lock, flags); + lock_fine_irqsave(&sem->lock, flags); + + // decrement affinity count on old GPU + aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; + --(aff->nr_aff_on_rsrc[aff_rsrc]); + + if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { + WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); + aff->nr_aff_on_rsrc[aff_rsrc] = 0; + } + + unlock_fine_irqrestore(&sem->lock, flags); + unlock_global_irqrestore(dgl_lock, flags); + raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); + + return 0; +} + +int gpu_ikglp_notify_exit_trampoline(struct task_struct* t) +{ + struct ikglp_affinity* aff = (struct ikglp_affinity*)tsk_rt(t)->rsrc_exit_cb_args; + if(likely(aff)) { + return gpu_ikglp_notify_exit(aff, t); + } + else { + return -1; + } +} + void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) @@ -2619,15 +2703,28 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); int replica = ikglp_get_idx(sem, fq); int gpu = replica_to_gpu(aff, replica); + int last_gpu = tsk_rt(t)->last_gpu; - tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration + tsk_rt(t)->gpu_migration = gpu_migration_distance(last_gpu, gpu); // record the type of migration TRACE_CUR("%s/%d acquired gpu %d (prev = %d). 
migration type = %d\n", - t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration); + t->comm, t->pid, gpu, last_gpu, tsk_rt(t)->gpu_migration); // count the number or resource holders ++(*(aff->q_info[replica].nr_cur_users)); + if(gpu != last_gpu) { + if(last_gpu >= 0) { + int old_rsrc = last_gpu - aff->offset; + --(aff->nr_aff_on_rsrc[old_rsrc]); + } + + // increment affinity count on new GPU + ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); + tsk_rt(t)->rsrc_exit_cb_args = aff; + tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; + } + reg_nv_device(gpu, 1, t); // register tsk_rt(t)->suspend_gpu_tracker_on_block = 0; @@ -2679,6 +2776,8 @@ struct ikglp_affinity_ops gpu_ikglp_affinity = .notify_acquired = gpu_ikglp_notify_acquired, .notify_freed = gpu_ikglp_notify_freed, + .notify_exit = gpu_ikglp_notify_exit, + .replica_to_resource = gpu_replica_to_resource, }; @@ -2817,6 +2916,8 @@ struct ikglp_affinity_ops simple_gpu_ikglp_affinity = .notify_acquired = simple_gpu_ikglp_notify_acquired, .notify_freed = simple_gpu_ikglp_notify_freed, + .notify_exit = NULL, + .replica_to_resource = gpu_replica_to_resource, }; diff --git a/litmus/litmus.c b/litmus/litmus.c index d368202ab8c3..143c746c344e 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -533,6 +533,11 @@ void litmus_exit_task(struct task_struct* tsk) if (is_realtime(tsk)) { sched_trace_task_completion(tsk, 1); + if (tsk_rt(tsk)->rsrc_exit_cb) { + int ret = tsk_rt(tsk)->rsrc_exit_cb(tsk); + WARN_ON(ret != 0); + } + litmus->task_exit(tsk); BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); -- cgit v1.2.2 From 3ee5f13b8213270ba30e4b3625dff46b1cc8326f Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 30 Nov 2012 13:36:03 -0500 Subject: More improvements on affinity heuristics --- include/litmus/gpu_affinity.h | 16 ++++++++++ include/litmus/ikglp_lock.h | 2 +- litmus/edf_common.c | 2 +- litmus/ikglp_lock.c | 68 ++++++++++++++++++++++++++++++++----------- 4 files changed, 69 insertions(+), 19 deletions(-) diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h index d64a15cbf2a5..47da725717b0 100644 --- a/include/litmus/gpu_affinity.h +++ b/include/litmus/gpu_affinity.h @@ -31,6 +31,21 @@ static inline lt_t get_gpu_time(struct task_struct* t) static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) { + int i; + lt_t val; + + if(dist == MIG_NONE) { + dist = MIG_LOCAL; + } + + val = t->rt_param.gpu_migration_est[dist].avg; + for(i = dist-1; i >= 0; --i) { + if(t->rt_param.gpu_migration_est[i].avg > val) { + val = t->rt_param.gpu_migration_est[i].avg; + } + } + +#if 0 // int i; // fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); // lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates... @@ -43,6 +58,7 @@ static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t // for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) { // val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); // } +#endif return ((val > 0) ? 
val : dist+1); } diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h index 9d0cd3d1904e..89d9c37c7631 100644 --- a/include/litmus/ikglp_lock.h +++ b/include/litmus/ikglp_lock.h @@ -139,7 +139,7 @@ struct ikglp_affinity struct ikglp_affinity_ops *ops; struct ikglp_queue_info *q_info; int *nr_cur_users_on_rsrc; - int *nr_aff_on_rsrc; + int64_t *nr_aff_on_rsrc; int offset; int nr_simult; int nr_rsrc; diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 916b1b4309b7..a9bf0c08e125 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -272,7 +272,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } else if (first->pid == second->pid) { - WARN_ON(1); + //WARN_ON(1); } } else { diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c index 9c57bc24e8bd..16ae621bbf75 100644 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c @@ -1887,6 +1887,19 @@ static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu) return replica; } +static inline int same_gpu(struct ikglp_affinity* aff, int replica_a, int replica_b) +{ + return(replica_to_gpu(aff, replica_a) == replica_to_gpu(aff, replica_b)); +} + +static inline int has_affinity(struct ikglp_affinity* aff, struct task_struct* t, int replica) +{ + if(tsk_rt(t)->last_gpu >= 0) + { + return (tsk_rt(t)->last_gpu == replica_to_gpu(aff, replica)); + } + return 0; +} int ikglp_aff_obs_close(struct affinity_observer* obs) { @@ -1971,7 +1984,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* return(NULL); } - ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); + ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); if(!ikglp_aff->nr_aff_on_rsrc) { kfree(ikglp_aff->nr_cur_users_on_rsrc); kfree(ikglp_aff->q_info); @@ -1993,7 +2006,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ikglp_aff->relax_max_fifo_len); memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); - memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); + memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc)); for(i = 0; i < sem->nr_replicas; ++i) { ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; @@ -2057,7 +2070,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); lt_t min_len; int min_nr_users, min_nr_aff_users; - struct ikglp_queue_info *shortest; + struct ikglp_queue_info *shortest, *aff_queue; struct fifo_queue *to_enqueue; int i; int affinity_gpu; @@ -2087,7 +2100,8 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t // all things being equal, let's start with the queue with which we have // affinity. 
this helps us maintain affinity even when we don't have // an estiamte for local-affinity execution time (i.e., 2nd time on GPU) - shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; + aff_queue = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; + shortest = aff_queue; // if(shortest == aff->shortest_queue) { // TRACE_CUR("special case: have affinity with shortest queue\n"); @@ -2108,29 +2122,46 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t for(i = 0; i < sem->nr_replicas; ++i) { if(&aff->q_info[i] != shortest) { if(aff->q_info[i].q->count < max_fifo_len) { + int want = 0; - lt_t est_len = - aff->q_info[i].estimated_len + + lt_t migration = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))); + lt_t est_len = aff->q_info[i].estimated_len + migration; - // queue is smaller, or they're equal and the other has a smaller number - // of total users. - // - // tie-break on the shortest number of simult users. this only kicks in - // when there are more than 1 empty queues. + // queue is smaller, or they're equal and the other has a smaller number + // of total users. + // + // tie-break on the shortest number of simult users. this only kicks in + // when there are more than 1 empty queues. // TODO: Make "est_len < min_len" a fuzzy function that allows // queues "close enough" in length to be considered equal. - if((shortest->q->count >= max_fifo_len) || /* 'shortest' is full and i-th queue is not */ - (est_len < min_len) || /* i-th queue has shortest length */ - ((est_len == min_len) && /* equal lengths, but one has fewer over-all users */ - ((*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) || - ((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && - (*(aff->q_info[i].nr_cur_users) < min_nr_users))))) { + /* NOTE: 'shortest' starts out with affinity GPU */ + if(unlikely(shortest->q->count >= max_fifo_len)) { /* 'shortest' is full and i-th queue is not */ + want = 1; + } + else if(est_len < min_len) { + want = 1; /* i-th queue has shortest length */ + } + else if(unlikely(est_len == min_len)) { /* equal lengths */ + if(!has_affinity(aff, t, ikglp_get_idx(sem, shortest->q))) { /* don't sacrifice affinity on tie */ + if(has_affinity(aff, t, i)) { + want = 1; /* switch to maintain affinity */ + } + else if(*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) { /* favor one with less affinity load */ + want = 1; + } + else if((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && /* equal number of affinity */ + (*(aff->q_info[i].nr_cur_users) < min_nr_users)) { /* favor one with current fewer users */ + want = 1; + } + } + } + if(want) { shortest = &aff->q_info[i]; min_len = est_len; min_nr_users = *(aff->q_info[i].nr_cur_users); @@ -2672,6 +2703,7 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t) // decrement affinity count on old GPU aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; --(aff->nr_aff_on_rsrc[aff_rsrc]); +// aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t); if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); @@ -2717,10 +2749,12 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, if(last_gpu >= 0) { int old_rsrc = last_gpu - aff->offset; --(aff->nr_aff_on_rsrc[old_rsrc]); +// aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t)); } // increment affinity count on new GPU ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); +// aff->nr_aff_on_rsrc[gpu - aff->offset] += 
((uint64_t)(1e9)/get_rt_period(t)); tsk_rt(t)->rsrc_exit_cb_args = aff; tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; } -- cgit v1.2.2 From 964297dd588ee6feab1aedecb2611bece2681973 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 30 Nov 2012 14:33:52 -0500 Subject: Fix compilation warnings. --- include/litmus/ikglp_lock.h | 2 +- litmus/Kconfig | 22 ++++++++++++++++++++++ litmus/aux_tasks.c | 18 +++++++++--------- litmus/edf_common.c | 21 +++++++-------------- litmus/ikglp_lock.c | 3 +++ 5 files changed, 42 insertions(+), 24 deletions(-) mode change 100644 => 100755 include/litmus/ikglp_lock.h mode change 100644 => 100755 litmus/Kconfig mode change 100644 => 100755 litmus/aux_tasks.c mode change 100644 => 100755 litmus/edf_common.c mode change 100644 => 100755 litmus/ikglp_lock.c diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h old mode 100644 new mode 100755 index 89d9c37c7631..af155eadbb35 --- a/include/litmus/ikglp_lock.h +++ b/include/litmus/ikglp_lock.h @@ -114,7 +114,7 @@ struct ikglp_queue_info struct fifo_queue* q; lt_t estimated_len; int *nr_cur_users; - int *nr_aff_users; + int64_t *nr_aff_users; }; struct ikglp_affinity_ops diff --git a/litmus/Kconfig b/litmus/Kconfig old mode 100644 new mode 100755 index a32f42898148..16087b9e4e81 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -56,6 +56,26 @@ config REALTIME_AUX_TASKS priority inversions may occur with respect to single-threaded task models if/when one of threads are scheduled simultanously with another of the same identity. + +choice + prompt "Scheduling prioritization of AUX tasks." + default REALTIME_AUX_TASK_PRIORITY_BOOSTED + help + Select the prioritization method for auxillary tasks. + +config REALTIME_AUX_TASK_PRIORITY_BOOSTED + bool "Boosted" + help + Run all auxillary task threads at a maximum priority. Useful for + temporarily working around bugs during development. + +config REALTIME_AUX_TASK_PRIORITY_INHERITANCE + bool "Inheritance" + help + Auxillary tasks inherit the maximum priority from blocked real-time + threads within the same process. + + Additional pitfall: 3) Busy-wait deadlock is likely between normal real-time tasks and auxillary tasks synchronize using _preemptive_ spinlocks that do not use priority inheritance. @@ -73,9 +93,11 @@ config REALTIME_AUX_TASKS holder that blocks the rt task CAN block on I/O and contribute its priority to the aux tasks. Aux tasks may still _indirectly_ inherit the priority of the blocked rt task via the lock holder. 
+endchoice endmenu + menu "Real-Time Synchronization" config NP_SECTION diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c old mode 100644 new mode 100755 index 9572d960b46a..bd7bcbed58fe --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -50,7 +50,7 @@ int exit_aux_task(struct task_struct *t) TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE list_del(&tsk_rt(t)->aux_task_node); tsk_rt(t)->is_aux_task = 0; @@ -67,7 +67,7 @@ static int aux_tasks_increase_priority(struct task_struct *leader, struct task_s { int retval = 0; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE struct list_head *pos; TRACE_CUR("Increasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); @@ -94,7 +94,7 @@ static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_s { int retval = 0; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE struct list_head *pos; TRACE_CUR("Decreasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); @@ -121,7 +121,7 @@ int aux_task_owner_increase_priority(struct task_struct *t) { int retval = 0; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE struct task_struct *leader; struct task_struct *hp = NULL; struct task_struct *hp_eff = NULL; @@ -166,7 +166,7 @@ int aux_task_owner_decrease_priority(struct task_struct *t) { int retval = 0; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE struct task_struct *leader; struct task_struct *hp = NULL; struct task_struct *hp_eff = NULL; @@ -214,7 +214,7 @@ long enable_aux_task_owner(struct task_struct *t) { long retval = 0; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE struct task_struct *leader = t->group_leader; struct task_struct *hp; @@ -252,7 +252,7 @@ long disable_aux_task_owner(struct task_struct *t) { long retval = 0; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE struct task_struct *leader = t->group_leader; struct task_struct *hp; struct task_struct *new_hp = NULL; @@ -344,7 +344,7 @@ static long __do_enable_aux_tasks(void) /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. 
*/ tsk_rt(t)->is_aux_task = 1; -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); #endif @@ -368,7 +368,7 @@ static long __do_enable_aux_tasks(void) } while(t != leader); -#if 0 +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); diff --git a/litmus/edf_common.c b/litmus/edf_common.c old mode 100644 new mode 100755 index a9bf0c08e125..5a3f5b417f73 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -43,7 +43,7 @@ static inline long edf_hash(struct task_struct *t) } #endif -#ifdef CONFIG_REALTIME_AUX_TASKS +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE int aux_tie_break(struct task_struct *first, struct task_struct *second) { long fhash = edf_hash(first); @@ -90,9 +90,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return first && !second; } -#ifdef CONFIG_REALTIME_AUX_TASKS - { +#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED) /* run aux tasks at max priority */ if (first->rt_param.is_aux_task != second->rt_param.is_aux_task) { @@ -103,9 +102,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) first = first->group_leader; second = second->group_leader; } - - -#if 0 +#elif defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) + { int first_lo_aux, second_lo_aux; int first_hi_aux, second_hi_aux; first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; @@ -133,11 +131,10 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) TRACE_CUR("hi aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_hi_tie_break); return aux_hi_tie_break; } -#endif - } #endif + #ifdef CONFIG_LITMUS_LOCKING /* Check for EFFECTIVE priorities. Change task * used for comparison in such a case. @@ -252,14 +249,12 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) else if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) { #endif -#if 0 -#ifdef CONFIG_REALTIME_AUX_TASKS +#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) /* is this dead code? */ if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { return 1; } else if (tsk_rt(first)->is_aux_task == tsk_rt(second)->is_aux_task) { -#endif #endif /* Something could be wrong if you get this far. 
*/ @@ -297,11 +292,9 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return 1; } } -#if 0 -#ifdef CONFIG_REALTIME_AUX_TASKS +#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) } #endif -#endif #ifdef CONFIG_LITMUS_SOFTIRQD } diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c old mode 100644 new mode 100755 index 16ae621bbf75..b29828344dd1 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c @@ -2598,7 +2598,10 @@ ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, str ikglp_wait_state_t *donor = NULL; int distance = MIG_NONE; int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq)); + +#ifdef CONFIG_SCHED_DEBUG_TRACE ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); +#endif __find_closest_donor(gpu, sem->donors.root, &donor, &distance); -- cgit v1.2.2 From 983773f990053cb0ced72afb4b69594e5d32c779 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 6 Dec 2012 20:37:20 -0500 Subject: AUX_FUTURE and revised inh-based aux tie break --- include/litmus/aux_tasks.h | 2 + include/litmus/litmus.h | 1 + include/litmus/rt_param.h | 7 ++ kernel/fork.c | 6 +- litmus/aux_tasks.c | 198 +++++++++++++++++++++++++++++++-------------- litmus/edf_common.c | 78 ++++++++---------- litmus/litmus.c | 8 ++ 7 files changed, 192 insertions(+), 108 deletions(-) mode change 100644 => 100755 include/litmus/aux_tasks.h mode change 100644 => 100755 include/litmus/litmus.h mode change 100644 => 100755 include/litmus/rt_param.h mode change 100644 => 100755 litmus/litmus.c diff --git a/include/litmus/aux_tasks.h b/include/litmus/aux_tasks.h old mode 100644 new mode 100755 index 3bb6b26fef09..87745c1c0df0 --- a/include/litmus/aux_tasks.h +++ b/include/litmus/aux_tasks.h @@ -6,6 +6,8 @@ struct task_struct; /* admit an aux task with default parameters */ //int admit_aux_task(struct task_struct *t); +int make_aux_task_if_required(struct task_struct *t); + /* call on an aux task when it exits real-time */ int exit_aux_task(struct task_struct *t); diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h old mode 100644 new mode 100755 index db2987a24686..711b88e2b3d1 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -32,6 +32,7 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq); #define NO_CPU 0xffffffff void litmus_fork(struct task_struct *tsk); +void litmus_post_fork_thread(struct task_struct *tsk); void litmus_exec(void); /* clean up real-time state of a task */ void exit_litmus(struct task_struct *dead_tsk); diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h old mode 100644 new mode 100755 index cb7c3ac64339..aca78a835529 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -41,6 +41,12 @@ typedef enum { PRECISE_SIGNALS, /* budget signals are triggered with hrtimers */ } budget_signal_policy_t; +typedef enum { + AUX_ENABLE = 0x1, + AUX_CURRENT = (AUX_ENABLE<<1), + AUX_FUTURE = (AUX_CURRENT<<2) +} aux_flags_t; + /* We use the common priority interpretation "lower index == higher priority", * which is commonly used in fixed-priority schedulability analysis papers. 
* So, a numerically lower priority value implies higher scheduling priority, @@ -370,6 +376,7 @@ struct aux_data struct list_head aux_tasks; struct binheap aux_task_owners; unsigned int initialized:1; + unsigned int aux_future:1; }; #endif diff --git a/kernel/fork.c b/kernel/fork.c index 25c6111fe3a6..7491c4f5e78c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1370,8 +1370,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); - if (clone_flags & CLONE_THREAD) + if (clone_flags & CLONE_THREAD) { threadgroup_fork_read_unlock(current); +#ifdef CONFIG_REALTIME_AUX_TASKS + litmus_post_fork_thread(p); +#endif + } perf_event_fork(p); return p; diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index bd7bcbed58fe..e5f3c82d32e9 100755 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -25,7 +25,7 @@ static int admit_aux_task(struct task_struct *t) struct rt_task tp = { .period = 1000000, /* 1ms */ .relative_deadline = 1000000, - .exec_cost = 1000000, /* allow full utilization */ + .exec_cost = 1000000, /* allow full utilization with buget tracking */ .phase = 0, .cpu = task_cpu(leader), /* take CPU of group leader */ .budget_policy = QUANTUM_ENFORCEMENT, @@ -44,17 +44,15 @@ static int admit_aux_task(struct task_struct *t) int exit_aux_task(struct task_struct *t) { int retval = 0; - struct task_struct *leader = t->group_leader; BUG_ON(!tsk_rt(t)->is_aux_task); - TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); + TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, t->group_leader->comm, t->group_leader->pid); + tsk_rt(t)->is_aux_task = 0; + #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE list_del(&tsk_rt(t)->aux_task_node); - - tsk_rt(t)->is_aux_task = 0; - if (tsk_rt(t)->inh_task) { litmus->__decrease_prio(t, NULL); } @@ -80,10 +78,14 @@ static int aux_tasks_increase_priority(struct task_struct *leader, struct task_s if (!is_realtime(aux)) { TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid); } - - // aux tasks don't touch rt locks, so no nested call needed. - TRACE_CUR("increasing %s/%d.\n", aux->comm, aux->pid); - retval = litmus->__increase_prio(aux, hp); + else if(tsk_rt(aux)->inh_task == hp) { + TRACE_CUR("skipping real-time aux task %s/%d that already inherits from %s/%d\n", aux->comm, aux->pid, hp->comm, hp->pid); + } + else { + // aux tasks don't touch rt locks, so no nested call needed. 
+ TRACE_CUR("increasing %s/%d.\n", aux->comm, aux->pid); + retval = litmus->__increase_prio(aux, hp); + } } #endif @@ -208,6 +210,54 @@ out: return retval; } +int make_aux_task_if_required(struct task_struct *t) +{ + struct task_struct *leader; + int retval = 0; + + read_lock_irq(&tasklist_lock); + + leader = t->group_leader; + + if(!tsk_aux(leader)->initialized || !tsk_aux(leader)->aux_future) { + goto out; + } + + TRACE_CUR("Making %s/%d in %s/%d an aux thread.\n", t->comm, t->pid, leader->comm, leader->pid); + + INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); + INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); + + retval = admit_aux_task(t); + if (retval == 0) { + tsk_rt(t)->is_aux_task = 1; + +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE + list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); + + if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { + struct task_struct *hp = + container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), + struct task_struct, rt_param); + + TRACE_CUR("hp in group: %s/%d\n", hp->comm, hp->pid); + + retval = litmus->__increase_prio(t, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + + if (retval != 0) { + /* don't know how to recover from bugs with prio inheritance. better just crash. */ + read_unlock_irq(&tasklist_lock); + BUG(); + } + } +#endif + } + +out: + read_unlock_irq(&tasklist_lock); + + return retval; +} long enable_aux_task_owner(struct task_struct *t) @@ -313,11 +363,12 @@ static int aux_task_owner_max_priority_order(struct binheap_node *a, } -static long __do_enable_aux_tasks(void) +static long __do_enable_aux_tasks(int flags) { long retval = 0; struct task_struct *leader; struct task_struct *t; + int aux_tasks_added = 0; leader = current->group_leader; @@ -327,34 +378,52 @@ static long __do_enable_aux_tasks(void) tsk_aux(leader)->initialized = 1; } + if (flags & AUX_FUTURE) { + tsk_aux(leader)->aux_future = 1; + } + t = leader; do { - /* doesn't hurt to initialize them both */ - INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); - INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); + if (!tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->is_aux_task) { + /* This may harmlessly reinit unused nodes. TODO: Don't reinit already init nodes. */ + /* doesn't hurt to initialize both nodes */ + INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); + INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); + } TRACE_CUR("Checking task in %s/%d: %s/%d = (p = %llu):\n", leader->comm, leader->pid, t->comm, t->pid, tsk_rt(t)->task_params.period); - - /* inspect heap_node to see if it is an rt task */ + + /* inspect period to see if it is an rt task */ if (tsk_rt(t)->task_params.period == 0) { - if (!tsk_rt(t)->is_aux_task) { - TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); - /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ - tsk_rt(t)->is_aux_task = 1; - + if (flags && AUX_CURRENT) { + if (!tsk_rt(t)->is_aux_task) { + int admit_ret; + + TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); + + admit_ret = admit_aux_task(t); + + if (admit_ret == 0) { + /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. 
*/ + tsk_rt(t)->is_aux_task = 1; + aux_tasks_added = 1; + #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE - list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); + list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); #endif - - (void)admit_aux_task(t); + } + } + else { + TRACE_CUR("AUX task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); + } } else { - TRACE_CUR("AUX task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); + TRACE_CUR("Not changing thread in %s/%d to AUX task: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); } } - else { + else if (!tsk_rt(t)->is_aux_task) { /* don't let aux tasks get aux tasks of their own */ if (!tsk_rt(t)->has_aux_tasks) { TRACE_CUR("task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); tsk_rt(t)->has_aux_tasks = 1; @@ -369,19 +438,18 @@ static long __do_enable_aux_tasks(void) #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE - if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { + if (aux_tasks_added && !binheap_empty(&tsk_aux(leader)->aux_task_owners)) { struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - TRACE_CUR("found hp in group: %s/%d\n", hp->comm, hp->pid); - retval = aux_tasks_increase_priority(leader, - (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + TRACE_CUR("hp in group: %s/%d\n", hp->comm, hp->pid); + retval = aux_tasks_increase_priority(leader, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); } #endif return retval; } -static long __do_disable_aux_tasks(void) +static long __do_disable_aux_tasks(int flags) { long retval = 0; struct task_struct *leader; @@ -389,50 +457,56 @@ static long __do_disable_aux_tasks(void) leader = current->group_leader; - t = leader; - do { - if (tsk_rt(t)->is_aux_task) { - - TRACE_CUR("%s/%d is an aux task.\n", t->comm, t->pid); - - if (is_realtime(t)) { - long temp_retval; - struct sched_param param = { .sched_priority = 0}; - - TRACE_CUR("%s/%d is real-time. Changing policy to SCHED_NORMAL.\n", t->comm, t->pid); - - temp_retval = sched_setscheduler_nocheck(t, SCHED_NORMAL, ¶m); - - if (temp_retval != 0) { - TRACE_CUR("error changing policy of %s/%d to SCHED_NORMAL\n", t->comm, t->pid); - if (retval == 0) { - retval = temp_retval; - } - else { - TRACE_CUR("prior error (%d) masks new error (%d)\n", retval, temp_retval); + if (flags & AUX_FUTURE) { + tsk_aux(leader)->aux_future = 0; + } + + if (flags & AUX_CURRENT) { + t = leader; + do { + if (tsk_rt(t)->is_aux_task) { + + TRACE_CUR("%s/%d is an aux task.\n", t->comm, t->pid); + + if (is_realtime(t)) { + long temp_retval; + struct sched_param param = { .sched_priority = 0}; + + TRACE_CUR("%s/%d is real-time. 
Changing policy to SCHED_NORMAL.\n", t->comm, t->pid); + + temp_retval = sched_setscheduler_nocheck(t, SCHED_NORMAL, ¶m); + + if (temp_retval != 0) { + TRACE_CUR("error changing policy of %s/%d to SCHED_NORMAL\n", t->comm, t->pid); + if (retval == 0) { + retval = temp_retval; + } + else { + TRACE_CUR("prior error (%d) masks new error (%d)\n", retval, temp_retval); + } } } - } - tsk_rt(t)->is_aux_task = 0; - } - t = next_thread(t); - } while(t != leader); + tsk_rt(t)->is_aux_task = 0; + } + t = next_thread(t); + } while(t != leader); + } return retval; } -asmlinkage long sys_set_aux_tasks(int enable) +asmlinkage long sys_set_aux_tasks(int flags) { long retval; read_lock_irq(&tasklist_lock); - if (enable) { - retval = __do_enable_aux_tasks(); + if (flags & AUX_ENABLE) { + retval = __do_enable_aux_tasks(flags); } else { - retval = __do_disable_aux_tasks(); + retval = __do_disable_aux_tasks(flags); } read_unlock_irq(&tasklist_lock); @@ -442,7 +516,7 @@ asmlinkage long sys_set_aux_tasks(int enable) #else -asmlinkage long sys_set_aux_tasks(int enable) +asmlinkage long sys_set_aux_tasks(int flags) { printk("Unsupported. Recompile with CONFIG_REALTIME_AUX_TASKS.\n"); return -EINVAL; diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 5a3f5b417f73..c279bf12a7f5 100755 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -22,7 +22,7 @@ #include #endif -#if defined(CONFIG_EDF_TIE_BREAK_HASH) || defined(CONFIG_REALTIME_AUX_TASKS) +#if defined(CONFIG_EDF_TIE_BREAK_HASH) #include static inline long edf_hash(struct task_struct *t) { @@ -43,23 +43,6 @@ static inline long edf_hash(struct task_struct *t) } #endif -#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE -int aux_tie_break(struct task_struct *first, struct task_struct *second) -{ - long fhash = edf_hash(first); - long shash = edf_hash(second); - if (fhash < shash) { - TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, 1); - return 1; - } - else if(fhash == shash) { - TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, (first->pid < second->pid)); - return first->pid < second->pid; - } - return 0; -} -#endif - /* edf_higher_prio - returns true if first has a higher EDF priority * than second. Deadline ties are broken by PID. @@ -93,44 +76,47 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED) /* run aux tasks at max priority */ + /* TODO: Actually use prio-boosting. 
*/ if (first->rt_param.is_aux_task != second->rt_param.is_aux_task) { return (first->rt_param.is_aux_task > second->rt_param.is_aux_task); } else if(first->rt_param.is_aux_task && second->rt_param.is_aux_task) { + if(first->group_leader == second->group_leader) { + TRACE_CUR("aux tie break!\n"); // tie-break by BASE priority of the aux tasks + goto aux_tie_break; + } first = first->group_leader; second = second->group_leader; } #elif defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) { - int first_lo_aux, second_lo_aux; - int first_hi_aux, second_hi_aux; - first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; - second_lo_aux = second->rt_param.is_aux_task && !second->rt_param.inh_task; - - if (first_lo_aux && !second_lo_aux) { - TRACE_CUR("%s/%d >> %s/%d --- 0\n", first->comm, first->pid, second->comm, second->pid); - return 0; - } - else if (second_lo_aux && !first_lo_aux) { - TRACE_CUR("%s/%d >> %s/%d --- 1\n", first->comm, first->pid, second->comm, second->pid); - return 1; - } - else if (first_lo_aux && second_lo_aux) { - int aux_lo_tie_break = aux_tie_break(first, second); - TRACE_CUR("low aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_lo_tie_break); - return aux_lo_tie_break; - } - - first_hi_aux = first->rt_param.is_aux_task && first->rt_param.inh_task; - second_hi_aux = second->rt_param.is_aux_task && second->rt_param.inh_task; - - if (first_hi_aux && second_hi_aux && first->rt_param.inh_task == second->rt_param.inh_task) { - int aux_hi_tie_break = aux_tie_break(first, second); - TRACE_CUR("hi aux tie break: %s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, aux_hi_tie_break); - return aux_hi_tie_break; - } + int first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; + int second_lo_aux = second->rt_param.is_aux_task && !second->rt_param.inh_task; + + /* prioritize aux tasks without inheritance below real-time tasks */ + if (first_lo_aux || second_lo_aux) { + // one of these is an aux task without inheritance. + if(first_lo_aux && second_lo_aux) { + TRACE_CUR("aux tie break!\n"); // tie-break by BASE priority of the aux tasks + goto aux_tie_break; + } + else { + // make the aux thread lowest priority real-time task + int temp = (first_lo_aux) ? !is_realtime(second) : !is_realtime(first); + TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp); + return temp; + } + } + + if (first->rt_param.is_aux_task && second->rt_param.is_aux_task && + first->rt_param.inh_task == second->rt_param.inh_task) { // inh_task is !NULL for both tasks since neither was a lo_aux task + // Both aux tasks inherit from the same task, so tie-break + // by base priority of the aux tasks. + TRACE_CUR("aux tie break!\n"); + goto aux_tie_break; + } } #endif @@ -174,6 +160,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) #endif +aux_tie_break: + if (!is_realtime(second_task)) { return 1; } diff --git a/litmus/litmus.c b/litmus/litmus.c old mode 100644 new mode 100755 index cfd14852502b..8bc159b2fcce --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -634,6 +634,14 @@ void litmus_fork(struct task_struct* p) p->od_table = NULL; } +/* Called right before copy_process() returns a forked thread. */ +void litmus_post_fork_thread(struct task_struct* p) +{ +#ifdef CONFIG_REALTIME_AUX_TASKS + make_aux_task_if_required(p); +#endif +} + /* Called upon execve(). * current is doing the exec. * Don't let address space specific stuff leak. 
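/*
 * Usage sketch for the new flags-based interface of sys_set_aux_tasks().
 * The litmus_set_aux_tasks() wrapper and the <litmus.h> include are
 * assumptions standing in for whatever liblitmus exposes for this syscall;
 * the AUX_* flags are the ones defined in rt_param.h above.
 */
#include <litmus.h>	/* assumed user-space header; the raw syscall is sys_set_aux_tasks() */

/* Tag the process's existing non-real-time threads as aux threads and ask
 * the kernel to tag threads forked later as well (AUX_FUTURE is honored by
 * litmus_post_fork_thread() at thread creation). */
static int enable_all_aux_threads(void)
{
	return litmus_set_aux_tasks(AUX_ENABLE | AUX_CURRENT | AUX_FUTURE);
}

/* Stop tagging future threads; without AUX_ENABLE this takes the disable
 * path, and because AUX_CURRENT is not passed, already-tagged aux threads
 * are left alone. */
static int stop_tagging_future_threads(void)
{
	return litmus_set_aux_tasks(AUX_FUTURE);
}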
-- cgit v1.2.2 From 1caac0a4cd3027de123306ac7b12bf4c0393f3ed Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Fri, 7 Dec 2012 14:31:07 -0500 Subject: Send reboot notifications to Litmus This patch causes reboot notifications to be send to Litmus. With this patch, Litmus attempts to switch back to the Linux-plugin before the reboot proceeds. Any failures to switch back are reported via printk() (the reboot is not halted). --- litmus/litmus.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/litmus/litmus.c b/litmus/litmus.c index 8bc159b2fcce..065ef7d3192a 100755 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -704,6 +705,25 @@ static struct sysrq_key_op sysrq_kill_rt_tasks_op = { extern struct sched_plugin linux_sched_plugin; +static int litmus_shutdown_nb(struct notifier_block *unused1, + unsigned long unused2, void *unused3) +{ + /* Attempt to switch back to regular Linux scheduling. + * Forces the active plugin to clean up. + */ + if (litmus != &linux_sched_plugin) { + int ret = switch_sched_plugin(&linux_sched_plugin); + if (ret) { + printk("Auto-shutdown of active Litmus plugin failed.\n"); + } + } + return NOTIFY_DONE; +} + +static struct notifier_block shutdown_notifier = { + .notifier_call = litmus_shutdown_nb, +}; + static int __init _init_litmus(void) { /* Common initializers, @@ -733,11 +753,15 @@ static int __init _init_litmus(void) init_topology(); #endif + register_reboot_notifier(&shutdown_notifier); + return 0; } static void _exit_litmus(void) { + unregister_reboot_notifier(&shutdown_notifier); + exit_litmus_proc(); kmem_cache_destroy(bheap_node_cache); kmem_cache_destroy(release_heap_cache); -- cgit v1.2.2 From fccce270a5540021b544d439595fa0a736242ff0 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 9 Dec 2012 16:34:36 -0500 Subject: Specify aux thread granularity in jiffies --- litmus/aux_tasks.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index e5f3c82d32e9..20f477f6e3bc 100755 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -6,12 +6,16 @@ #include #include +#include + +#define AUX_SLICE_NR_JIFFIES 1 +#define AUX_SLICE_NS ((NSEC_PER_SEC / HZ) * AUX_SLICE_NR_JIFFIES) + static int admit_aux_task(struct task_struct *t) { int retval = 0; struct task_struct *leader = t->group_leader; - /* budget enforcement increments job numbers. job numbers are used in * tie-breaking of aux_tasks. method helps ensure: * 1) aux threads with no inherited priority can starve another (they share @@ -23,9 +27,9 @@ static int admit_aux_task(struct task_struct *t) * fail-safe. */ struct rt_task tp = { - .period = 1000000, /* 1ms */ - .relative_deadline = 1000000, - .exec_cost = 1000000, /* allow full utilization with buget tracking */ + .period = AUX_SLICE_NS, + .relative_deadline = AUX_SLICE_NS, + .exec_cost = AUX_SLICE_NS, /* allow full utilization with buget tracking */ .phase = 0, .cpu = task_cpu(leader), /* take CPU of group leader */ .budget_policy = QUANTUM_ENFORCEMENT, -- cgit v1.2.2 From fbd9574e298157b54c38f82f536e5cea8f766dff Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Sun, 9 Dec 2012 16:53:50 -0500 Subject: Rename klitirqd klmirqd. 
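(For the aux-thread granularity change in the preceding commit, the effective slice depends on the kernel's CONFIG_HZ; a quick worked example, assuming AUX_SLICE_NR_JIFFIES stays at 1:)

/* AUX_SLICE_NS = (NSEC_PER_SEC / HZ) * AUX_SLICE_NR_JIFFIES
 *   HZ = 1000  ->  1,000,000 ns =  1 ms   (matches the old hard-coded 1 ms)
 *   HZ =  250  ->  4,000,000 ns =  4 ms
 *   HZ =  100  -> 10,000,000 ns = 10 ms
 * Period, relative deadline, and budget of an aux thread therefore all
 * scale with the scheduling quantum rather than being fixed at 1 ms. */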
--- include/litmus/aux_tasks.h | 0 include/litmus/ikglp_lock.h | 0 include/litmus/litmus.h | 0 include/litmus/litmus_softirq.h | 42 ++--- include/litmus/rt_param.h | 18 +- include/litmus/sched_plugin.h | 8 +- litmus/Kconfig | 10 +- litmus/aux_tasks.c | 0 litmus/edf_common.c | 0 litmus/ikglp_lock.c | 0 litmus/litmus.c | 22 +-- litmus/litmus_proc.c | 14 +- litmus/litmus_softirq.c | 388 ++++++++++++++++++++-------------------- litmus/nvidia_info.c | 8 +- litmus/sched_cedf.c | 48 ++--- litmus/sched_gsn_edf.c | 44 ++--- litmus/sched_plugin.c | 12 +- 17 files changed, 307 insertions(+), 307 deletions(-) mode change 100755 => 100644 include/litmus/aux_tasks.h mode change 100755 => 100644 include/litmus/ikglp_lock.h mode change 100755 => 100644 include/litmus/litmus.h mode change 100755 => 100644 include/litmus/rt_param.h mode change 100755 => 100644 litmus/Kconfig mode change 100755 => 100644 litmus/aux_tasks.c mode change 100755 => 100644 litmus/edf_common.c mode change 100755 => 100644 litmus/ikglp_lock.c mode change 100755 => 100644 litmus/litmus.c diff --git a/include/litmus/aux_tasks.h b/include/litmus/aux_tasks.h old mode 100755 new mode 100644 diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h old mode 100755 new mode 100644 diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h old mode 100755 new mode 100644 diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h index 1eb5ea1a6c4b..46fe89148505 100644 --- a/include/litmus/litmus_softirq.h +++ b/include/litmus/litmus_softirq.h @@ -13,7 +13,7 @@ Tasklets are current scheduled in FIFO order with NO priority inheritance for "blocked" tasklets. - klitirqd assumes the priority of the owner of the + klmirqd assumes the priority of the owner of the tasklet when the tasklet is next to execute. Currently, hi-tasklets are scheduled before @@ -21,7 +21,7 @@ And likewise, low-tasklets are scheduled before work queue objects. This priority inversion probably needs to be fixed, though it is not an issue if our work with - GPUs as GPUs are owned (and associated klitirqds) for + GPUs as GPUs are owned (and associated klmirqds) for exclusive time periods, thus no inversions can occur. */ @@ -30,7 +30,7 @@ #define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD -/* Spawns NR_LITMUS_SOFTIRQD klitirqd daemons. +/* Spawns NR_LITMUS_SOFTIRQD klmirqd daemons. Actual launch of threads is deffered to kworker's workqueue, so daemons will likely not be immediately running when this function returns, though the required @@ -52,43 +52,43 @@ FIXME: change array to a CPU topology or array of cpumasks */ -void spawn_klitirqd(int* affinity); +void spawn_klmirqd(int* affinity); -/* Raises a flag to tell klitirqds to terminate. +/* Raises a flag to tell klmirqds to terminate. Termination is async, so some threads may be running after function return. */ -void kill_klitirqd(void); +void kill_klmirqd(void); /* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqs are ready to handle tasklets. 0, otherwise.*/ -int klitirqd_is_ready(void); +int klmirqd_is_ready(void); /* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqs are ready to handle tasklets. 0, otherwise.*/ -int klitirqd_is_dead(void); +int klmirqd_is_dead(void); /* Flushes all pending work out to the OS for regular * tasklet/work processing of the specified 'owner' * - * PRECOND: klitirqd_thread must have a clear entry + * PRECOND: klmirqd_thread must have a clear entry * in the GPU registry, otherwise this call will become - * a no-op as work will loop back to the klitirqd_thread. 
+ * a no-op as work will loop back to the klmirqd_thread. * * Pass NULL for owner to flush ALL pending items. */ -void flush_pending(struct task_struct* klitirqd_thread, +void flush_pending(struct task_struct* klmirqd_thread, struct task_struct* owner); -struct task_struct* get_klitirqd(unsigned int k_id); +struct task_struct* get_klmirqd(unsigned int k_id); extern int __litmus_tasklet_schedule( struct tasklet_struct *t, unsigned int k_id); -/* schedule a tasklet on klitirqd #k_id */ +/* schedule a tasklet on klmirqd #k_id */ static inline int litmus_tasklet_schedule( struct tasklet_struct *t, unsigned int k_id) @@ -113,7 +113,7 @@ static inline int _litmus_tasklet_schedule( extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id); -/* schedule a hi tasklet on klitirqd #k_id */ +/* schedule a hi tasklet on klmirqd #k_id */ static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) { @@ -138,7 +138,7 @@ extern int __litmus_tasklet_hi_schedule_first( struct tasklet_struct *t, unsigned int k_id); -/* schedule a hi tasklet on klitirqd #k_id on next go-around */ +/* schedule a hi tasklet on klmirqd #k_id on next go-around */ /* PRECONDITION: Interrupts must be disabled. */ static inline int litmus_tasklet_hi_schedule_first( struct tasklet_struct *t, @@ -178,22 +178,22 @@ static inline int litmus_schedule_work( ///////////// mutex operations for client threads. void down_and_set_stat(struct task_struct* t, - enum klitirqd_sem_status to_set, + enum klmirqd_sem_status to_set, struct mutex* sem); void __down_and_reset_and_set_stat(struct task_struct* t, - enum klitirqd_sem_status to_reset, - enum klitirqd_sem_status to_set, + enum klmirqd_sem_status to_reset, + enum klmirqd_sem_status to_set, struct mutex* sem); void up_and_set_stat(struct task_struct* t, - enum klitirqd_sem_status to_set, + enum klmirqd_sem_status to_set, struct mutex* sem); -void release_klitirqd_lock(struct task_struct* t); +void release_klmirqd_lock(struct task_struct* t); -int reacquire_klitirqd_lock(struct task_struct* t); +int reacquire_klmirqd_lock(struct task_struct* t); #endif diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h old mode 100755 new mode 100644 index aca78a835529..47301c04d862 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -166,7 +166,7 @@ struct rt_job { struct pfair_param; -enum klitirqd_sem_status +enum klmirqd_sem_status { NEED_TO_REACQUIRE, REACQUIRING, @@ -223,26 +223,26 @@ struct rt_param { /* proxy threads have minimum priority by default */ unsigned int is_proxy_thread:1; - /* pointer to klitirqd currently working on this + /* pointer to klmirqd currently working on this task_struct's behalf. only set by the task pointed - to by klitirqd. + to by klmirqd. ptr only valid if is_proxy_thread == 0 */ - struct task_struct* cur_klitirqd; + struct task_struct* cur_klmirqd; /* Used to implement mutual execution exclusion between - * job and klitirqd execution. Job must always hold - * it's klitirqd_sem to execute. klitirqd instance + * job and klmirqd execution. Job must always hold + * it's klmirqd_sem to execute. klmirqd instance * must hold the semaphore before executing on behalf * of a job. */ - struct mutex klitirqd_sem; + struct mutex klmirqd_sem; - /* status of held klitirqd_sem, even if the held klitirqd_sem is from + /* status of held klmirqd_sem, even if the held klmirqd_sem is from another task (only proxy threads do this though). 
*/ - atomic_t klitirqd_sem_stat; + atomic_t klmirqd_sem_stat; #endif #ifdef CONFIG_LITMUS_NVIDIA diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index 65736b2a9199..e8127f427d56 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -79,10 +79,10 @@ typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh, raw_spinlock_t *to_unlock, unsigned long irqflags); -typedef void (*increase_prio_klitirq_t)(struct task_struct* klitirqd, +typedef void (*increase_prio_klitirq_t)(struct task_struct* klmirqd, struct task_struct* old_owner, struct task_struct* new_owner); -typedef void (*decrease_prio_klitirqd_t)(struct task_struct* klitirqd, +typedef void (*decrease_prio_klmirqd_t)(struct task_struct* klmirqd, struct task_struct* old_owner); @@ -168,8 +168,8 @@ struct sched_plugin { #endif #ifdef CONFIG_LITMUS_SOFTIRQD - increase_prio_klitirq_t increase_prio_klitirqd; - decrease_prio_klitirqd_t decrease_prio_klitirqd; + increase_prio_klitirq_t increase_prio_klmirqd; + decrease_prio_klmirqd_t decrease_prio_klmirqd; #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD enqueue_pai_tasklet_t enqueue_pai_tasklet; diff --git a/litmus/Kconfig b/litmus/Kconfig old mode 100755 new mode 100644 index 16087b9e4e81..f2434b87239b --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -392,11 +392,11 @@ config LITMUS_SOFTIRQD_NONE Don't schedule tasklets in Litmus. Default. config LITMUS_SOFTIRQD - bool "Spawn klitirqd interrupt handling threads." + bool "Spawn klmirqd interrupt handling threads." help - Create klitirqd interrupt handling threads. Work must be + Create klmirqd interrupt handling threads. Work must be specifically dispatched to these workers. (Softirqs for - Litmus tasks are not magically redirected to klitirqd.) + Litmus tasks are not magically redirected to klmirqd.) G-EDF/RM, C-EDF/RM ONLY for now! @@ -415,7 +415,7 @@ endchoice config NR_LITMUS_SOFTIRQD - int "Number of klitirqd." + int "Number of klmirqd." depends on LITMUS_SOFTIRQD range 1 4096 default "1" @@ -426,7 +426,7 @@ config LITMUS_NVIDIA bool "Litmus handling of NVIDIA interrupts." default n help - Direct tasklets from NVIDIA devices to Litmus's klitirqd + Direct tasklets from NVIDIA devices to Litmus's klmirqd or PAI interrupt handling routines. If unsure, say No. diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c old mode 100755 new mode 100644 diff --git a/litmus/edf_common.c b/litmus/edf_common.c old mode 100755 new mode 100644 diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c old mode 100755 new mode 100644 diff --git a/litmus/litmus.c b/litmus/litmus.c old mode 100755 new mode 100644 index 065ef7d3192a..3b8017397e80 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -397,14 +397,14 @@ static void reinit_litmus_state(struct task_struct* p, int restore) /* We probably should not have any tasklets executing for * us at this time. 
*/ - WARN_ON(p->rt_param.cur_klitirqd); - WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD); + WARN_ON(p->rt_param.cur_klmirqd); + WARN_ON(atomic_read(&p->rt_param.klmirqd_sem_stat) == HELD); - if(p->rt_param.cur_klitirqd) - flush_pending(p->rt_param.cur_klitirqd, p); + if(p->rt_param.cur_klmirqd) + flush_pending(p->rt_param.cur_klmirqd, p); - if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD) - up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem); + if(atomic_read(&p->rt_param.klmirqd_sem_stat) == HELD) + up_and_set_stat(p, NOT_HELD, &p->rt_param.klmirqd_sem); #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -479,9 +479,9 @@ long __litmus_admit_task(struct task_struct* tsk) #ifdef CONFIG_LITMUS_SOFTIRQD /* proxy thread off by default */ tsk_rt(tsk)is_proxy_thread = 0; - tsk_rt(tsk)cur_klitirqd = NULL; - mutex_init(&tsk_rt(tsk)->klitirqd_sem); - atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD); + tsk_rt(tsk)cur_klmirqd = NULL; + mutex_init(&tsk_rt(tsk)->klmirqd_sem); + atomic_set(&tsk_rt(tsk)->klmirqd_sem_stat, NOT_HELD); #endif retval = litmus->admit_task(tsk); @@ -580,9 +580,9 @@ int switch_sched_plugin(struct sched_plugin* plugin) cpu_relax(); #ifdef CONFIG_LITMUS_SOFTIRQD - if(!klitirqd_is_dead()) + if(!klmirqd_is_dead()) { - kill_klitirqd(); + kill_klmirqd(); } #endif diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c index 9ab7e015a3c1..136fecfb0b8b 100644 --- a/litmus/litmus_proc.c +++ b/litmus/litmus_proc.c @@ -21,14 +21,14 @@ static struct proc_dir_entry *litmus_dir = NULL, *release_master_file = NULL, #endif #ifdef CONFIG_LITMUS_SOFTIRQD - *klitirqd_file = NULL, + *klmirqd_file = NULL, #endif *plugs_file = NULL; /* in litmus/sync.c */ int count_tasks_waiting_for_release(void); -extern int proc_read_klitirqd_stats(char *page, char **start, +extern int proc_read_klmirqd_stats(char *page, char **start, off_t off, int count, int *eof, void *data); @@ -169,9 +169,9 @@ int __init init_litmus_proc(void) #endif #ifdef CONFIG_LITMUS_SOFTIRQD - klitirqd_file = - create_proc_read_entry("klitirqd_stats", 0444, litmus_dir, - proc_read_klitirqd_stats, NULL); + klmirqd_file = + create_proc_read_entry("klmirqd_stats", 0444, litmus_dir, + proc_read_klmirqd_stats, NULL); #endif stat_file = create_proc_read_entry("stats", 0444, litmus_dir, @@ -201,8 +201,8 @@ void exit_litmus_proc(void) if (curr_file) remove_proc_entry("active_plugin", litmus_dir); #ifdef CONFIG_LITMUS_SOFTIRQD - if (klitirqd_file) - remove_proc_entry("klitirqd_stats", litmus_dir); + if (klmirqd_file) + remove_proc_entry("klmirqd_stats", litmus_dir); #endif #ifdef CONFIG_RELEASE_MASTER if (release_master_file) diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index 9f7d9da5facb..73a3053e662b 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c @@ -20,7 +20,7 @@ /* counts number of daemons ready to handle litmus irqs. 
*/ -static atomic_t num_ready_klitirqds = ATOMIC_INIT(0); +static atomic_t num_ready_klmirqds = ATOMIC_INIT(0); enum pending_flags { @@ -36,9 +36,9 @@ struct tasklet_head struct tasklet_struct **tail; }; -struct klitirqd_info +struct klmirqd_info { - struct task_struct* klitirqd; + struct task_struct* klmirqd; struct task_struct* current_owner; int terminating; @@ -56,44 +56,44 @@ struct klitirqd_info struct list_head worklist; }; -/* one list for each klitirqd */ -static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD]; +/* one list for each klmirqd */ +static struct klmirqd_info klmirqds[NR_LITMUS_SOFTIRQD]; -int proc_read_klitirqd_stats(char *page, char **start, +int proc_read_klmirqd_stats(char *page, char **start, off_t off, int count, int *eof, void *data) { int len = snprintf(page, PAGE_SIZE, - "num ready klitirqds: %d\n\n", - atomic_read(&num_ready_klitirqds)); + "num ready klmirqds: %d\n\n", + atomic_read(&num_ready_klmirqds)); - if(klitirqd_is_ready()) + if(klmirqd_is_ready()) { int i; for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) { len += snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */ - "klitirqd_th%d: %s/%d\n" + "klmirqd_th%d: %s/%d\n" "\tcurrent_owner: %s/%d\n" "\tpending: %x\n" "\tnum hi: %d\n" "\tnum low: %d\n" "\tnum work: %d\n\n", i, - klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid, - (klitirqds[i].current_owner != NULL) ? - klitirqds[i].current_owner->comm : "(null)", - (klitirqds[i].current_owner != NULL) ? - klitirqds[i].current_owner->pid : 0, - klitirqds[i].pending, - atomic_read(&klitirqds[i].num_hi_pending), - atomic_read(&klitirqds[i].num_low_pending), - atomic_read(&klitirqds[i].num_work_pending)); + klmirqds[i].klmirqd->comm, klmirqds[i].klmirqd->pid, + (klmirqds[i].current_owner != NULL) ? + klmirqds[i].current_owner->comm : "(null)", + (klmirqds[i].current_owner != NULL) ? 
+ klmirqds[i].current_owner->pid : 0, + klmirqds[i].pending, + atomic_read(&klmirqds[i].num_hi_pending), + atomic_read(&klmirqds[i].num_low_pending), + atomic_read(&klmirqds[i].num_work_pending)); } } @@ -107,7 +107,7 @@ int proc_read_klitirqd_stats(char *page, char **start, #if 0 static atomic_t dump_id = ATOMIC_INIT(0); -static void __dump_state(struct klitirqd_info* which, const char* caller) +static void __dump_state(struct klmirqd_info* which, const char* caller) { struct tasklet_struct* list; @@ -118,22 +118,22 @@ static void __dump_state(struct klitirqd_info* which, const char* caller) if(which->current_owner) { TRACE("(id: %d caller: %s)\n" - "klitirqd: %s/%d\n" + "klmirqd: %s/%d\n" "current owner: %s/%d\n" "pending: %x\n", id, caller, - which->klitirqd->comm, which->klitirqd->pid, + which->klmirqd->comm, which->klmirqd->pid, which->current_owner->comm, which->current_owner->pid, which->pending); } else { TRACE("(id: %d caller: %s)\n" - "klitirqd: %s/%d\n" + "klmirqd: %s/%d\n" "current owner: %p\n" "pending: %x\n", id, caller, - which->klitirqd->comm, which->klitirqd->pid, + which->klmirqd->comm, which->klmirqd->pid, NULL, which->pending); } @@ -151,7 +151,7 @@ static void __dump_state(struct klitirqd_info* which, const char* caller) } } -static void dump_state(struct klitirqd_info* which, const char* caller) +static void dump_state(struct klmirqd_info* which, const char* caller) { unsigned long flags; @@ -164,23 +164,23 @@ static void dump_state(struct klitirqd_info* which, const char* caller) /* forward declarations */ static void ___litmus_tasklet_schedule(struct tasklet_struct *t, - struct klitirqd_info *which, + struct klmirqd_info *which, int wakeup); static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, - struct klitirqd_info *which, + struct klmirqd_info *which, int wakeup); static void ___litmus_schedule_work(struct work_struct *w, - struct klitirqd_info *which, + struct klmirqd_info *which, int wakeup); -inline unsigned int klitirqd_id(struct task_struct* tsk) +inline unsigned int klmirqd_id(struct task_struct* tsk) { int i; for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) { - if(klitirqds[i].klitirqd == tsk) + if(klmirqds[i].klmirqd == tsk) { return i; } @@ -192,28 +192,28 @@ inline unsigned int klitirqd_id(struct task_struct* tsk) } -inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which) +inline static u32 litirq_pending_hi_irqoff(struct klmirqd_info* which) { return (which->pending & LIT_TASKLET_HI); } -inline static u32 litirq_pending_low_irqoff(struct klitirqd_info* which) +inline static u32 litirq_pending_low_irqoff(struct klmirqd_info* which) { return (which->pending & LIT_TASKLET_LOW); } -inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which) +inline static u32 litirq_pending_work_irqoff(struct klmirqd_info* which) { return (which->pending & LIT_WORK); } -inline static u32 litirq_pending_irqoff(struct klitirqd_info* which) +inline static u32 litirq_pending_irqoff(struct klmirqd_info* which) { return(which->pending); } -inline static u32 litirq_pending(struct klitirqd_info* which) +inline static u32 litirq_pending(struct klmirqd_info* which) { unsigned long flags; u32 pending; @@ -225,7 +225,7 @@ inline static u32 litirq_pending(struct klitirqd_info* which) return pending; }; -inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner) +inline static u32 litirq_pending_with_owner(struct klmirqd_info* which, struct task_struct* owner) { unsigned long flags; u32 pending; @@ -245,7 +245,7 
@@ inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct } -inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which, +inline static u32 litirq_pending_and_sem_and_owner(struct klmirqd_info* which, struct mutex** sem, struct task_struct** t) { @@ -264,7 +264,7 @@ inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which, if(which->current_owner != NULL) { *t = which->current_owner; - *sem = &tsk_rt(which->current_owner)->klitirqd_sem; + *sem = &tsk_rt(which->current_owner)->klmirqd_sem; } else { @@ -286,7 +286,7 @@ inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which, /* returns true if the next piece of work to do is from a different owner. */ static int tasklet_ownership_change( - struct klitirqd_info* which, + struct klmirqd_info* which, enum pending_flags taskletQ) { /* this function doesn't have to look at work objects since they have @@ -319,16 +319,16 @@ static int tasklet_ownership_change( raw_spin_unlock_irqrestore(&which->lock, flags); - TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret); + TRACE_TASK(which->klmirqd, "ownership change needed: %d\n", ret); return ret; } -static void __reeval_prio(struct klitirqd_info* which) +static void __reeval_prio(struct klmirqd_info* which) { struct task_struct* next_owner = NULL; - struct task_struct* klitirqd = which->klitirqd; + struct task_struct* klmirqd = which->klmirqd; /* Check in prio-order */ u32 pending = litirq_pending_irqoff(which); @@ -366,43 +366,43 @@ static void __reeval_prio(struct klitirqd_info* which) if(!in_interrupt()) { TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, - ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm, - ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid, + ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->comm, + ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->pid, next_owner->comm, next_owner->pid); } else { TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, - ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm, - ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid, + ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->comm, + ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->pid, next_owner->comm, next_owner->pid); } - litmus->increase_prio_inheritance_klitirqd(klitirqd, old_owner, next_owner); + litmus->increase_prio_inheritance_klmirqd(klmirqd, old_owner, next_owner); } else { if(likely(!in_interrupt())) { TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n", - __FUNCTION__, klitirqd->comm, klitirqd->pid); + __FUNCTION__, klmirqd->comm, klmirqd->pid); } else { // is this a bug? 
TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n", - __FUNCTION__, klitirqd->comm, klitirqd->pid); + __FUNCTION__, klmirqd->comm, klmirqd->pid); } BUG_ON(pending != 0); - litmus->decrease_prio_inheritance_klitirqd(klitirqd, old_owner, NULL); + litmus->decrease_prio_inheritance_klmirqd(klmirqd, old_owner, NULL); } } //__dump_state(which, "__reeval_prio: after"); } -static void reeval_prio(struct klitirqd_info* which) +static void reeval_prio(struct klmirqd_info* which) { unsigned long flags; @@ -412,25 +412,25 @@ static void reeval_prio(struct klitirqd_info* which) } -static void wakeup_litirqd_locked(struct klitirqd_info* which) +static void wakeup_litirqd_locked(struct klmirqd_info* which) { /* Interrupts are disabled: no need to stop preemption */ - if (which && which->klitirqd) + if (which && which->klmirqd) { __reeval_prio(which); /* configure the proper priority */ - if(which->klitirqd->state != TASK_RUNNING) - { - TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__, - which->klitirqd->comm, which->klitirqd->pid); + if(which->klmirqd->state != TASK_RUNNING) + { + TRACE("%s: Waking up klmirqd: %s/%d\n", __FUNCTION__, + which->klmirqd->comm, which->klmirqd->pid); - wake_up_process(which->klitirqd); + wake_up_process(which->klmirqd); } } } -static void do_lit_tasklet(struct klitirqd_info* which, +static void do_lit_tasklet(struct klmirqd_info* which, struct tasklet_head* pending_tasklets) { unsigned long flags; @@ -503,7 +503,7 @@ static void do_lit_tasklet(struct klitirqd_info* which, // returns 1 if priorities need to be changed to continue processing // pending tasklets. -static int do_litirq(struct klitirqd_info* which) +static int do_litirq(struct klmirqd_info* which) { u32 pending; int resched = 0; @@ -514,17 +514,17 @@ static int do_litirq(struct klitirqd_info* which) return(0); } - if(which->klitirqd != current) + if(which->klmirqd != current) { TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n", __FUNCTION__, current->comm, current->pid, - which->klitirqd->comm, which->klitirqd->pid); + which->klmirqd->comm, which->klmirqd->pid); return(0); } if(!is_realtime(current)) { - TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n", + TRACE_CUR("%s: exiting early: klmirqd is not real-time. Sched Policy = %d\n", __FUNCTION__, current->policy); return(0); } @@ -567,7 +567,7 @@ static int do_litirq(struct klitirqd_info* which) } -static void do_work(struct klitirqd_info* which) +static void do_work(struct klmirqd_info* which) { unsigned long flags; work_func_t f; @@ -646,9 +646,9 @@ static int set_litmus_daemon_sched(void) /* set task params, mark as proxy thread, and init other data */ tsk_rt(current)->task_params = tp; tsk_rt(current)->is_proxy_thread = 1; - tsk_rt(current)->cur_klitirqd = NULL; - mutex_init(&tsk_rt(current)->klitirqd_sem); - atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD); + tsk_rt(current)->cur_klmirqd = NULL; + mutex_init(&tsk_rt(current)->klmirqd_sem); + atomic_set(&tsk_rt(current)->klmirqd_sem_stat, NOT_HELD); /* inform the OS we're SCHED_LITMUS -- sched_setscheduler_nocheck() calls litmus_admit_task(). 
*/ @@ -657,7 +657,7 @@ static int set_litmus_daemon_sched(void) return ret; } -static void enter_execution_phase(struct klitirqd_info* which, +static void enter_execution_phase(struct klmirqd_info* which, struct mutex* sem, struct task_struct* t) { @@ -670,14 +670,14 @@ static void enter_execution_phase(struct klitirqd_info* which, t->comm, t->pid); } -static void exit_execution_phase(struct klitirqd_info* which, +static void exit_execution_phase(struct klmirqd_info* which, struct mutex* sem, struct task_struct* t) { TRACE_CUR("%s: Exiting execution phase. " "Releasing semaphore of %s/%d\n", __FUNCTION__, t->comm, t->pid); - if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD) + if(atomic_read(&tsk_rt(current)->klmirqd_sem_stat) == HELD) { up_and_set_stat(current, NOT_HELD, sem); TRACE_CUR("%s: Execution phase exited! " @@ -691,9 +691,9 @@ static void exit_execution_phase(struct klitirqd_info* which, } /* main loop for klitsoftirqd */ -static int run_klitirqd(void* unused) +static int run_klmirqd(void* unused) { - struct klitirqd_info* which = &klitirqds[klitirqd_id(current)]; + struct klmirqd_info* which = &klmirqds[klmirqd_id(current)]; struct mutex* sem; struct task_struct* owner; @@ -705,7 +705,7 @@ static int run_klitirqd(void* unused) goto rt_failed; } - atomic_inc(&num_ready_klitirqds); + atomic_inc(&num_ready_klmirqds); set_current_state(TASK_INTERRUPTIBLE); @@ -793,7 +793,7 @@ static int run_klitirqd(void* unused) } __set_current_state(TASK_RUNNING); - atomic_dec(&num_ready_klitirqds); + atomic_dec(&num_ready_klmirqds); rt_failed: litmus_exit_task(current); @@ -802,57 +802,57 @@ rt_failed: } -struct klitirqd_launch_data +struct klmirqd_launch_data { int* cpu_affinity; struct work_struct work; }; /* executed by a kworker from workqueues */ -static void launch_klitirqd(struct work_struct *work) +static void launch_klmirqd(struct work_struct *work) { int i; - struct klitirqd_launch_data* launch_data = - container_of(work, struct klitirqd_launch_data, work); + struct klmirqd_launch_data* launch_data = + container_of(work, struct klmirqd_launch_data, work); - TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + TRACE("%s: Creating %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); /* create the daemon threads */ for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) { if(launch_data->cpu_affinity) { - klitirqds[i].klitirqd = + klmirqds[i].klmirqd = kthread_create( - run_klitirqd, + run_klmirqd, /* treat the affinity as a pointer, we'll cast it back later */ (void*)(long long)launch_data->cpu_affinity[i], - "klitirqd_th%d/%d", + "klmirqd_th%d/%d", i, launch_data->cpu_affinity[i]); /* litmus will put is in the right cluster. 
*/ - kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]); + kthread_bind(klmirqds[i].klmirqd, launch_data->cpu_affinity[i]); } else { - klitirqds[i].klitirqd = + klmirqds[i].klmirqd = kthread_create( - run_klitirqd, + run_klmirqd, /* treat the affinity as a pointer, we'll cast it back later */ (void*)(long long)(-1), - "klitirqd_th%d", + "klmirqd_th%d", i); } } - TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + TRACE("%s: Launching %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); /* unleash the daemons */ for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) { - wake_up_process(klitirqds[i].klitirqd); + wake_up_process(klmirqds[i].klmirqd); } if(launch_data->cpu_affinity) @@ -861,36 +861,36 @@ static void launch_klitirqd(struct work_struct *work) } -void spawn_klitirqd(int* affinity) +void spawn_klmirqd(int* affinity) { int i; - struct klitirqd_launch_data* delayed_launch; + struct klmirqd_launch_data* delayed_launch; - if(atomic_read(&num_ready_klitirqds) != 0) + if(atomic_read(&num_ready_klmirqds) != 0) { - TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n"); + TRACE("%s: At least one klmirqd is already running! Need to call kill_klmirqd()?\n"); return; } /* init the tasklet & work queues */ for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) { - klitirqds[i].terminating = 0; - klitirqds[i].pending = 0; + klmirqds[i].terminating = 0; + klmirqds[i].pending = 0; - klitirqds[i].num_hi_pending.counter = 0; - klitirqds[i].num_low_pending.counter = 0; - klitirqds[i].num_work_pending.counter = 0; + klmirqds[i].num_hi_pending.counter = 0; + klmirqds[i].num_low_pending.counter = 0; + klmirqds[i].num_work_pending.counter = 0; - klitirqds[i].pending_tasklets_hi.head = NULL; - klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head; + klmirqds[i].pending_tasklets_hi.head = NULL; + klmirqds[i].pending_tasklets_hi.tail = &klmirqds[i].pending_tasklets_hi.head; - klitirqds[i].pending_tasklets.head = NULL; - klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head; + klmirqds[i].pending_tasklets.head = NULL; + klmirqds[i].pending_tasklets.tail = &klmirqds[i].pending_tasklets.head; - INIT_LIST_HEAD(&klitirqds[i].worklist); + INIT_LIST_HEAD(&klmirqds[i].worklist); - raw_spin_lock_init(&klitirqds[i].lock); + raw_spin_lock_init(&klmirqds[i].lock); } /* wait to flush the initializations to memory since other threads @@ -899,8 +899,8 @@ void spawn_klitirqd(int* affinity) /* tell a work queue to launch the threads. we can't make scheduling calls since we're in an atomic state. 
*/ - TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__); - delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC); + TRACE("%s: Setting callback up to launch klmirqds\n", __FUNCTION__); + delayed_launch = kmalloc(sizeof(struct klmirqd_launch_data), GFP_ATOMIC); if(affinity) { delayed_launch->cpu_affinity = @@ -913,57 +913,57 @@ void spawn_klitirqd(int* affinity) { delayed_launch->cpu_affinity = NULL; } - INIT_WORK(&delayed_launch->work, launch_klitirqd); + INIT_WORK(&delayed_launch->work, launch_klmirqd); schedule_work(&delayed_launch->work); } -void kill_klitirqd(void) +void kill_klmirqd(void) { - if(!klitirqd_is_dead()) + if(!klmirqd_is_dead()) { int i; - TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); + TRACE("%s: Killing %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) { - if(klitirqds[i].terminating != 1) + if(klmirqds[i].terminating != 1) { - klitirqds[i].terminating = 1; + klmirqds[i].terminating = 1; mb(); /* just to be sure? */ - flush_pending(klitirqds[i].klitirqd, NULL); + flush_pending(klmirqds[i].klmirqd, NULL); /* signal termination */ - kthread_stop(klitirqds[i].klitirqd); + kthread_stop(klmirqds[i].klmirqd); } } } } -int klitirqd_is_ready(void) +int klmirqd_is_ready(void) { - return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD); + return(atomic_read(&num_ready_klmirqds) == NR_LITMUS_SOFTIRQD); } -int klitirqd_is_dead(void) +int klmirqd_is_dead(void) { - return(atomic_read(&num_ready_klitirqds) == 0); + return(atomic_read(&num_ready_klmirqds) == 0); } -struct task_struct* get_klitirqd(unsigned int k_id) +struct task_struct* get_klmirqd(unsigned int k_id) { - return(klitirqds[k_id].klitirqd); + return(klmirqds[k_id].klmirqd); } -void flush_pending(struct task_struct* klitirqd_thread, +void flush_pending(struct task_struct* klmirqd_thread, struct task_struct* owner) { - unsigned int k_id = klitirqd_id(klitirqd_thread); - struct klitirqd_info *which = &klitirqds[k_id]; + unsigned int k_id = klmirqd_id(klmirqd_thread); + struct klmirqd_info *which = &klmirqds[k_id]; unsigned long flags; struct tasklet_struct *list; @@ -1129,7 +1129,7 @@ void flush_pending(struct task_struct* klitirqd_thread, static void ___litmus_tasklet_schedule(struct tasklet_struct *t, - struct klitirqd_info *which, + struct klmirqd_info *which, int wakeup) { unsigned long flags; @@ -1153,7 +1153,7 @@ static void ___litmus_tasklet_schedule(struct tasklet_struct *t, if(!old_pending && wakeup) { - wakeup_litirqd_locked(which); /* wake up the klitirqd */ + wakeup_litirqd_locked(which); /* wake up the klmirqd */ } //__dump_state(which, "___litmus_tasklet_schedule: after queuing"); @@ -1172,11 +1172,11 @@ int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) { - TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); + TRACE("%s: No klmirqd_th%d!\n", __FUNCTION__, k_id); BUG(); } - if(likely(!klitirqds[k_id].terminating)) + if(likely(!klmirqds[k_id].terminating)) { /* Can't accept tasklets while we're processing a workqueue because they're handled by the same thread. This case is @@ -1184,10 +1184,10 @@ int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) TODO: Use a separate thread for work objects!!!!!! 
*/ - if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) { ret = 1; - ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1); + ___litmus_tasklet_schedule(t, &klmirqds[k_id], 1); } else { @@ -1202,7 +1202,7 @@ EXPORT_SYMBOL(__litmus_tasklet_schedule); static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, - struct klitirqd_info *which, + struct klmirqd_info *which, int wakeup) { unsigned long flags; @@ -1224,7 +1224,7 @@ static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, if(!old_pending && wakeup) { - wakeup_litirqd_locked(which); /* wake up the klitirqd */ + wakeup_litirqd_locked(which); /* wake up the klmirqd */ } raw_spin_unlock_irqrestore(&which->lock, flags); @@ -1241,22 +1241,22 @@ int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) { - TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); + TRACE("%s: No klmirqd_th%d!\n", __FUNCTION__, k_id); BUG(); } - if(unlikely(!klitirqd_is_ready())) + if(unlikely(!klmirqd_is_ready())) { - TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id); BUG(); } - if(likely(!klitirqds[k_id].terminating)) + if(likely(!klmirqds[k_id].terminating)) { - if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) { ret = 1; - ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1); + ___litmus_tasklet_hi_schedule(t, &klmirqds[k_id], 1); } else { @@ -1285,36 +1285,36 @@ int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_ if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) { - TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id); + TRACE("%s: No klmirqd_th%u!\n", __FUNCTION__, k_id); BUG(); } - if(unlikely(!klitirqd_is_ready())) + if(unlikely(!klmirqd_is_ready())) { - TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id); BUG(); } - if(likely(!klitirqds[k_id].terminating)) + if(likely(!klmirqds[k_id].terminating)) { - raw_spin_lock(&klitirqds[k_id].lock); + raw_spin_lock(&klmirqds[k_id].lock); - if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0)) + if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) { ret = 1; // success! 
- t->next = klitirqds[k_id].pending_tasklets_hi.head; - klitirqds[k_id].pending_tasklets_hi.head = t; + t->next = klmirqds[k_id].pending_tasklets_hi.head; + klmirqds[k_id].pending_tasklets_hi.head = t; - old_pending = klitirqds[k_id].pending; - klitirqds[k_id].pending |= LIT_TASKLET_HI; + old_pending = klmirqds[k_id].pending; + klmirqds[k_id].pending |= LIT_TASKLET_HI; - atomic_inc(&klitirqds[k_id].num_hi_pending); + atomic_inc(&klmirqds[k_id].num_hi_pending); mb(); if(!old_pending) - wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */ + wakeup_litirqd_locked(&klmirqds[k_id]); /* wake up the klmirqd */ } else { @@ -1322,7 +1322,7 @@ int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_ __FUNCTION__); } - raw_spin_unlock(&klitirqds[k_id].lock); + raw_spin_unlock(&klmirqds[k_id].lock); } return(ret); } @@ -1332,7 +1332,7 @@ EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first); static void ___litmus_schedule_work(struct work_struct *w, - struct klitirqd_info *which, + struct klmirqd_info *which, int wakeup) { unsigned long flags; @@ -1352,7 +1352,7 @@ static void ___litmus_schedule_work(struct work_struct *w, if(!old_pending && wakeup) { - wakeup_litirqd_locked(which); /* wakeup the klitirqd */ + wakeup_litirqd_locked(which); /* wakeup the klmirqd */ } raw_spin_unlock_irqrestore(&which->lock, flags); @@ -1369,18 +1369,18 @@ int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) { - TRACE("%s: No klitirqd_th%u!\n", k_id); + TRACE("%s: No klmirqd_th%u!\n", k_id); BUG(); } - if(unlikely(!klitirqd_is_ready())) + if(unlikely(!klmirqd_is_ready())) { - TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id); + TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id); BUG(); } - if(likely(!klitirqds[k_id].terminating)) - ___litmus_schedule_work(w, &klitirqds[k_id], 1); + if(likely(!klmirqds[k_id].terminating)) + ___litmus_schedule_work(w, &klmirqds[k_id], 1); else ret = 0; return(ret); @@ -1388,34 +1388,34 @@ int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) EXPORT_SYMBOL(__litmus_schedule_work); -static int set_klitirqd_sem_status(unsigned long stat) +static int set_klmirqd_sem_status(unsigned long stat) { TRACE_CUR("SETTING STATUS FROM %d TO %d\n", - atomic_read(&tsk_rt(current)->klitirqd_sem_stat), + atomic_read(&tsk_rt(current)->klmirqd_sem_stat), stat); - atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat); + atomic_set(&tsk_rt(current)->klmirqd_sem_stat, stat); //mb(); return(0); } -static int set_klitirqd_sem_status_if_not_held(unsigned long stat) +static int set_klmirqd_sem_status_if_not_held(unsigned long stat) { - if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD) + if(atomic_read(&tsk_rt(current)->klmirqd_sem_stat) != HELD) { - return(set_klitirqd_sem_status(stat)); + return(set_klmirqd_sem_status(stat)); } return(-1); } void __down_and_reset_and_set_stat(struct task_struct* t, - enum klitirqd_sem_status to_reset, - enum klitirqd_sem_status to_set, + enum klmirqd_sem_status to_reset, + enum klmirqd_sem_status to_set, struct mutex* sem) { #if 0 - struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem); struct task_struct* task = container_of(param, struct task_struct, rt_param); TRACE_CUR("%s: entered. 
Locking semaphore of %s/%d\n", @@ -1423,8 +1423,8 @@ void __down_and_reset_and_set_stat(struct task_struct* t, #endif mutex_lock_sfx(sem, - set_klitirqd_sem_status_if_not_held, to_reset, - set_klitirqd_sem_status, to_set); + set_klmirqd_sem_status_if_not_held, to_reset, + set_klmirqd_sem_status, to_set); #if 0 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n", __FUNCTION__, task->comm, task->pid); @@ -1432,11 +1432,11 @@ void __down_and_reset_and_set_stat(struct task_struct* t, } void down_and_set_stat(struct task_struct* t, - enum klitirqd_sem_status to_set, + enum klmirqd_sem_status to_set, struct mutex* sem) { #if 0 - struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem); struct task_struct* task = container_of(param, struct task_struct, rt_param); TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", @@ -1445,7 +1445,7 @@ void down_and_set_stat(struct task_struct* t, mutex_lock_sfx(sem, NULL, 0, - set_klitirqd_sem_status, to_set); + set_klmirqd_sem_status, to_set); #if 0 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n", @@ -1455,11 +1455,11 @@ void down_and_set_stat(struct task_struct* t, void up_and_set_stat(struct task_struct* t, - enum klitirqd_sem_status to_set, + enum klmirqd_sem_status to_set, struct mutex* sem) { #if 0 - struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem); + struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem); struct task_struct* task = container_of(param, struct task_struct, rt_param); TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n", @@ -1468,7 +1468,7 @@ void up_and_set_stat(struct task_struct* t, #endif mutex_unlock_sfx(sem, NULL, 0, - set_klitirqd_sem_status, to_set); + set_klmirqd_sem_status, to_set); #if 0 TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n", @@ -1479,33 +1479,33 @@ void up_and_set_stat(struct task_struct* t, -void release_klitirqd_lock(struct task_struct* t) +void release_klmirqd_lock(struct task_struct* t) { - if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD)) + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klmirqd_sem_stat) == HELD)) { struct mutex* sem; struct task_struct* owner = t; if(t->state == TASK_RUNNING) { - TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n"); + TRACE_TASK(t, "NOT giving up klmirqd_sem because we're not blocked!\n"); return; } if(likely(!tsk_rt(t)->is_proxy_thread)) { - sem = &tsk_rt(t)->klitirqd_sem; + sem = &tsk_rt(t)->klmirqd_sem; } else { - unsigned int k_id = klitirqd_id(t); - owner = klitirqds[k_id].current_owner; + unsigned int k_id = klmirqd_id(t); + owner = klmirqds[k_id].current_owner; - BUG_ON(t != klitirqds[k_id].klitirqd); + BUG_ON(t != klmirqds[k_id].klmirqd); if(likely(owner)) { - sem = &tsk_rt(owner)->klitirqd_sem; + sem = &tsk_rt(owner)->klmirqd_sem; } else { @@ -1514,7 +1514,7 @@ void release_klitirqd_lock(struct task_struct* t) // We had the rug pulled out from under us. Abort attempt // to reacquire the lock since our client no longer needs us. TRACE_CUR("HUH?! How did this happen?\n"); - atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD); + atomic_set(&tsk_rt(t)->klmirqd_sem_stat, NOT_HELD); return; } } @@ -1526,42 +1526,42 @@ void release_klitirqd_lock(struct task_struct* t) /* else if(is_realtime(t)) { - TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat); + TRACE_CUR("%s: Nothing to do. 
Stat = %d\n", __FUNCTION__, tsk_rt(t)->klmirqd_sem_stat); } */ } -int reacquire_klitirqd_lock(struct task_struct* t) +int reacquire_klmirqd_lock(struct task_struct* t) { int ret = 0; - if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE)) + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klmirqd_sem_stat) == NEED_TO_REACQUIRE)) { struct mutex* sem; struct task_struct* owner = t; if(likely(!tsk_rt(t)->is_proxy_thread)) { - sem = &tsk_rt(t)->klitirqd_sem; + sem = &tsk_rt(t)->klmirqd_sem; } else { - unsigned int k_id = klitirqd_id(t); - //struct task_struct* owner = klitirqds[k_id].current_owner; - owner = klitirqds[k_id].current_owner; + unsigned int k_id = klmirqd_id(t); + //struct task_struct* owner = klmirqds[k_id].current_owner; + owner = klmirqds[k_id].current_owner; - BUG_ON(t != klitirqds[k_id].klitirqd); + BUG_ON(t != klmirqds[k_id].klmirqd); if(likely(owner)) { - sem = &tsk_rt(owner)->klitirqd_sem; + sem = &tsk_rt(owner)->klmirqd_sem; } else { // We had the rug pulled out from under us. Abort attempt // to reacquire the lock since our client no longer needs us. - TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n"); - atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD); + TRACE_CUR("No longer needs to reacquire klmirqd_sem!\n"); + atomic_set(&tsk_rt(t)->klmirqd_sem_stat, NOT_HELD); return(0); } } @@ -1573,7 +1573,7 @@ int reacquire_klitirqd_lock(struct task_struct* t) /* else if(is_realtime(t)) { - TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat); + TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klmirqd_sem_stat); } */ diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index d04c6efa5f05..22586cde8255 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -506,7 +506,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t) } #ifdef CONFIG_LITMUS_SOFTIRQD - down_and_set_stat(t, HELD, &tsk_rt(t)->klitirqd_sem); + down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem); #endif ++(reg->nr_owners); @@ -535,7 +535,7 @@ static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id]; #ifdef CONFIG_LITMUS_SOFTIRQD - struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id); + struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id); #endif if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) { @@ -549,7 +549,7 @@ static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { if(reg->owners[i] == t) { #ifdef CONFIG_LITMUS_SOFTIRQD - flush_pending(klitirqd_th, t); + flush_pending(klmirqd_th, t); #endif if(reg->max_prio_owner == t) { reg->max_prio_owner = find_hp_owner(reg, t); @@ -559,7 +559,7 @@ static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) } #ifdef CONFIG_LITMUS_SOFTIRQD - up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klitirqd_sem); + up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem); #endif reg->owners[i] = NULL; diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 6746d4d6033e..44c8336c5061 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -1143,7 +1143,7 @@ static int __increase_priority_inheritance(struct task_struct* t, #ifdef CONFIG_LITMUS_NESTED_LOCKING /* this sanity check allows for weaker locking in protocols */ - /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */ + /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) 
*/ if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { #endif TRACE_TASK(t, "inherits priority from %s/%d\n", @@ -1238,12 +1238,12 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str __increase_priority_inheritance(t, prio_inh); #ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klitirqd != NULL) + if(tsk_rt(t)->cur_klmirqd != NULL) { TRACE_TASK(t, "%s/%d inherits a new priority!\n", - tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + __increase_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); } #endif @@ -1347,12 +1347,12 @@ static void decrease_priority_inheritance(struct task_struct* t, __decrease_priority_inheritance(t, prio_inh); #ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klitirqd != NULL) + if(tsk_rt(t)->cur_klmirqd != NULL) { TRACE_TASK(t, "%s/%d decreases in priority!\n", - tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + __decrease_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); } #endif @@ -1376,13 +1376,13 @@ static void decrease_priority_inheritance(struct task_struct* t, #ifdef CONFIG_LITMUS_SOFTIRQD /* called with IRQs off */ -static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, +static void increase_priority_inheritance_klmirqd(struct task_struct* klmirqd, struct task_struct* old_owner, struct task_struct* new_owner) { - cedf_domain_t* cluster = task_cpu_cluster(klitirqd); + cedf_domain_t* cluster = task_cpu_cluster(klmirqd); - BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); raw_spin_lock(&cluster->cluster_lock); @@ -1391,18 +1391,18 @@ static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, if(old_owner) { // unreachable? - tsk_rt(old_owner)->cur_klitirqd = NULL; + tsk_rt(old_owner)->cur_klmirqd = NULL; } - TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + TRACE_TASK(klmirqd, "giving ownership to %s/%d.\n", new_owner->comm, new_owner->pid); - tsk_rt(new_owner)->cur_klitirqd = klitirqd; + tsk_rt(new_owner)->cur_klmirqd = klmirqd; } - __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio. + __decrease_priority_inheritance(klmirqd, NULL); // kludge to clear out cur prio. - __increase_priority_inheritance(klitirqd, + __increase_priority_inheritance(klmirqd, (tsk_rt(new_owner)->inh_task == NULL) ? 
new_owner : tsk_rt(new_owner)->inh_task); @@ -1412,21 +1412,21 @@ static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, /* called with IRQs off */ -static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd, +static void decrease_priority_inheritance_klmirqd(struct task_struct* klmirqd, struct task_struct* old_owner, struct task_struct* new_owner) { - cedf_domain_t* cluster = task_cpu_cluster(klitirqd); + cedf_domain_t* cluster = task_cpu_cluster(klmirqd); - BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); raw_spin_lock(&cluster->cluster_lock); - TRACE_TASK(klitirqd, "priority restored\n"); + TRACE_TASK(klmirqd, "priority restored\n"); - __decrease_priority_inheritance(klitirqd, new_owner); + __decrease_priority_inheritance(klmirqd, new_owner); - tsk_rt(old_owner)->cur_klitirqd = NULL; + tsk_rt(old_owner)->cur_klmirqd = NULL; raw_spin_unlock(&cluster->cluster_lock); } @@ -1859,7 +1859,7 @@ static long cedf_activate_plugin(void) } } - spawn_klitirqd(affinity); + spawn_klmirqd(affinity); kfree(affinity); } @@ -1907,8 +1907,8 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { .allocate_aff_obs = cedf_allocate_affinity_observer, #endif #ifdef CONFIG_LITMUS_SOFTIRQD - .increase_prio_klitirqd = increase_priority_inheritance_klitirqd, - .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd, + .increase_prio_klmirqd = increase_priority_inheritance_klmirqd, + .decrease_prio_klmirqd = decrease_priority_inheritance_klmirqd, #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet, diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 04b189e54b03..d52be9325044 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -1154,7 +1154,7 @@ static int __increase_priority_inheritance(struct task_struct* t, #ifdef CONFIG_LITMUS_NESTED_LOCKING /* this sanity check allows for weaker locking in protocols */ - /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */ + /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) 
*/ if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { #endif TRACE_TASK(t, "inherits priority from %s/%d\n", @@ -1248,12 +1248,12 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str success = __increase_priority_inheritance(t, prio_inh); #ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klitirqd != NULL) + if(tsk_rt(t)->cur_klmirqd != NULL) { TRACE_TASK(t, "%s/%d inherits a new priority!\n", - tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + __increase_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); } #endif @@ -1358,12 +1358,12 @@ static void decrease_priority_inheritance(struct task_struct* t, success = __decrease_priority_inheritance(t, prio_inh); #ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klitirqd != NULL) + if(tsk_rt(t)->cur_klmirqd != NULL) { TRACE_TASK(t, "%s/%d decreases in priority!\n", - tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); + tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); + __decrease_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); } #endif @@ -1384,11 +1384,11 @@ static void decrease_priority_inheritance(struct task_struct* t, #ifdef CONFIG_LITMUS_SOFTIRQD /* called with IRQs off */ -static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, +static void increase_priority_inheritance_klmirqd(struct task_struct* klmirqd, struct task_struct* old_owner, struct task_struct* new_owner) { - BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); raw_spin_lock(&gsnedf_lock); @@ -1397,18 +1397,18 @@ static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, if(old_owner) { // unreachable? - tsk_rt(old_owner)->cur_klitirqd = NULL; + tsk_rt(old_owner)->cur_klmirqd = NULL; } - TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", + TRACE_TASK(klmirqd, "giving ownership to %s/%d.\n", new_owner->comm, new_owner->pid); - tsk_rt(new_owner)->cur_klitirqd = klitirqd; + tsk_rt(new_owner)->cur_klmirqd = klmirqd; } - __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio. + __decrease_priority_inheritance(klmirqd, NULL); // kludge to clear out cur prio. - __increase_priority_inheritance(klitirqd, + __increase_priority_inheritance(klmirqd, (tsk_rt(new_owner)->inh_task == NULL) ? 
new_owner : tsk_rt(new_owner)->inh_task); @@ -1418,19 +1418,19 @@ static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd, /* called with IRQs off */ -static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd, +static void decrease_priority_inheritance_klmirqd(struct task_struct* klmirqd, struct task_struct* old_owner, struct task_struct* new_owner) { - BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); + BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); raw_spin_lock(&gsnedf_lock); - TRACE_TASK(klitirqd, "priority restored\n"); + TRACE_TASK(klmirqd, "priority restored\n"); - __decrease_priority_inheritance(klitirqd, new_owner); + __decrease_priority_inheritance(klmirqd, new_owner); - tsk_rt(old_owner)->cur_klitirqd = NULL; + tsk_rt(old_owner)->cur_klmirqd = NULL; raw_spin_unlock(&gsnedf_lock); } @@ -1923,7 +1923,7 @@ static long gsnedf_activate_plugin(void) #endif #ifdef CONFIG_LITMUS_SOFTIRQD - spawn_klitirqd(NULL); + spawn_klmirqd(NULL); #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -1966,8 +1966,8 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { .allocate_aff_obs = gsnedf_allocate_affinity_observer, #endif #ifdef CONFIG_LITMUS_SOFTIRQD - .increase_prio_klitirqd = increase_priority_inheritance_klitirqd, - .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd, + .increase_prio_klmirqd = increase_priority_inheritance_klmirqd, + .decrease_prio_klmirqd = decrease_priority_inheritance_klmirqd, #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet, diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index f9423861eb1f..cda67e0f6bc8 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -168,13 +168,13 @@ static int litmus_dummy___decrease_prio(struct task_struct* t, struct task_struc #endif #ifdef CONFIG_LITMUS_SOFTIRQD -static void litmus_dummy_increase_prio_klitirq(struct task_struct* klitirqd, +static void litmus_dummy_increase_prio_klmirqd(struct task_struct* klmirqd, struct task_struct* old_owner, struct task_struct* new_owner) { } -static void litmus_dummy_decrease_prio_klitirqd(struct task_struct* klitirqd, +static void litmus_dummy_decrease_prio_klmirqd(struct task_struct* klmirqd, struct task_struct* old_owner) { } @@ -264,8 +264,8 @@ struct sched_plugin linux_sched_plugin = { .__compare = litmus_dummy___compare, #endif #ifdef CONFIG_LITMUS_SOFTIRQD - .increase_prio_klitirqd = litmus_dummy_increase_prio_klitirqd, - .decrease_prio_klitirqd = litmus_dummy_decrease_prio_klitirqd, + .increase_prio_klmirqd = litmus_dummy_increase_prio_klmirqd, + .decrease_prio_klmirqd = litmus_dummy_decrease_prio_klmirqd, #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet, @@ -328,8 +328,8 @@ int register_sched_plugin(struct sched_plugin* plugin) CHECK(__compare); #endif #ifdef CONFIG_LITMUS_SOFTIRQD - CHECK(increase_prio_klitirqd); - CHECK(decrease_prio_klitirqd); + CHECK(increase_prio_klmirqd); + CHECK(decrease_prio_klmirqd); #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD CHECK(enqueue_pai_tasklet); -- cgit v1.2.2 From c8483ef0959672310bf4ebb72e1a308b00543f74 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Tue, 11 Dec 2012 22:01:01 -0500 Subject: make klmirqd work like aux tasks. checkpoint. this code is untested! 
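Before the diff: a rough sketch, for orientation only, of how the reworked interface below is meant to be used. The earlier patches address klmirqd daemons by a numeric k_id into a fixed, globally spawned pool; below, callers launch klmirqd threads on demand with launch_klmirqd_thread() and then address a specific thread by its task_struct pointer when scheduling tasklets or work. The sketch uses only the signatures declared in include/litmus/litmus_softirq.h further down; the callback semantics (running in the new thread's context and publishing its task_struct) and the 0-on-success return convention are assumptions, and every name prefixed with my_ is invented for illustration. It is not part of the commit.

/* Illustrative usage sketch; not part of the patch. */
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <litmus/litmus_softirq.h>

static struct task_struct *my_klmirqd; /* hypothetical handle to the spawned thread */

/* Assumed to run in the new klmirqd thread's context during its startup. */
static int my_klmirqd_init_cb(void *arg)
{
	my_klmirqd = current;
	return 0;
}

static klmirqd_callback_t my_cb = {
	.func = my_klmirqd_init_cb,
	.arg  = NULL,
};

static void my_route_bottom_half(struct tasklet_struct *t)
{
	/* cpu == -1 requests no CPU affinity; assume 0 is returned on success */
	if (!my_klmirqd && launch_klmirqd_thread(-1, &my_cb) != 0)
		return;

	/* once the callback has published the thread, aim tasklets at it */
	if (my_klmirqd)
		litmus_tasklet_schedule(t, my_klmirqd);
}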
--- include/linux/interrupt.h | 6 + include/litmus/aux_tasks.h | 10 - include/litmus/litmus_softirq.h | 164 ++--- include/litmus/nvidia_info.h | 35 +- include/litmus/rt_param.h | 70 +- include/litmus/sched_plugin.h | 11 - include/litmus/signal.h | 2 +- include/litmus/unistd_32.h | 5 +- include/litmus/unistd_64.h | 6 +- kernel/sched.c | 8 - kernel/softirq.c | 14 + litmus/Kconfig | 20 +- litmus/aux_tasks.c | 40 +- litmus/edf_common.c | 60 +- litmus/ikglp_lock.c | 12 +- litmus/jobs.c | 2 +- litmus/kfmlp_lock.c | 11 +- litmus/litmus.c | 48 +- litmus/litmus_softirq.c | 1460 ++++++++++++++------------------------- litmus/locking.c | 56 +- litmus/nvidia_info.c | 743 +++++++++++++++----- litmus/sched_cedf.c | 165 +---- litmus/sched_gsn_edf.c | 136 ++-- litmus/sched_plugin.c | 20 - 24 files changed, 1458 insertions(+), 1646 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 57a7bc8807be..9fc31289a1bb 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -507,6 +507,12 @@ struct tasklet_struct #endif }; +struct tasklet_head +{ + struct tasklet_struct *head; + struct tasklet_struct **tail; +}; + #define DECLARE_TASKLET(name, func, data) \ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(0), func, data } diff --git a/include/litmus/aux_tasks.h b/include/litmus/aux_tasks.h index 87745c1c0df0..255bbafcc6b7 100644 --- a/include/litmus/aux_tasks.h +++ b/include/litmus/aux_tasks.h @@ -3,9 +3,6 @@ struct task_struct; -/* admit an aux task with default parameters */ -//int admit_aux_task(struct task_struct *t); - int make_aux_task_if_required(struct task_struct *t); /* call on an aux task when it exits real-time */ @@ -17,13 +14,6 @@ long enable_aux_task_owner(struct task_struct *t); /* call when an aux_owner exits real-time */ long disable_aux_task_owner(struct task_struct *t); - -/* collectivelly make all aux tasks in the process of leader inherit from hp */ -//int aux_tasks_increase_priority(struct task_struct *leader, struct task_struct *hp); - -/* collectivelly make all aux tasks in the process of leader inherit from hp */ -//int aux_tasks_decrease_priority(struct task_struct *leader, struct task_struct *hp); - /* call when an aux_owner increases its priority */ int aux_task_owner_increase_priority(struct task_struct *t); diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h index 46fe89148505..52e3f7e74ab1 100644 --- a/include/litmus/litmus_softirq.h +++ b/include/litmus/litmus_softirq.h @@ -5,129 +5,113 @@ #include /* - Threaded tasklet handling for Litmus. Tasklets - are scheduled with the priority of the tasklet's - owner---that is, the RT task on behalf the tasklet - runs. - - Tasklets are current scheduled in FIFO order with - NO priority inheritance for "blocked" tasklets. + Threaded tasklet/workqueue handling for Litmus. + Items are scheduled in the following order: hi-tasklet, + lo-tasklet, workqueue. Items are scheduled in FIFO order + within each of these classes. klmirqd assumes the priority of the owner of the tasklet when the tasklet is next to execute. - Currently, hi-tasklets are scheduled before - low-tasklets, regardless of priority of low-tasklets. - And likewise, low-tasklets are scheduled before work - queue objects. This priority inversion probably needs - to be fixed, though it is not an issue if our work with - GPUs as GPUs are owned (and associated klmirqds) for - exclusive time periods, thus no inversions can - occur. 
+ The base-priority of a klmirqd thread is below all regular
+ real-time tasks, but above all other Linux scheduling
+ classes (klmirqd threads are within the SCHED_LITMUS class).
+ Regular real-time tasks may increase the priority of
+ a klmirqd thread, but klmirqd is unaware of this
+ (this was not the case in prior incarnations of klmirqd).
 */
-
-#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD
-
-/* Spawns NR_LITMUS_SOFTIRQD klmirqd daemons.
- Actual launch of threads is deffered to kworker's
- workqueue, so daemons will likely not be immediately
- running when this function returns, though the required
- data will be initialized.
-
- @affinity_set: an array expressing the processor affinity
- for each of the NR_LITMUS_SOFTIRQD daemons. May be set
- to NULL for global scheduling.
-
-
- Examples
-
- 8-CPU system with two CPU clusters:
- affinity[] = {0, 0, 0, 0, 3, 3, 3, 3}
- NOTE: Daemons not actually bound to specified CPU, but rather
- cluster in which the CPU resides.
-
- 8-CPU system, partitioned:
- affinity[] = {0, 1, 2, 3, 4, 5, 6, 7}
-
- FIXME: change array to a CPU topology or array of cpumasks
-
- */
-void spawn_klmirqd(int* affinity);
-
+/* Initialize klmirqd */
+void init_klmirqd(void);
 /* Raises a flag to tell klmirqds to terminate.
- Termination is async, so some threads may be running
- after function return. */
+ Termination is async, so some threads may be running
+ after function return. */
 void kill_klmirqd(void);
+void kill_klmirqd_thread(struct task_struct* klmirqd_thread);
 /* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqs are ready
- to handle tasklets. 0, otherwise.*/
+ to handle tasklets. 0, otherwise.*/
 int klmirqd_is_ready(void);
 /* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqs are ready
- to handle tasklets. 0, otherwise.*/
+ to handle tasklets. 0, otherwise.*/
 int klmirqd_is_dead(void);
-/* Flushes all pending work out to the OS for regular
- * tasklet/work processing of the specified 'owner'
- *
- * PRECOND: klmirqd_thread must have a clear entry
- * in the GPU registry, otherwise this call will become
- * a no-op as work will loop back to the klmirqd_thread.
- *
- * Pass NULL for owner to flush ALL pending items.
+
+typedef int (*klmirqd_cb_t) (void *arg);
+
+typedef struct
+{
+ klmirqd_cb_t func;
+ void* arg;
+} klmirqd_callback_t;
+
+/* Launches a klmirqd thread with the provided affinity.
+
+ Actual launch of threads is deferred to kworker's
+ workqueue, so daemons will likely not be immediately
+ running when this function returns, though the required
+ data will be initialized.
+
+ cpu == -1 for no affinity
 */
-void flush_pending(struct task_struct* klmirqd_thread,
- struct task_struct* owner);
+int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb);
-struct task_struct* get_klmirqd(unsigned int k_id);
+/* Flushes all pending work out to the OS for regular
+ * tasklet/work processing.
+ */ +void flush_pending(struct task_struct* klmirqd_thread); extern int __litmus_tasklet_schedule( struct tasklet_struct *t, - unsigned int k_id); + struct task_struct *klmirqd_thread); /* schedule a tasklet on klmirqd #k_id */ static inline int litmus_tasklet_schedule( struct tasklet_struct *t, - unsigned int k_id) + struct task_struct *klmirqd_thread) { int ret = 0; - if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - ret = __litmus_tasklet_schedule(t, k_id); + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + ret = __litmus_tasklet_schedule(t, klmirqd_thread); + } return(ret); } /* for use by __tasklet_schedule() */ static inline int _litmus_tasklet_schedule( struct tasklet_struct *t, - unsigned int k_id) + struct task_struct *klmirqd_thread) { - return(__litmus_tasklet_schedule(t, k_id)); + return(__litmus_tasklet_schedule(t, klmirqd_thread)); } extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, - unsigned int k_id); + struct task_struct *klmirqd_thread); /* schedule a hi tasklet on klmirqd #k_id */ static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t, - unsigned int k_id) + struct task_struct *klmirqd_thread) { int ret = 0; - if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - ret = __litmus_tasklet_hi_schedule(t, k_id); + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + ret = __litmus_tasklet_hi_schedule(t, klmirqd_thread); + } return(ret); } /* for use by __tasklet_hi_schedule() */ static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t, - unsigned int k_id) + struct task_struct *klmirqd_thread) { - return(__litmus_tasklet_hi_schedule(t, k_id)); + return(__litmus_tasklet_hi_schedule(t, klmirqd_thread)); } @@ -136,26 +120,27 @@ static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t, extern int __litmus_tasklet_hi_schedule_first( struct tasklet_struct *t, - unsigned int k_id); + struct task_struct *klmirqd_thread); /* schedule a hi tasklet on klmirqd #k_id on next go-around */ /* PRECONDITION: Interrupts must be disabled. */ static inline int litmus_tasklet_hi_schedule_first( struct tasklet_struct *t, - unsigned int k_id) + struct task_struct *klmirqd_thread) { int ret = 0; - if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - ret = __litmus_tasklet_hi_schedule_first(t, k_id); + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + ret = __litmus_tasklet_hi_schedule_first(t, klmirqd_thread); + } return(ret); } /* for use by __tasklet_hi_schedule_first() */ static inline int _litmus_tasklet_hi_schedule_first( struct tasklet_struct *t, - unsigned int k_id) + struct task_struct *klmirqd_thread) { - return(__litmus_tasklet_hi_schedule_first(t, k_id)); + return(__litmus_tasklet_hi_schedule_first(t, klmirqd_thread)); } @@ -164,36 +149,13 @@ static inline int _litmus_tasklet_hi_schedule_first( extern int __litmus_schedule_work( struct work_struct* w, - unsigned int k_id); + struct task_struct *klmirqd_thread); static inline int litmus_schedule_work( struct work_struct* w, - unsigned int k_id) + struct task_struct *klmirqd_thread) { - return(__litmus_schedule_work(w, k_id)); + return(__litmus_schedule_work(w, klmirqd_thread)); } - - -///////////// mutex operations for client threads. 
- -void down_and_set_stat(struct task_struct* t, - enum klmirqd_sem_status to_set, - struct mutex* sem); - -void __down_and_reset_and_set_stat(struct task_struct* t, - enum klmirqd_sem_status to_reset, - enum klmirqd_sem_status to_set, - struct mutex* sem); - -void up_and_set_stat(struct task_struct* t, - enum klmirqd_sem_status to_set, - struct mutex* sem); - - - -void release_klmirqd_lock(struct task_struct* t); - -int reacquire_klmirqd_lock(struct task_struct* t); - #endif diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h index 97c9577141db..6f354c8b00ac 100644 --- a/include/litmus/nvidia_info.h +++ b/include/litmus/nvidia_info.h @@ -6,10 +6,9 @@ #include - -//#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD #define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM -#define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS + +/* Functions used for decoding NVIDIA blobs. */ int init_nvidia_info(void); void shutdown_nvidia_info(void); @@ -18,29 +17,33 @@ int is_nvidia_func(void* func_addr); void dump_nvidia_info(const struct tasklet_struct *t); - // Returns the Nvidia device # associated with provided tasklet and work_struct. u32 get_tasklet_nv_device_num(const struct tasklet_struct *t); u32 get_work_nv_device_num(const struct work_struct *t); -int init_nv_device_reg(void); -//int get_nv_device_id(struct task_struct* owner); - -int reg_nv_device(int reg_device_id, int register_device, struct task_struct *t); +/* Functions for figuring out the priority of GPU-using tasks */ struct task_struct* get_nv_max_device_owner(u32 target_device_id); -//int is_nv_device_owner(u32 target_device_id); - -void lock_nv_registry(u32 reg_device_id, unsigned long* flags); -void unlock_nv_registry(u32 reg_device_id, unsigned long* flags); -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD -void pai_check_priority_increase(struct task_struct *t, int reg_device_id); -void pai_check_priority_decrease(struct task_struct *t, int reg_device_id); +#ifdef CONFIG_LITMUS_SOFTIRQD +struct task_struct* get_nv_klmirqd_thread(u32 target_device_id); #endif -//void increment_nv_int_count(u32 device); +/* call when the GPU-holding task, t, blocks */ +long enable_gpu_owner(struct task_struct *t); + +/* call when the GPU-holding task, t, resumes */ +long disable_gpu_owner(struct task_struct *t); + +/* call when the GPU-holding task, t, increases its priority */ +int gpu_owner_increase_priority(struct task_struct *t); + +/* call when the GPU-holding task, t, decreases its priority */ +int gpu_owner_decrease_priority(struct task_struct *t); + + +int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t); #endif diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 47301c04d862..c8ee64569dbb 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -128,6 +128,10 @@ struct control_page { #include #include +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + struct _rt_domain; struct bheap_node; struct release_heap; @@ -205,6 +209,38 @@ typedef struct avg_est{ lt_t avg; } avg_est_t; + + +#ifdef CONFIG_LITMUS_SOFTIRQD +//struct tasklet_head +//{ +// struct tasklet_struct *head; +// struct tasklet_struct **tail; +//}; + +struct klmirqd_info +{ + struct task_struct* klmirqd; + struct task_struct* current_owner; + unsigned int terminating:1; + + raw_spinlock_t lock; + + u32 pending; + atomic_t num_hi_pending; + atomic_t num_low_pending; + atomic_t num_work_pending; + + /* in order of priority */ + struct tasklet_head pending_tasklets_hi; + struct tasklet_head pending_tasklets; + struct list_head worklist; 
+ + struct list_head klmirqd_reg; +}; +#endif + + /* RT task parameters for scheduling extensions * These parameters are inherited during clone and therefore must * be explicitly set up before the task set is launched. @@ -221,34 +257,21 @@ struct rt_param { #ifdef CONFIG_LITMUS_SOFTIRQD /* proxy threads have minimum priority by default */ - unsigned int is_proxy_thread:1; - - /* pointer to klmirqd currently working on this - task_struct's behalf. only set by the task pointed - to by klmirqd. + unsigned int is_interrupt_thread:1; - ptr only valid if is_proxy_thread == 0 - */ - struct task_struct* cur_klmirqd; - - /* Used to implement mutual execution exclusion between - * job and klmirqd execution. Job must always hold - * it's klmirqd_sem to execute. klmirqd instance - * must hold the semaphore before executing on behalf - * of a job. - */ - struct mutex klmirqd_sem; - - /* status of held klmirqd_sem, even if the held klmirqd_sem is from - another task (only proxy threads do this though). + /* pointer to data used by klmirqd thread. + * + * ptr only valid if is_interrupt_thread == 1 */ - atomic_t klmirqd_sem_stat; + struct klmirqd_info* klmirqd_info; #endif #ifdef CONFIG_LITMUS_NVIDIA /* number of top-half interrupts handled on behalf of current job */ atomic_t nv_int_count; long unsigned int held_gpus; // bitmap of held GPUs. + struct binheap_node gpu_owner_node; // just one GPU for now... + unsigned int hide_from_gpu:1; #ifdef CONFIG_LITMUS_AFFINITY_LOCKING avg_est_t gpu_migration_est[MIG_LAST+1]; @@ -370,6 +393,13 @@ struct rt_param { struct control_page * ctrl_page; }; +//#ifdef CONFIG_LITMUS_SOFTIRQD +//struct klmirqd_data +//{ +// struct binheap klmirqd_users; +//}; +//#endif + #ifdef CONFIG_REALTIME_AUX_TASKS struct aux_data { diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index e8127f427d56..a13d1a2992fe 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -79,12 +79,6 @@ typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh, raw_spinlock_t *to_unlock, unsigned long irqflags); -typedef void (*increase_prio_klitirq_t)(struct task_struct* klmirqd, - struct task_struct* old_owner, - struct task_struct* new_owner); -typedef void (*decrease_prio_klmirqd_t)(struct task_struct* klmirqd, - struct task_struct* old_owner); - typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet); typedef void (*change_prio_pai_tasklet_t)(struct task_struct *old_prio, @@ -166,11 +160,6 @@ struct sched_plugin { #ifdef CONFIG_LITMUS_AFFINITY_LOCKING allocate_affinity_observer_t allocate_aff_obs; #endif - -#ifdef CONFIG_LITMUS_SOFTIRQD - increase_prio_klitirq_t increase_prio_klmirqd; - decrease_prio_klmirqd_t decrease_prio_klmirqd; -#endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD enqueue_pai_tasklet_t enqueue_pai_tasklet; change_prio_pai_tasklet_t change_prio_pai_tasklet; diff --git a/include/litmus/signal.h b/include/litmus/signal.h index b3d82b294984..38c3207951e0 100644 --- a/include/litmus/signal.h +++ b/include/litmus/signal.h @@ -9,7 +9,7 @@ /* Signals used by Litmus to asynchronously communicate events * to real-time tasks. - * + * * Signal values overlap with [SIGRTMIN, SIGRTMAX], so beware of * application-level conflicts when dealing with COTS user-level * code. 
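For reference while reading the scheduler and edf_common changes that follow: with the rt_param fields added above, a klmirqd thread is identified by its is_interrupt_thread bit, keeps its queue state behind tsk_rt(t)->klmirqd_info, and competes at a real-time priority only while inh_task is set. A minimal sketch of the resulting tests, assuming CONFIG_LITMUS_SOFTIRQD is enabled; the helper names are invented, and the patch's own version of this classing logic appears in litmus/edf_common.c later in the diff.

/* Illustrative helpers; not part of the patch. */
#include <linux/sched.h>
#include <litmus/litmus.h>   /* tsk_rt() */
#include <litmus/rt_param.h>

/* A klmirqd thread at base priority sits below all regular LITMUS^RT tasks;
 * it rises only while inheriting from a client (inh_task != NULL). */
static inline int my_klmirqd_at_base_prio(struct task_struct *t)
{
	return tsk_rt(t)->is_interrupt_thread && !tsk_rt(t)->inh_task;
}

/* The klmirqd_info pointer is meaningful only for interrupt threads. */
static inline struct klmirqd_info *my_klmirqd_info_of(struct task_struct *t)
{
	return tsk_rt(t)->is_interrupt_thread ? tsk_rt(t)->klmirqd_info : NULL;
}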
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index c86b743408ed..7265ffadf555 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h @@ -19,8 +19,7 @@ #define __NR_null_call __LSC(11) #define __NR_litmus_dgl_lock __LSC(12) #define __NR_litmus_dgl_unlock __LSC(13) -#define __NR_register_nv_device __LSC(14) -#define __NR_set_aux_tasks _LSC(15) +#define __NR_set_aux_tasks _LSC(14) -#define NR_litmus_syscalls 16 +#define NR_litmus_syscalls 15 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index 3825bc129dbd..51e730124dde 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -33,10 +33,8 @@ __SYSCALL(__NR_null_call, sys_null_call) __SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock) #define __NR_litmus_dgl_unlock __LSC(13) __SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock) -#define __NR_register_nv_device __LSC(14) -__SYSCALL(__NR_register_nv_device, sys_register_nv_device) -#define __NR_set_aux_tasks __LSC(15) +#define __NR_set_aux_tasks __LSC(14) __SYSCALL(__NR_set_aux_tasks, sys_set_aux_tasks) -#define NR_litmus_syscalls 16 +#define NR_litmus_syscalls 15 diff --git a/kernel/sched.c b/kernel/sched.c index 251c89eaf24e..840f87bce097 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4340,10 +4340,6 @@ need_resched: rcu_note_context_switch(cpu); prev = rq->curr; -#ifdef CONFIG_LITMUS_SOFTIRQD - release_klitirqd_lock(prev); -#endif - /* LITMUS^RT: quickly re-evaluate the scheduling decision * if the previous one is no longer valid after CTX. */ @@ -4444,10 +4440,6 @@ litmus_need_resched_nonpreemptible: if (need_resched()) goto need_resched; -#ifdef LITMUS_SOFTIRQD - reacquire_klitirqd_lock(prev); -#endif - #ifdef CONFIG_LITMUS_PAI_SOFTIRQD litmus->run_tasklets(prev); #endif diff --git a/kernel/softirq.c b/kernel/softirq.c index b013046e8c36..053aec196a50 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -403,11 +403,13 @@ void open_softirq(int nr, void (*action)(struct softirq_action *)) /* * Tasklets */ +/* struct tasklet_head { struct tasklet_struct *head; struct tasklet_struct **tail; }; +*/ static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); @@ -522,6 +524,11 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) #ifdef CONFIG_LITMUS_NVIDIA if(is_nvidia_func(t->func)) { +#if 1 + // do nvidia tasklets right away and return + if(__do_nv_now(t)) + return; +#else u32 nvidia_device = get_tasklet_nv_device_num(t); // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", // __FUNCTION__, nvidia_device,litmus_clock()); @@ -564,6 +571,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) } } unlock_nv_registry(nvidia_device, &flags); +#endif } #endif @@ -590,6 +598,11 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) #ifdef CONFIG_LITMUS_NVIDIA if(is_nvidia_func(t->func)) { +#if 1 + // do nvidia tasklets right away and return + if(__do_nv_now(t)) + return; +#else u32 nvidia_device = get_tasklet_nv_device_num(t); // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", // __FUNCTION__, nvidia_device,litmus_clock()); @@ -632,6 +645,7 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) } } unlock_nv_registry(nvidia_device, &flags); +#endif } #endif diff --git a/litmus/Kconfig b/litmus/Kconfig index f2434b87239b..9aeae659ae32 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -382,7 +382,7 @@ menu "Interrupt Handling" choice prompt "Scheduling of interrupt bottom-halves in Litmus." 
default LITMUS_SOFTIRQD_NONE - depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ + depends on LITMUS_LOCKING help Schedule tasklets with known priorities in Litmus. @@ -398,7 +398,7 @@ config LITMUS_SOFTIRQD specifically dispatched to these workers. (Softirqs for Litmus tasks are not magically redirected to klmirqd.) - G-EDF/RM, C-EDF/RM ONLY for now! + G-EDF, C-EDF ONLY for now! config LITMUS_PAI_SOFTIRQD @@ -409,19 +409,11 @@ config LITMUS_PAI_SOFTIRQD at the cost of non-preemptive durations of bottom half processing. - G-EDF/RM, C-EDF/RM ONLY for now! + G-EDF, C-EDF ONLY for now! endchoice -config NR_LITMUS_SOFTIRQD - int "Number of klmirqd." - depends on LITMUS_SOFTIRQD - range 1 4096 - default "1" - help - Should be <= to the number of CPUs in your system. - config LITMUS_NVIDIA bool "Litmus handling of NVIDIA interrupts." default n @@ -445,7 +437,7 @@ config LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT config NV_DEVICE_NUM int "Number of NVIDIA GPUs." depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD - range 1 4096 + range 1 16 default "1" help Should be (<= to the number of CPUs) and @@ -453,11 +445,11 @@ config NV_DEVICE_NUM config NV_MAX_SIMULT_USERS int "Maximum number of threads sharing a GPU simultanously" - depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD + depends on LITMUS_NVIDIA range 1 3 default "2" help - Should be equal to the #copy_engines + #execution_engines + Should be at least equal to the #copy_engines + #execution_engines of the GPUs in your system. Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?) diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index 20f477f6e3bc..ef26bba3be77 100644 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -54,7 +54,7 @@ int exit_aux_task(struct task_struct *t) TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, t->group_leader->comm, t->group_leader->pid); tsk_rt(t)->is_aux_task = 0; - + #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE list_del(&tsk_rt(t)->aux_task_node); if (tsk_rt(t)->inh_task) { @@ -218,36 +218,36 @@ int make_aux_task_if_required(struct task_struct *t) { struct task_struct *leader; int retval = 0; - - read_lock_irq(&tasklist_lock); - + + read_lock_irq(&tasklist_lock); + leader = t->group_leader; if(!tsk_aux(leader)->initialized || !tsk_aux(leader)->aux_future) { goto out; } - + TRACE_CUR("Making %s/%d in %s/%d an aux thread.\n", t->comm, t->pid, leader->comm, leader->pid); - + INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); - + retval = admit_aux_task(t); if (retval == 0) { tsk_rt(t)->is_aux_task = 1; - -#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE + +#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); - + if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - + TRACE_CUR("hp in group: %s/%d\n", hp->comm, hp->pid); - + retval = litmus->__increase_prio(t, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); - + if (retval != 0) { /* don't know how to recover from bugs with prio inheritance. better just crash. 
*/ read_unlock_irq(&tasklist_lock); @@ -256,7 +256,7 @@ int make_aux_task_if_required(struct task_struct *t) } #endif } - + out: read_unlock_irq(&tasklist_lock); @@ -385,7 +385,7 @@ static long __do_enable_aux_tasks(int flags) if (flags & AUX_FUTURE) { tsk_aux(leader)->aux_future = 1; } - + t = leader; do { if (!tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->is_aux_task) { @@ -398,22 +398,22 @@ static long __do_enable_aux_tasks(int flags) TRACE_CUR("Checking task in %s/%d: %s/%d = (p = %llu):\n", leader->comm, leader->pid, t->comm, t->pid, tsk_rt(t)->task_params.period); - + /* inspect period to see if it is an rt task */ if (tsk_rt(t)->task_params.period == 0) { if (flags && AUX_CURRENT) { if (!tsk_rt(t)->is_aux_task) { int admit_ret; - + TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); admit_ret = admit_aux_task(t); - + if (admit_ret == 0) { /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ tsk_rt(t)->is_aux_task = 1; aux_tasks_added = 1; - + #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); #endif @@ -464,7 +464,7 @@ static long __do_disable_aux_tasks(int flags) if (flags & AUX_FUTURE) { tsk_aux(leader)->aux_future = 0; } - + if (flags & AUX_CURRENT) { t = leader; do { diff --git a/litmus/edf_common.c b/litmus/edf_common.c index c279bf12a7f5..27b728a55669 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -73,6 +73,22 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return first && !second; } + /* There is some goofy stuff in this code here. There are three subclasses + * within the SCHED_LITMUS scheduling class: + * 1) Auxiliary tasks: COTS helper threads from the application level that + * are forced to be real-time. + * 2) klmirqd interrupt threads: Litmus threaded interrupt handlers. + * 3) Normal Litmus tasks. + * + * At their base priorities, #3 > #2 > #1. However, #1 and #2 threads might + * inherit a priority from a task of #3. + * + * The code proceeds in the following manner: + * 1) Make aux and klmirqd threads with base-priorities have low priorities. + * 2) Determine effective priorities. + * 3) Perform priority comparison. Favor #3 over #1 and #2 in case of tie. + */ + #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED) /* run aux tasks at max priority */ @@ -109,7 +125,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) return temp; } } - + if (first->rt_param.is_aux_task && second->rt_param.is_aux_task && first->rt_param.inh_task == second->rt_param.inh_task) { // inh_task is !NULL for both tasks since neither was a lo_aux task // Both aux tasks inherit from the same task, so tie-break @@ -120,6 +136,36 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) } #endif +#ifdef CONFIG_LITMUS_SOFTIRQD + { + int first_lo_klmirqd = first->rt_param.is_interrupt_thread && !first->rt_param.inh_task; + int second_lo_klmirqd = second->rt_param.is_interrupt_thread && !second->rt_param.inh_task; + + /* prioritize aux tasks without inheritance below real-time tasks */ + if (first_lo_klmirqd || second_lo_klmirqd) { + // one of these is an klmirqd thread without inheritance. + if(first_lo_klmirqd && second_lo_klmirqd) { + TRACE_CUR("klmirqd tie break!\n"); // tie-break by BASE priority of the aux tasks + goto klmirqd_tie_break; + } + else { + // make the klmirqd thread (second) lowest priority real-time task + int temp = (first_lo_klmirqd) ? 
!is_realtime(second) : !is_realtime(first); + TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp); + return temp; + } + } + + if (first->rt_param.is_interrupt_thread && second->rt_param.is_interrupt_thread && + first->rt_param.inh_task == second->rt_param.inh_task) { // inh_task is !NULL for both tasks since neither was a lo_klmirqd task + // Both klmirqd tasks inherit from the same task, so tie-break + // by base priority of the klmirqd tasks. + TRACE_CUR("klmirqd tie break!\n"); + goto klmirqd_tie_break; + } + } +#endif + #ifdef CONFIG_LITMUS_LOCKING /* Check for EFFECTIVE priorities. Change task @@ -161,7 +207,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) #endif aux_tie_break: - +klmirqd_tie_break: + if (!is_realtime(second_task)) { return 1; } @@ -230,15 +277,13 @@ aux_tie_break: } else if (first_task->pid == second_task->pid) { #ifdef CONFIG_LITMUS_SOFTIRQD - if (first_task->rt_param.is_proxy_thread < - second_task->rt_param.is_proxy_thread) { + if (first_task->rt_param.is_interrupt_thread < second_task->rt_param.is_interrupt_thread) { return 1; } - else if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) { + else if (first_task->rt_param.is_interrupt_thread == second_task->rt_param.is_interrupt_thread) { #endif #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) - /* is this dead code? */ if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { return 1; } @@ -246,8 +291,7 @@ aux_tie_break: #endif /* Something could be wrong if you get this far. */ - if (unlikely(first->rt_param.inh_task == - second->rt_param.inh_task)) { + if (unlikely(first->rt_param.inh_task == second->rt_param.inh_task)) { /* Both tasks have the same inherited priority. * Likely in a bug-condition. */ diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c index b29828344dd1..a4ae74331782 100644 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c @@ -1960,11 +1960,11 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* return(NULL); } - if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { - TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", - NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); -// return(NULL); - } +// if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { +// TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", +// NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); +//// return(NULL); +// } ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); if(!ikglp_aff) { @@ -2124,7 +2124,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t if(aff->q_info[i].q->count < max_fifo_len) { int want = 0; - lt_t migration = + lt_t migration = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))); diff --git a/litmus/jobs.c b/litmus/jobs.c index 9fe4eb1fa168..8593a8d2f107 100644 --- a/litmus/jobs.c +++ b/litmus/jobs.c @@ -30,7 +30,7 @@ void prepare_for_next_period(struct task_struct *t) * release and deadline. Lateness may be negative. 
*/ t->rt_param.job_params.lateness = - (long long)litmus_clock() - + (long long)litmus_clock() - (long long)t->rt_param.job_params.deadline; setup_release(t, get_release(t) + get_rt_period(t)); diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c index ab472330095d..785a095275e6 100644 --- a/litmus/kfmlp_lock.c +++ b/litmus/kfmlp_lock.c @@ -587,11 +587,11 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* return(NULL); } - if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { - TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", - NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); -// return(NULL); - } +// if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { +// TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", +// NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); +//// return(NULL); +// } kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); if(!kfmlp_aff) { @@ -829,6 +829,7 @@ void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* f reg_nv_device(gpu, 1, t); // register + tsk_rt(t)->suspend_gpu_tracker_on_block = 0; reset_gpu_tracker(t); start_gpu_tracker(t); diff --git a/litmus/litmus.c b/litmus/litmus.c index 3b8017397e80..fa244ba53e22 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -60,28 +60,6 @@ void bheap_node_free(struct bheap_node* hn) struct release_heap* release_heap_alloc(int gfp_flags); void release_heap_free(struct release_heap* rh); -#ifdef CONFIG_LITMUS_NVIDIA -/* - * sys_register_nv_device - * @nv_device_id: The Nvidia device id that the task want to register - * @reg_action: set to '1' to register the specified device. zero otherwise. - * Syscall for register task's designated nvidia device into NV_DEVICE_REG array - * Returns EFAULT if nv_device_id is out of range. - * 0 if success - */ -asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action) -{ - /* register the device to caller (aka 'current') */ - return(reg_nv_device(nv_device_id, reg_action, current)); -} -#else -asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action) -{ - return(-EINVAL); -} -#endif - - /* * sys_set_task_rt_param * @pid: Pid of the task which scheduling parameters must be changed @@ -393,22 +371,11 @@ static void reinit_litmus_state(struct task_struct* p, int restore) // WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks)); #endif -#ifdef CONFIG_LITMUS_SOFTIRQD - /* We probably should not have any tasklets executing for - * us at this time. - */ - WARN_ON(p->rt_param.cur_klmirqd); - WARN_ON(atomic_read(&p->rt_param.klmirqd_sem_stat) == HELD); - - if(p->rt_param.cur_klmirqd) - flush_pending(p->rt_param.cur_klmirqd, p); - - if(atomic_read(&p->rt_param.klmirqd_sem_stat) == HELD) - up_and_set_stat(p, NOT_HELD, &p->rt_param.klmirqd_sem); -#endif #ifdef CONFIG_LITMUS_NVIDIA WARN_ON(p->rt_param.held_gpus != 0); + + INIT_BINHEAP_NODE(&p->rt_param.gpu_owner_node); #endif /* Cleanup everything else. 
*/ @@ -477,11 +444,9 @@ long __litmus_admit_task(struct task_struct* tsk) //INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, prio_order); // done by scheduler #endif #ifdef CONFIG_LITMUS_SOFTIRQD - /* proxy thread off by default */ - tsk_rt(tsk)->is_proxy_thread = 0; - tsk_rt(tsk)->cur_klmirqd = NULL; - mutex_init(&tsk_rt(tsk)->klmirqd_sem); - atomic_set(&tsk_rt(tsk)->klmirqd_sem_stat, NOT_HELD); + /* not an interrupt thread by default */ + tsk_rt(tsk)->is_interrupt_thread = 0; + tsk_rt(tsk)->klmirqd_info = NULL; #endif retval = litmus->admit_task(tsk); @@ -580,8 +545,7 @@ int switch_sched_plugin(struct sched_plugin* plugin) cpu_relax(); #ifdef CONFIG_LITMUS_SOFTIRQD - if(!klmirqd_is_dead()) - { + if (!klmirqd_is_dead()) { kill_klmirqd(); } #endif diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index 73a3053e662b..44e2d38ad982 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c @@ -18,10 +18,6 @@ /* TODO: Remove unneeded mb() and other barriers. */ - -/* counts number of daemons ready to handle litmus irqs. */ -static atomic_t num_ready_klmirqds = ATOMIC_INIT(0); - enum pending_flags { LIT_TASKLET_LOW = 0x1, @@ -29,35 +25,313 @@ enum pending_flags LIT_WORK = LIT_TASKLET_HI<<1 }; -/* only support tasklet processing for now. */ -struct tasklet_head +struct klmirqd_registration { - struct tasklet_struct *head; - struct tasklet_struct **tail; + raw_spinlock_t lock; + u32 nr_threads; + unsigned int initialized:1; + unsigned int shuttingdown:1; + struct list_head threads; }; -struct klmirqd_info +static atomic_t klmirqd_id_gen = ATOMIC_INIT(0); + +static struct klmirqd_registration klmirqd_state; + + + +void init_klmirqd(void) +{ + raw_spin_lock_init(&klmirqd_state.lock); + + klmirqd_state.nr_threads = 0; + klmirqd_state.initialized = 1; + klmirqd_state.shuttingdown = 0; + INIT_LIST_HEAD(&klmirqd_state.threads); +} + +static int __klmirqd_is_ready(void) +{ + return (klmirqd_state.initialized == 1 && klmirqd_state.shuttingdown == 0); +} + +int klmirqd_is_ready(void) +{ + unsigned long flags; + int ret; + + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); + ret = __klmirqd_is_ready(); + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); + + return ret; +} + +int klmirqd_is_dead(void) +{ + return(!klmirqd_is_ready()); +} + + +void kill_klmirqd(void) { - struct task_struct* klmirqd; - struct task_struct* current_owner; - int terminating; + if(!klmirqd_is_dead()) + { + unsigned long flags; + struct list_head *pos; + + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); + + TRACE("%s: Killing all klmirqd threads! (%d of them)\n", __FUNCTION__, klmirqd_state.nr_threads); + klmirqd_state.shuttingdown = 1; - raw_spinlock_t lock; + list_for_each(pos, &klmirqd_state.threads) { + struct klmirqd_info* info = list_entry(pos, struct klmirqd_info, klmirqd_reg); - u32 pending; - atomic_t num_hi_pending; - atomic_t num_low_pending; - atomic_t num_work_pending; + if(info->terminating != 1) + { + info->terminating = 1; + mb(); /* just to be sure?
*/ + flush_pending(info->klmirqd); - /* in order of priority */ - struct tasklet_head pending_tasklets_hi; - struct tasklet_head pending_tasklets; - struct list_head worklist; + /* signal termination */ + kthread_stop(info->klmirqd); + } + } + + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); + } +} + + + +void kill_klmirqd_thread(struct task_struct* klmirqd_thread) +{ + unsigned long flags; + struct klmirqd_info* info; + + if (!tsk_rt(klmirqd_thread)->is_interrupt_thread) { + TRACE("%s/%d is not a klmirqd thread\n", klmirqd_thread->comm, klmirqd_thread->pid); + return; + } + + TRACE("%s: Killing klmirqd thread %s/%d\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid); + + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); + + info = tsk_rt(klmirqd_thread)->klmirqd_info; + + if(info->terminating != 1) { + info->terminating = 1; + mb(); + + flush_pending(klmirqd_thread); + kthread_stop(klmirqd_thread); + } + + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); +} + + + +struct klmirqd_launch_data +{ + int cpu_affinity; + klmirqd_callback_t* cb; + struct work_struct work; }; -/* one list for each klmirqd */ -static struct klmirqd_info klmirqds[NR_LITMUS_SOFTIRQD]; +static int run_klmirqd(void* callback); + + +/* executed by a kworker from workqueues */ +static void __launch_klmirqd_thread(struct work_struct *work) +{ + int id; + struct task_struct* thread = NULL; + struct klmirqd_launch_data* launch_data = + container_of(work, struct klmirqd_launch_data, work); + + TRACE("%s: Creating klmirqd thread\n", __FUNCTION__); + + id = atomic_inc_return(&klmirqd_id_gen); + + if (launch_data->cpu_affinity != -1) { + thread = kthread_create( + run_klmirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)launch_data->cb, + "klmirqd_th%d/%d", + id, + launch_data->cpu_affinity); + + /* litmus will put is in the right cluster. */ + kthread_bind(thread, launch_data->cpu_affinity); + + TRACE("%s: Launching klmirqd_th%d/%d\n", __FUNCTION__, id, launch_data->cpu_affinity); + } + else { + thread = kthread_create( + run_klmirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)launch_data->cb, + "klmirqd_th%d", + id); + + TRACE("%s: Launching klmirqd_th%d\n", __FUNCTION__, id); + } + + if (thread) { + wake_up_process(thread); + } + else { + TRACE("Could not create klmirqd/%d thread!\n", id); + } + + kfree(launch_data); +} + + +int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb) +{ + struct klmirqd_launch_data* delayed_launch; + + if (!klmirqd_is_ready()) { + TRACE("klmirqd is not ready. Check that it was initialized!\n"); + return -1; + } + + /* tell a work queue to launch the threads. we can't make scheduling + calls since we're in an atomic state. 
*/ + delayed_launch = kmalloc(sizeof(struct klmirqd_launch_data), GFP_ATOMIC); + delayed_launch->cpu_affinity = cpu; + delayed_launch->cb = cb; + INIT_WORK(&delayed_launch->work, __launch_klmirqd_thread); + schedule_work(&delayed_launch->work); + + return 0; +} + + + + +#define KLMIRQD_SLICE_NR_JIFFIES 1 +#define KLMIRQD_SLICE_NS ((NSEC_PER_SEC / HZ) * KLMIRQD_SLICE_NR_JIFFIES) + +static int set_litmus_daemon_sched(struct task_struct* tsk) +{ + int ret = 0; + + struct rt_task tp = { + .period = KLMIRQD_SLICE_NS, /* dummy one-jiffy period */ + .relative_deadline = KLMIRQD_SLICE_NS, + .exec_cost = KLMIRQD_SLICE_NS, + .phase = 0, + .cpu = task_cpu(current), + .budget_policy = NO_ENFORCEMENT, + .budget_signal_policy = NO_SIGNALS, + .cls = RT_CLASS_BEST_EFFORT + }; + + struct sched_param param = { .sched_priority = 0}; + + TRACE_CUR("Setting %s/%d as daemon thread.\n", tsk->comm, tsk->pid); + + /* set task params */ + tsk_rt(tsk)->task_params = tp; + tsk_rt(tsk)->is_interrupt_thread = 1; + + /* inform the OS we're SCHED_LITMUS -- + sched_setscheduler_nocheck() calls litmus_admit_task(). */ + sched_setscheduler_nocheck(tsk, SCHED_LITMUS, &param); + + return ret; +} + +static int register_klmirqd(struct task_struct* tsk) +{ + int retval = 0; + unsigned long flags; + struct klmirqd_info *info = NULL; + + if (!tsk_rt(tsk)->is_interrupt_thread) { + TRACE("Only proxy threads already running in Litmus may become klmirqd threads!\n"); + WARN_ON(1); + retval = -1; + goto out; + } + + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); + + if (!__klmirqd_is_ready()) { + TRACE("klmirqd is not ready! Did you forget to initialize it?\n"); + WARN_ON(1); + retval = -1; + goto out_unlock; + } + + /* allocate and initialize klmirqd data for the thread */ + info = kmalloc(sizeof(struct klmirqd_info), GFP_KERNEL); + if (!info) { + TRACE("Failed to allocate klmirqd_info struct!\n"); + retval = -1; /* todo: pick better code */ + goto out_unlock; + } + memset(info, 0, sizeof(struct klmirqd_info)); + info->klmirqd = tsk; + info->pending_tasklets_hi.tail = &info->pending_tasklets_hi.head; + info->pending_tasklets.tail = &info->pending_tasklets.head; + INIT_LIST_HEAD(&info->worklist); + INIT_LIST_HEAD(&info->klmirqd_reg); + raw_spin_lock_init(&info->lock); + + + /* now register with klmirqd */ + list_add_tail(&info->klmirqd_reg, &klmirqd_state.threads); + ++klmirqd_state.nr_threads; + + /* update the task struct to point to klmirqd info */ + tsk_rt(tsk)->klmirqd_info = info; + +out_unlock: + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); + +out: + return retval; +} + +static int unregister_klmirqd(struct task_struct* tsk) +{ + int retval = 0; + unsigned long flags; + struct klmirqd_info *info = tsk_rt(tsk)->klmirqd_info; + + if (!tsk_rt(tsk)->is_interrupt_thread || !info) { + TRACE("%s/%d is not a klmirqd thread!\n", tsk->comm, tsk->pid); + WARN_ON(1); + retval = -1; + goto out; + } + + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); + + /* remove the entry in the klmirqd thread list */ + list_del(&info->klmirqd_reg); + --klmirqd_state.nr_threads; + + /* remove link to klmirqd info from thread */ + tsk_rt(tsk)->klmirqd_info = NULL; + + /* clean up memory */ + kfree(info); + + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); + +out: + return retval; +} + @@ -67,35 +341,50 @@ int proc_read_klmirqd_stats(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = snprintf(page, PAGE_SIZE, - "num ready klmirqds: %d\n\n", - atomic_read(&num_ready_klmirqds)); - - if(klmirqd_is_ready()) -
{ - int i; - for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) - { - len += - snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */ - "klmirqd_th%d: %s/%d\n" - "\tcurrent_owner: %s/%d\n" - "\tpending: %x\n" - "\tnum hi: %d\n" - "\tnum low: %d\n" - "\tnum work: %d\n\n", - i, - klmirqds[i].klmirqd->comm, klmirqds[i].klmirqd->pid, - (klmirqds[i].current_owner != NULL) ? - klmirqds[i].current_owner->comm : "(null)", - (klmirqds[i].current_owner != NULL) ? - klmirqds[i].current_owner->pid : 0, - klmirqds[i].pending, - atomic_read(&klmirqds[i].num_hi_pending), - atomic_read(&klmirqds[i].num_low_pending), - atomic_read(&klmirqds[i].num_work_pending)); + unsigned long flags; + int len; + + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); + + if (klmirqd_state.initialized) { + if (!klmirqd_state.shuttingdown) { + struct list_head *pos; + + len = snprintf(page, PAGE_SIZE, + "num ready klmirqds: %d\n\n", + klmirqd_state.nr_threads); + + list_for_each(pos, &klmirqd_state.threads) { + struct klmirqd_info* info = list_entry(pos, struct klmirqd_info, klmirqd_reg); + + len += + snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */ + "klmirqd_thread: %s/%d\n" + "\tcurrent_owner: %s/%d\n" + "\tpending: %x\n" + "\tnum hi: %d\n" + "\tnum low: %d\n" + "\tnum work: %d\n\n", + info->klmirqd->comm, info->klmirqd->pid, + (info->current_owner != NULL) ? + info->current_owner->comm : "(null)", + (info->current_owner != NULL) ? + info->current_owner->pid : 0, + info->pending, + atomic_read(&info->num_hi_pending), + atomic_read(&info->num_low_pending), + atomic_read(&info->num_work_pending)); + } + } + else { + len = snprintf(page, PAGE_SIZE, "klmirqd is shutting down\n"); } } + else { + len = snprintf(page, PAGE_SIZE, "klmirqd is not initialized!\n"); + } + + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); return(len); } @@ -162,6 +451,15 @@ static void dump_state(struct klmirqd_info* which, const char* caller) #endif + + + + + + + + + /* forward declarations */ static void ___litmus_tasklet_schedule(struct tasklet_struct *t, struct klmirqd_info *which, @@ -174,24 +472,6 @@ static void ___litmus_schedule_work(struct work_struct *w, int wakeup); - -inline unsigned int klmirqd_id(struct task_struct* tsk) -{ - int i; - for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) - { - if(klmirqds[i].klmirqd == tsk) - { - return i; - } - } - - BUG(); - - return 0; -} - - inline static u32 litirq_pending_hi_irqoff(struct klmirqd_info* which) { return (which->pending & LIT_TASKLET_HI); @@ -225,200 +505,11 @@ inline static u32 litirq_pending(struct klmirqd_info* which) return pending; }; -inline static u32 litirq_pending_with_owner(struct klmirqd_info* which, struct task_struct* owner) -{ - unsigned long flags; - u32 pending; - - raw_spin_lock_irqsave(&which->lock, flags); - pending = litirq_pending_irqoff(which); - if(pending) - { - if(which->current_owner != owner) - { - pending = 0; // owner switch! 
- } - } - raw_spin_unlock_irqrestore(&which->lock, flags); - - return pending; -} - - -inline static u32 litirq_pending_and_sem_and_owner(struct klmirqd_info* which, - struct mutex** sem, - struct task_struct** t) -{ - unsigned long flags; - u32 pending; - - /* init values */ - *sem = NULL; - *t = NULL; - - raw_spin_lock_irqsave(&which->lock, flags); - - pending = litirq_pending_irqoff(which); - if(pending) - { - if(which->current_owner != NULL) - { - *t = which->current_owner; - *sem = &tsk_rt(which->current_owner)->klmirqd_sem; - } - else - { - BUG(); - } - } - raw_spin_unlock_irqrestore(&which->lock, flags); - - if(likely(*sem)) - { - return pending; - } - else - { - return 0; - } -} - -/* returns true if the next piece of work to do is from a different owner. - */ -static int tasklet_ownership_change( - struct klmirqd_info* which, - enum pending_flags taskletQ) -{ - /* this function doesn't have to look at work objects since they have - priority below tasklets. */ - - unsigned long flags; - int ret = 0; - - raw_spin_lock_irqsave(&which->lock, flags); - - switch(taskletQ) - { - case LIT_TASKLET_HI: - if(litirq_pending_hi_irqoff(which)) - { - ret = (which->pending_tasklets_hi.head->owner != - which->current_owner); - } - break; - case LIT_TASKLET_LOW: - if(litirq_pending_low_irqoff(which)) - { - ret = (which->pending_tasklets.head->owner != - which->current_owner); - } - break; - default: - break; - } - - raw_spin_unlock_irqrestore(&which->lock, flags); - - TRACE_TASK(which->klmirqd, "ownership change needed: %d\n", ret); - - return ret; -} - - -static void __reeval_prio(struct klmirqd_info* which) -{ - struct task_struct* next_owner = NULL; - struct task_struct* klmirqd = which->klmirqd; - - /* Check in prio-order */ - u32 pending = litirq_pending_irqoff(which); - - //__dump_state(which, "__reeval_prio: before"); - - if(pending) - { - if(pending & LIT_TASKLET_HI) - { - next_owner = which->pending_tasklets_hi.head->owner; - } - else if(pending & LIT_TASKLET_LOW) - { - next_owner = which->pending_tasklets.head->owner; - } - else if(pending & LIT_WORK) - { - struct work_struct* work = - list_first_entry(&which->worklist, struct work_struct, entry); - next_owner = work->owner; - } - } - - if(next_owner != which->current_owner) - { - struct task_struct* old_owner = which->current_owner; - - /* bind the next owner. */ - which->current_owner = next_owner; - mb(); - - if(next_owner != NULL) - { - if(!in_interrupt()) - { - TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, - ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->comm, - ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->pid, - next_owner->comm, next_owner->pid); - } - else - { - TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__, - ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->comm, - ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->pid, - next_owner->comm, next_owner->pid); - } - - litmus->increase_prio_inheritance_klmirqd(klmirqd, old_owner, next_owner); - } - else - { - if(likely(!in_interrupt())) - { - TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n", - __FUNCTION__, klmirqd->comm, klmirqd->pid); - } - else - { - // is this a bug? 
- TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n", - __FUNCTION__, klmirqd->comm, klmirqd->pid); - } - - BUG_ON(pending != 0); - litmus->decrease_prio_inheritance_klmirqd(klmirqd, old_owner, NULL); - } - } - - //__dump_state(which, "__reeval_prio: after"); -} - -static void reeval_prio(struct klmirqd_info* which) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&which->lock, flags); - __reeval_prio(which); - raw_spin_unlock_irqrestore(&which->lock, flags); -} - - static void wakeup_litirqd_locked(struct klmirqd_info* which) { /* Interrupts are disabled: no need to stop preemption */ if (which && which->klmirqd) { - __reeval_prio(which); /* configure the proper priority */ - if(which->klmirqd->state != TASK_RUNNING) { TRACE("%s: Waking up klmirqd: %s/%d\n", __FUNCTION__, @@ -468,7 +559,7 @@ static void do_lit_tasklet(struct klmirqd_info* which, list = list->next; /* execute tasklet if it has my priority and is free */ - if ((t->owner == which->current_owner) && tasklet_trylock(t)) { + if (tasklet_trylock(t)) { if (!atomic_read(&t->count)) { sched_trace_tasklet_begin(t->owner); @@ -503,15 +594,14 @@ static void do_lit_tasklet(struct klmirqd_info* which, // returns 1 if priorities need to be changed to continue processing // pending tasklets. -static int do_litirq(struct klmirqd_info* which) +static void do_litirq(struct klmirqd_info* which) { u32 pending; - int resched = 0; if(in_interrupt()) { TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__); - return(0); + return; } if(which->klmirqd != current) @@ -519,59 +609,40 @@ static int do_litirq(struct klmirqd_info* which) TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n", __FUNCTION__, current->comm, current->pid, which->klmirqd->comm, which->klmirqd->pid); - return(0); + return; } if(!is_realtime(current)) { TRACE_CUR("%s: exiting early: klmirqd is not real-time. Sched Policy = %d\n", __FUNCTION__, current->policy); - return(0); + return; } /* We only handle tasklets & work objects, no need for RCU triggers? */ pending = litirq_pending(which); - if(pending) - { + if(pending) { /* extract the work to do and do it! */ - if(pending & LIT_TASKLET_HI) - { + if(pending & LIT_TASKLET_HI) { TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__); do_lit_tasklet(which, &which->pending_tasklets_hi); - resched = tasklet_ownership_change(which, LIT_TASKLET_HI); - - if(resched) - { - TRACE_CUR("%s: HI tasklets of another owner remain. " - "Skipping any LOW tasklets.\n", __FUNCTION__); - } } - if(!resched && (pending & LIT_TASKLET_LOW)) - { + if(pending & LIT_TASKLET_LOW) { TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__); do_lit_tasklet(which, &which->pending_tasklets); - resched = tasklet_ownership_change(which, LIT_TASKLET_LOW); - - if(resched) - { - TRACE_CUR("%s: LOW tasklets of another owner remain. " - "Skipping any work objects.\n", __FUNCTION__); - } } } - - return(resched); } static void do_work(struct klmirqd_info* which) { unsigned long flags; - work_func_t f; struct work_struct* work; + work_func_t f; // only execute one work-queue item to yield to tasklets. // ...is this a good idea, or should we just batch them? @@ -594,125 +665,58 @@ static void do_work(struct klmirqd_info* which) raw_spin_unlock_irqrestore(&which->lock, flags); + TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__); + // do the work! + work_clear_pending(work); + f = work->func; + f(work); /* can't touch 'work' after this point, + the user may have freed it. 
*/ - /* safe to read current_owner outside of lock since only this thread - may write to the pointer. */ - if(work->owner == which->current_owner) - { - TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__); - // do the work! - work_clear_pending(work); - f = work->func; - f(work); /* can't touch 'work' after this point, - the user may have freed it. */ - - atomic_dec(&which->num_work_pending); - } - else - { - TRACE_CUR("%s: Could not invoke work object. Requeuing.\n", - __FUNCTION__); - ___litmus_schedule_work(work, which, 0); - } + atomic_dec(&which->num_work_pending); no_work: return; } -static int set_litmus_daemon_sched(void) -{ - /* set up a daemon job that will never complete. - it should only ever run on behalf of another - real-time task. - - TODO: Transition to a new job whenever a - new tasklet is handled */ - - int ret = 0; - - struct rt_task tp = { - .exec_cost = 0, - .period = 1000000000, /* dummy 1 second period */ - .phase = 0, - .cpu = task_cpu(current), - .budget_policy = NO_ENFORCEMENT, - .cls = RT_CLASS_BEST_EFFORT - }; - - struct sched_param param = { .sched_priority = 0}; - - - /* set task params, mark as proxy thread, and init other data */ - tsk_rt(current)->task_params = tp; - tsk_rt(current)->is_proxy_thread = 1; - tsk_rt(current)->cur_klmirqd = NULL; - mutex_init(&tsk_rt(current)->klmirqd_sem); - atomic_set(&tsk_rt(current)->klmirqd_sem_stat, NOT_HELD); - - /* inform the OS we're SCHED_LITMUS -- - sched_setscheduler_nocheck() calls litmus_admit_task(). */ - sched_setscheduler_nocheck(current, SCHED_LITMUS, &param); - - return ret; -} - -static void enter_execution_phase(struct klmirqd_info* which, - struct mutex* sem, - struct task_struct* t) -{ - TRACE_CUR("%s: Trying to enter execution phase. " - "Acquiring semaphore of %s/%d\n", __FUNCTION__, - t->comm, t->pid); - down_and_set_stat(current, HELD, sem); - TRACE_CUR("%s: Execution phase entered! " - "Acquired semaphore of %s/%d\n", __FUNCTION__, - t->comm, t->pid); -} - -static void exit_execution_phase(struct klmirqd_info* which, - struct mutex* sem, - struct task_struct* t) -{ - TRACE_CUR("%s: Exiting execution phase. " - "Releasing semaphore of %s/%d\n", __FUNCTION__, - t->comm, t->pid); - if(atomic_read(&tsk_rt(current)->klmirqd_sem_stat) == HELD) - { - up_and_set_stat(current, NOT_HELD, sem); - TRACE_CUR("%s: Execution phase exited! " - "Released semaphore of %s/%d\n", __FUNCTION__, - t->comm, t->pid); - } - else - { - TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__); - } -} /* main loop for klitsoftirqd */ -static int run_klmirqd(void* unused) +static int run_klmirqd(void* callback) { - struct klmirqd_info* which = &klmirqds[klmirqd_id(current)]; - struct mutex* sem; - struct task_struct* owner; - - int rt_status = set_litmus_daemon_sched(); + int retval = 0; + struct klmirqd_info* info = NULL; + klmirqd_callback_t* cb = (klmirqd_callback_t*)(callback); - if(rt_status != 0) - { + retval = set_litmus_daemon_sched(current); + if (retval != 0) { TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__); - goto rt_failed; + goto failed; } - atomic_inc(&num_ready_klmirqds); + retval = register_klmirqd(current); + if (retval != 0) { + TRACE_CUR("%s: Failed to become a klmirqd thread.\n", __FUNCTION__); + goto failed; + } + + if (cb && cb->func) { + retval = cb->func(cb->arg); + if (retval != 0) { + TRACE_CUR("%s: klmirqd callback reported failure.
retval = %d\n", __FUNCTION__, retval); + goto failed_unregister; + } + } + + /* enter the interrupt handling workloop */ + + info = tsk_rt(current)->klmirqd_info; set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { preempt_disable(); - if (!litirq_pending(which)) + if (!litirq_pending(info)) { /* sleep for work */ TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n", @@ -731,17 +735,10 @@ static int run_klmirqd(void* unused) __set_current_state(TASK_RUNNING); - while (litirq_pending_and_sem_and_owner(which, &sem, &owner)) + while (litirq_pending(info)) { - int needs_resched = 0; - preempt_enable_no_resched(); - BUG_ON(sem == NULL); - - // wait to enter execution phase; wait for 'current_owner' to block. - enter_execution_phase(which, sem, owner); - if(kthread_should_stop()) { TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); @@ -753,36 +750,23 @@ static int run_klmirqd(void* unused) /* Double check that there's still pending work and the owner hasn't * changed. Pending items may have been flushed while we were sleeping. */ - if(litirq_pending_with_owner(which, owner)) + if(litirq_pending(info)) { TRACE_CUR("%s: Executing tasklets and/or work objects.\n", __FUNCTION__); - needs_resched = do_litirq(which); + do_litirq(info); preempt_enable_no_resched(); // work objects are preemptible. - if(!needs_resched) - { - do_work(which); - } - - // exit execution phase. - exit_execution_phase(which, sem, owner); - - TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__); - reeval_prio(which); /* check if we need to change priority here */ + do_work(info); } else { - TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n", - __FUNCTION__, - owner->comm, owner->pid); - preempt_enable_no_resched(); + TRACE_CUR("%s: Pending work was flushed!\n", __FUNCTION__); - // exit execution phase. - exit_execution_phase(which, sem, owner); + preempt_enable_no_resched(); } cond_resched(); @@ -793,182 +777,38 @@ static int run_klmirqd(void* unused) } __set_current_state(TASK_RUNNING); - atomic_dec(&num_ready_klmirqds); +failed_unregister: + /* remove our registration from klmirqd */ + unregister_klmirqd(current); -rt_failed: +failed: litmus_exit_task(current); - return rt_status; + return retval; } -struct klmirqd_launch_data -{ - int* cpu_affinity; - struct work_struct work; -}; - -/* executed by a kworker from workqueues */ -static void launch_klmirqd(struct work_struct *work) +void flush_pending(struct task_struct* tsk) { - int i; - - struct klmirqd_launch_data* launch_data = - container_of(work, struct klmirqd_launch_data, work); - - TRACE("%s: Creating %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); - - /* create the daemon threads */ - for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) - { - if(launch_data->cpu_affinity) - { - klmirqds[i].klmirqd = - kthread_create( - run_klmirqd, - /* treat the affinity as a pointer, we'll cast it back later */ - (void*)(long long)launch_data->cpu_affinity[i], - "klmirqd_th%d/%d", - i, - launch_data->cpu_affinity[i]); - - /* litmus will put is in the right cluster. 
*/ - kthread_bind(klmirqds[i].klmirqd, launch_data->cpu_affinity[i]); - } - else - { - klmirqds[i].klmirqd = - kthread_create( - run_klmirqd, - /* treat the affinity as a pointer, we'll cast it back later */ - (void*)(long long)(-1), - "klmirqd_th%d", - i); - } - } - - TRACE("%s: Launching %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); - - /* unleash the daemons */ - for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) - { - wake_up_process(klmirqds[i].klmirqd); - } - - if(launch_data->cpu_affinity) - kfree(launch_data->cpu_affinity); - kfree(launch_data); -} - + unsigned long flags; + struct tasklet_struct *list; + u32 work_flushed = 0; -void spawn_klmirqd(int* affinity) -{ - int i; - struct klmirqd_launch_data* delayed_launch; + struct klmirqd_info *which; - if(atomic_read(&num_ready_klmirqds) != 0) - { - TRACE("%s: At least one klmirqd is already running! Need to call kill_klmirqd()?\n"); + if (!tsk_rt(tsk)->is_interrupt_thread) { + TRACE("%s/%d is not a proxy thread\n", tsk->comm, tsk->pid); + WARN_ON(1); return; } - /* init the tasklet & work queues */ - for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) - { - klmirqds[i].terminating = 0; - klmirqds[i].pending = 0; - - klmirqds[i].num_hi_pending.counter = 0; - klmirqds[i].num_low_pending.counter = 0; - klmirqds[i].num_work_pending.counter = 0; - - klmirqds[i].pending_tasklets_hi.head = NULL; - klmirqds[i].pending_tasklets_hi.tail = &klmirqds[i].pending_tasklets_hi.head; - - klmirqds[i].pending_tasklets.head = NULL; - klmirqds[i].pending_tasklets.tail = &klmirqds[i].pending_tasklets.head; - - INIT_LIST_HEAD(&klmirqds[i].worklist); - - raw_spin_lock_init(&klmirqds[i].lock); - } - - /* wait to flush the initializations to memory since other threads - will access it. */ - mb(); - - /* tell a work queue to launch the threads. we can't make scheduling - calls since we're in an atomic state. */ - TRACE("%s: Setting callback up to launch klmirqds\n", __FUNCTION__); - delayed_launch = kmalloc(sizeof(struct klmirqd_launch_data), GFP_ATOMIC); - if(affinity) - { - delayed_launch->cpu_affinity = - kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC); - - memcpy(delayed_launch->cpu_affinity, affinity, - sizeof(int)*NR_LITMUS_SOFTIRQD); - } - else - { - delayed_launch->cpu_affinity = NULL; - } - INIT_WORK(&delayed_launch->work, launch_klmirqd); - schedule_work(&delayed_launch->work); -} - - -void kill_klmirqd(void) -{ - if(!klmirqd_is_dead()) - { - int i; - - TRACE("%s: Killing %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD); - - for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) - { - if(klmirqds[i].terminating != 1) - { - klmirqds[i].terminating = 1; - mb(); /* just to be sure? 
*/ - flush_pending(klmirqds[i].klmirqd, NULL); - - /* signal termination */ - kthread_stop(klmirqds[i].klmirqd); - } - } + which = tsk_rt(tsk)->klmirqd_info; + if (!which) { + TRACE("%s/%d is not a klmirqd thread!\n", tsk->comm, tsk->pid); + WARN_ON(1); + return; } -} - - -int klmirqd_is_ready(void) -{ - return(atomic_read(&num_ready_klmirqds) == NR_LITMUS_SOFTIRQD); -} - -int klmirqd_is_dead(void) -{ - return(atomic_read(&num_ready_klmirqds) == 0); -} - - -struct task_struct* get_klmirqd(unsigned int k_id) -{ - return(klmirqds[k_id].klmirqd); -} - - -void flush_pending(struct task_struct* klmirqd_thread, - struct task_struct* owner) -{ - unsigned int k_id = klmirqd_id(klmirqd_thread); - struct klmirqd_info *which = &klmirqds[k_id]; - - unsigned long flags; - struct tasklet_struct *list; - u32 work_flushed = 0; raw_spin_lock_irqsave(&which->lock, flags); @@ -990,35 +830,27 @@ void flush_pending(struct task_struct* klmirqd_thread, struct tasklet_struct *t = list; list = list->next; - if(likely((t->owner == owner) || (owner == NULL))) + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) { - if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) - { - BUG(); - } + BUG(); + } - work_flushed |= LIT_TASKLET_HI; + work_flushed |= LIT_TASKLET_HI; - t->owner = NULL; + t->owner = NULL; - // WTF? - if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - { - atomic_dec(&which->num_hi_pending); - ___tasklet_hi_schedule(t); - } - else - { - TRACE("%s: dropped hi tasklet??\n", __FUNCTION__); - BUG(); - } + // WTF? + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + { + atomic_dec(&which->num_hi_pending); + ___tasklet_hi_schedule(t); } else { - TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__); - // put back on queue. - ___litmus_tasklet_hi_schedule(t, which, 0); + TRACE("%s: dropped hi tasklet??\n", __FUNCTION__); + BUG(); } + } } @@ -1038,34 +870,25 @@ void flush_pending(struct task_struct* klmirqd_thread, struct tasklet_struct *t = list; list = list->next; - if(likely((t->owner == owner) || (owner == NULL))) + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) { - if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) - { - BUG(); - } + BUG(); + } - work_flushed |= LIT_TASKLET_LOW; + work_flushed |= LIT_TASKLET_LOW; - t->owner = NULL; - sched_trace_tasklet_end(owner, 1ul); + t->owner = NULL; +// sched_trace_tasklet_end(owner, 1ul); - if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - { - atomic_dec(&which->num_low_pending); - ___tasklet_schedule(t); - } - else - { - TRACE("%s: dropped tasklet??\n", __FUNCTION__); - BUG(); - } + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + { + atomic_dec(&which->num_low_pending); + ___tasklet_schedule(t); } else { - TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__); - // put back on queue - ___litmus_tasklet_schedule(t, which, 0); + TRACE("%s: dropped tasklet??\n", __FUNCTION__); + BUG(); } } } @@ -1083,21 +906,12 @@ void flush_pending(struct task_struct* klmirqd_thread, list_first_entry(&which->worklist, struct work_struct, entry); list_del_init(&work->entry); - if(likely((work->owner == owner) || (owner == NULL))) - { - work_flushed |= LIT_WORK; - atomic_dec(&which->num_work_pending); + work_flushed |= LIT_WORK; + atomic_dec(&which->num_work_pending); - work->owner = NULL; - sched_trace_work_end(owner, current, 1ul); - __schedule_work(work); - } - else - { - TRACE("%s: Could not flush a work object.\n", __FUNCTION__); - // put back on queue - ___litmus_schedule_work(work, 
which, 0); - } + work->owner = NULL; +// sched_trace_work_end(owner, current, 1ul); + __schedule_work(work); } } @@ -1106,22 +920,6 @@ void flush_pending(struct task_struct* klmirqd_thread, mb(); /* commit changes to pending flags */ - /* reset the scheduling priority */ - if(work_flushed) - { - __reeval_prio(which); - - /* Try to offload flushed tasklets to Linux's ksoftirqd. */ - if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI)) - { - wakeup_softirqd(); - } - } - else - { - TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__); - } - raw_spin_unlock_irqrestore(&which->lock, flags); } @@ -1161,39 +959,27 @@ static void ___litmus_tasklet_schedule(struct tasklet_struct *t, raw_spin_unlock_irqrestore(&which->lock, flags); } -int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) + +int __litmus_tasklet_schedule(struct tasklet_struct *t, struct task_struct* klmirqd_thread) { int ret = 0; /* assume failure */ - if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) - { - TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); - BUG(); - } + struct klmirqd_info* info; - if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) - { - TRACE("%s: No klmirqd_th%d!\n", __FUNCTION__, k_id); - BUG(); - } + if (unlikely(!is_realtime(klmirqd_thread) || + !tsk_rt(klmirqd_thread)->is_interrupt_thread || + !tsk_rt(klmirqd_thread)->klmirqd_info)) { + TRACE("%s: %s/%d can't handle tasklets\n", klmirqd_thread->comm, klmirqd_thread->pid); + return ret; + } - if(likely(!klmirqds[k_id].terminating)) - { - /* Can't accept tasklets while we're processing a workqueue - because they're handled by the same thread. This case is - very RARE. + info = tsk_rt(klmirqd_thread)->klmirqd_info; - TODO: Use a separate thread for work objects!!!!!! 
- */ - if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) - { - ret = 1; - ___litmus_tasklet_schedule(t, &klmirqds[k_id], 1); - } - else - { - TRACE("%s: rejected tasklet because of pending work.\n", - __FUNCTION__); - } + if (likely(!info->terminating)) { + ret = 1; + ___litmus_tasklet_schedule(t, info, 1); + } + else { + TRACE("%s: Tasklet rejected because %s/%d is terminating\n", klmirqd_thread->comm, klmirqd_thread->pid); } return(ret); } @@ -1230,100 +1016,77 @@ static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, raw_spin_unlock_irqrestore(&which->lock, flags); } -int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, struct task_struct* klmirqd_thread) { int ret = 0; /* assume failure */ - if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) - { - TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); - BUG(); - } + struct klmirqd_info* info; - if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) - { - TRACE("%s: No klmirqd_th%d!\n", __FUNCTION__, k_id); - BUG(); - } + if (unlikely(!is_realtime(klmirqd_thread) || + !tsk_rt(klmirqd_thread)->is_interrupt_thread || + !tsk_rt(klmirqd_thread)->klmirqd_info)) { + TRACE("%s: %s/%d can't handle tasklets\n", klmirqd_thread->comm, klmirqd_thread->pid); + return ret; + } - if(unlikely(!klmirqd_is_ready())) - { - TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id); - BUG(); - } + info = tsk_rt(klmirqd_thread)->klmirqd_info; - if(likely(!klmirqds[k_id].terminating)) - { - if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) - { - ret = 1; - ___litmus_tasklet_hi_schedule(t, &klmirqds[k_id], 1); - } - else - { - TRACE("%s: rejected tasklet because of pending work.\n", - __FUNCTION__); - } + if (likely(!info->terminating)) { + ret = 1; + ___litmus_tasklet_hi_schedule(t, info, 1); + } + else { + TRACE("%s: Tasklet rejected because %s/%d is terminating\n", klmirqd_thread->comm, klmirqd_thread->pid); } + return(ret); } EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); -int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id) +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, struct task_struct* klmirqd_thread) { int ret = 0; /* assume failure */ u32 old_pending; + struct klmirqd_info* info; BUG_ON(!irqs_disabled()); - if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) - { - TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); - BUG(); - } + if (unlikely(!is_realtime(klmirqd_thread) || + !tsk_rt(klmirqd_thread)->is_interrupt_thread || + !tsk_rt(klmirqd_thread)->klmirqd_info)) { + TRACE("%s: %s/%d can't handle tasklets\n", klmirqd_thread->comm, klmirqd_thread->pid); + return ret; + } - if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) - { - TRACE("%s: No klmirqd_th%u!\n", __FUNCTION__, k_id); - BUG(); - } + info = tsk_rt(klmirqd_thread)->klmirqd_info; - if(unlikely(!klmirqd_is_ready())) - { - TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id); - BUG(); - } + if (likely(!info->terminating)) { - if(likely(!klmirqds[k_id].terminating)) - { - raw_spin_lock(&klmirqds[k_id].lock); + raw_spin_lock(&info->lock); - if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) - { - ret = 1; // success! + ret = 1; // success! 
- t->next = klmirqds[k_id].pending_tasklets_hi.head; - klmirqds[k_id].pending_tasklets_hi.head = t; + t->next = info->pending_tasklets_hi.head; + info->pending_tasklets_hi.head = t; - old_pending = klmirqds[k_id].pending; - klmirqds[k_id].pending |= LIT_TASKLET_HI; + old_pending = info->pending; + info->pending |= LIT_TASKLET_HI; - atomic_inc(&klmirqds[k_id].num_hi_pending); + atomic_inc(&info->num_hi_pending); - mb(); + mb(); - if(!old_pending) - wakeup_litirqd_locked(&klmirqds[k_id]); /* wake up the klmirqd */ - } - else - { - TRACE("%s: rejected tasklet because of pending work.\n", - __FUNCTION__); + if(!old_pending) { + wakeup_litirqd_locked(info); /* wake up the klmirqd */ } - raw_spin_unlock(&klmirqds[k_id].lock); + raw_spin_unlock(&info->lock); } + else { + TRACE("%s: Tasklet rejected because %s/%d is terminating\n", klmirqd_thread->comm, klmirqd_thread->pid); + } + return(ret); } @@ -1358,225 +1121,30 @@ static void ___litmus_schedule_work(struct work_struct *w, raw_spin_unlock_irqrestore(&which->lock, flags); } -int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) +int __litmus_schedule_work(struct work_struct *w, struct task_struct* klmirqd_thread) { int ret = 1; /* assume success */ - if(unlikely(w->owner == NULL) || !is_realtime(w->owner)) - { - TRACE("%s: No owner associated with this work object!\n", __FUNCTION__); - BUG(); - } - - if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) - { - TRACE("%s: No klmirqd_th%u!\n", k_id); - BUG(); - } - - if(unlikely(!klmirqd_is_ready())) - { - TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id); - BUG(); - } - - if(likely(!klmirqds[k_id].terminating)) - ___litmus_schedule_work(w, &klmirqds[k_id], 1); - else - ret = 0; - return(ret); -} -EXPORT_SYMBOL(__litmus_schedule_work); - - -static int set_klmirqd_sem_status(unsigned long stat) -{ - TRACE_CUR("SETTING STATUS FROM %d TO %d\n", - atomic_read(&tsk_rt(current)->klmirqd_sem_stat), - stat); - atomic_set(&tsk_rt(current)->klmirqd_sem_stat, stat); - //mb(); - - return(0); -} - -static int set_klmirqd_sem_status_if_not_held(unsigned long stat) -{ - if(atomic_read(&tsk_rt(current)->klmirqd_sem_stat) != HELD) - { - return(set_klmirqd_sem_status(stat)); - } - return(-1); -} - - -void __down_and_reset_and_set_stat(struct task_struct* t, - enum klmirqd_sem_status to_reset, - enum klmirqd_sem_status to_set, - struct mutex* sem) -{ -#if 0 - struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem); - struct task_struct* task = container_of(param, struct task_struct, rt_param); - - TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", - __FUNCTION__, task->comm, task->pid); -#endif - - mutex_lock_sfx(sem, - set_klmirqd_sem_status_if_not_held, to_reset, - set_klmirqd_sem_status, to_set); -#if 0 - TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n", - __FUNCTION__, task->comm, task->pid); -#endif -} - -void down_and_set_stat(struct task_struct* t, - enum klmirqd_sem_status to_set, - struct mutex* sem) -{ -#if 0 - struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem); - struct task_struct* task = container_of(param, struct task_struct, rt_param); - - TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n", - __FUNCTION__, task->comm, task->pid); -#endif - - mutex_lock_sfx(sem, - NULL, 0, - set_klmirqd_sem_status, to_set); - -#if 0 - TRACE_CUR("%s: exiting. 
Have semaphore of %s/%d\n", - __FUNCTION__, task->comm, task->pid); -#endif -} - - -void up_and_set_stat(struct task_struct* t, - enum klmirqd_sem_status to_set, - struct mutex* sem) -{ -#if 0 - struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem); - struct task_struct* task = container_of(param, struct task_struct, rt_param); - - TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n", - __FUNCTION__, - task->comm, task->pid); -#endif - - mutex_unlock_sfx(sem, NULL, 0, - set_klmirqd_sem_status, to_set); - -#if 0 - TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n", - __FUNCTION__, - task->comm, task->pid); -#endif -} - - - -void release_klmirqd_lock(struct task_struct* t) -{ - if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klmirqd_sem_stat) == HELD)) - { - struct mutex* sem; - struct task_struct* owner = t; - - if(t->state == TASK_RUNNING) - { - TRACE_TASK(t, "NOT giving up klmirqd_sem because we're not blocked!\n"); - return; - } - - if(likely(!tsk_rt(t)->is_proxy_thread)) - { - sem = &tsk_rt(t)->klmirqd_sem; - } - else - { - unsigned int k_id = klmirqd_id(t); - owner = klmirqds[k_id].current_owner; - - BUG_ON(t != klmirqds[k_id].klmirqd); - - if(likely(owner)) - { - sem = &tsk_rt(owner)->klmirqd_sem; - } - else - { - BUG(); + struct klmirqd_info* info; - // We had the rug pulled out from under us. Abort attempt - // to reacquire the lock since our client no longer needs us. - TRACE_CUR("HUH?! How did this happen?\n"); - atomic_set(&tsk_rt(t)->klmirqd_sem_stat, NOT_HELD); - return; - } - } - - //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid); - up_and_set_stat(t, NEED_TO_REACQUIRE, sem); - //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid); - } - /* - else if(is_realtime(t)) - { - TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klmirqd_sem_stat); + if (unlikely(!is_realtime(klmirqd_thread) || + !tsk_rt(klmirqd_thread)->is_interrupt_thread || + !tsk_rt(klmirqd_thread)->klmirqd_info)) { + TRACE("%s: %s/%d can't handle work items\n", klmirqd_thread->comm, klmirqd_thread->pid); + return ret; } - */ -} - -int reacquire_klmirqd_lock(struct task_struct* t) -{ - int ret = 0; - if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klmirqd_sem_stat) == NEED_TO_REACQUIRE)) - { - struct mutex* sem; - struct task_struct* owner = t; - - if(likely(!tsk_rt(t)->is_proxy_thread)) - { - sem = &tsk_rt(t)->klmirqd_sem; - } - else - { - unsigned int k_id = klmirqd_id(t); - //struct task_struct* owner = klmirqds[k_id].current_owner; - owner = klmirqds[k_id].current_owner; + info = tsk_rt(klmirqd_thread)->klmirqd_info; - BUG_ON(t != klmirqds[k_id].klmirqd); - - if(likely(owner)) - { - sem = &tsk_rt(owner)->klmirqd_sem; - } - else - { - // We had the rug pulled out from under us. Abort attempt - // to reacquire the lock since our client no longer needs us. - TRACE_CUR("No longer needs to reacquire klmirqd_sem!\n"); - atomic_set(&tsk_rt(t)->klmirqd_sem_stat, NOT_HELD); - return(0); - } - } - //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid); - __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem); - //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid); + if (likely(!info->terminating)) { + ___litmus_schedule_work(w, info, 1); } - /* - else if(is_realtime(t)) - { - TRACE_CUR("%s: Nothing to do. 
Stat = %d\n", __FUNCTION__, tsk_rt(t)->klmirqd_sem_stat); + else { + TRACE("%s: Work rejected because %s/%d is terminating\n", klmirqd_thread->comm, klmirqd_thread->pid); + ret = 0; } - */ return(ret); } +EXPORT_SYMBOL(__litmus_schedule_work); diff --git a/litmus/locking.c b/litmus/locking.c index 22f46df4308a..7af1dd69a079 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -543,32 +543,54 @@ out: void suspend_for_lock(void) { -#ifdef CONFIG_REALTIME_AUX_TASKS -#if 0 - unsigned int restore = 0; +#if defined(CONFIG_REALTIME_AUX_TASKS) || defined(CONFIG_LITMUS_NVIDIA) struct task_struct *t = current; - unsigned int hide; - - if (tsk_rt(t)->has_aux_tasks) { - /* hide from aux tasks so they can't inherit our priority when we block - * for a litmus lock. inheritance is already going to a litmus lock - * holder. */ - hide = tsk_rt(t)->hide_from_aux_tasks; - restore = 1; - tsk_rt(t)->hide_from_aux_tasks = 1; - } #endif + +#ifdef CONFIG_REALTIME_AUX_TASKS + unsigned int aux_restore = 0; + unsigned int aux_hide; +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + unsigned int gpu_restore = 0; + unsigned int gpu_hide; +#endif + + +//#ifdef CONFIG_REALTIME_AUX_TASKS +// if (tsk_rt(t)->has_aux_tasks) { +// /* hide from aux tasks so they can't inherit our priority when we block +// * for a litmus lock. inheritance is already going to a litmus lock +// * holder. */ +// aux_hide = tsk_rt(t)->hide_from_aux_tasks; +// aux_restore = 1; +// tsk_rt(t)->hide_from_aux_tasks = 1; +// } +//#endif + +#ifdef CONFIG_LITMUS_NVIDIA + if (tsk_rt(t)->held_gpus) { + gpu_hide = tsk_rt(t)->hide_from_gpu; + gpu_restore = 1; + tsk_rt(t)->hide_from_gpu = 1; + } #endif schedule(); -#ifdef CONFIG_REALTIME_AUX_TASKS -#if 0 - if (restore) { +#ifdef CONFIG_LITMUS_NVIDIA + if (gpu_restore) { /* restore our state */ - tsk_rt(t)->hide_from_aux_tasks = hide; + tsk_rt(t)->hide_from_gpu = gpu_hide; } #endif + +#ifdef CONFIG_REALTIME_AUX_TASKS + if (aux_restore) { + /* restore our state */ + tsk_rt(t)->hide_from_aux_tasks = aux_hide; + } #endif } diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 22586cde8255..b29f4d3f0dac 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -10,6 +10,10 @@ #include +#ifdef CONFIG_LITMUS_SOFTIRQD +#include +#endif + typedef unsigned char NvV8; /* "void": enumerated or multiple fields */ typedef unsigned short NvV16; /* "void": enumerated or multiple fields */ typedef unsigned char NvU8; /* 0 to 255 */ @@ -296,9 +300,14 @@ static struct notifier_block nvidia_going = { }; #endif + + +static int init_nv_device_reg(void); +static int shutdown_nv_device_reg(void); + + int init_nvidia_info(void) { -#if 1 mutex_lock(&module_mutex); nvidia_mod = find_module("nvidia"); mutex_unlock(&module_mutex); @@ -315,13 +324,14 @@ int init_nvidia_info(void) TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); return(-1); } -#endif } void shutdown_nvidia_info(void) { nvidia_mod = NULL; mb(); + + shutdown_nv_device_reg(); } /* works with pointers to static data inside the module too. */ @@ -351,20 +361,6 @@ u32 get_tasklet_nv_device_num(const struct tasklet_struct *t) BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM); return(linuxstate->device_num); - - //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); - -#if 0 - // offset determined though observed behavior of the NV driver. 
- //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1 - //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2 - - void* state = (void*)(t->data); - void* device_num_ptr = state + DEVICE_NUM_OFFSET; - - //dump_nvidia_info(t); - return(*((u32*)device_num_ptr)); -#endif } u32 get_work_nv_device_num(const struct work_struct *t) @@ -377,203 +373,452 @@ u32 get_work_nv_device_num(const struct work_struct *t) } +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// + + typedef struct { - raw_spinlock_t lock; - int nr_owners; - struct task_struct* max_prio_owner; - struct task_struct* owners[NV_MAX_SIMULT_USERS]; + raw_spinlock_t lock; /* not needed if GPU not shared between scheudling domains */ + struct binheap owners; + +#ifdef CONFIG_LITMUS_SOFTIRQD + klmirqd_callback_t callback; + struct task_struct* thread; + int ready:1; /* todo: make threads check for the ready flag */ +#endif }nv_device_registry_t; + static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; -int init_nv_device_reg(void) + + + + +#ifdef CONFIG_LITMUS_SOFTIRQD +static int nvidia_klmirqd_cb(void *arg) { - int i; + unsigned long flags; + int reg_device_id = (int)(long long)(arg); + nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; - memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); + TRACE("nv klmirqd callback for GPU %d\n", reg_device_id); - for(i = 0; i < NV_DEVICE_NUM; ++i) - { - raw_spin_lock_init(&NV_DEVICE_REG[i].lock); - } + raw_spin_lock_irqsave(®->lock, flags); + reg->thread = current; + reg->ready = 1; + raw_spin_unlock_irqrestore(®->lock, flags); - return(1); + return 0; } +#endif -/* use to get nv_device_id by given owner. - (if return -1, can't get the assocaite device id)*/ -/* -int get_nv_device_id(struct task_struct* owner) + +static int gpu_owner_max_priority_order(struct binheap_node *a, + struct binheap_node *b) { - int i; - if(!owner) - { - return(-1); - } - for(i = 0; i < NV_DEVICE_NUM; ++i) - { - if(NV_DEVICE_REG[i].device_owner == owner) - return(i); - } - return(-1); + struct task_struct *d_a = container_of(binheap_entry(a, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + + BUG_ON(!d_a); + BUG_ON(!d_b); + + return litmus->compare(d_a, d_b); } -*/ -static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_struct *skip) { +static int init_nv_device_reg(void) +{ int i; - struct task_struct *found = NULL; - for(i = 0; i < reg->nr_owners; ++i) { - if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) { - found = reg->owners[i]; + +#ifdef CONFIG_LITMUS_SOFTIRQD + if (!klmirqd_is_ready()) { + TRACE("klmirqd is not ready!\n"); + return 0; + } +#endif + + memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); + mb(); + + + for(i = 0; i < NV_DEVICE_NUM; ++i) { + raw_spin_lock_init(&NV_DEVICE_REG[i].lock); + INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order); + +#ifdef CONFIG_LITMUS_SOFTIRQD + // TODO: Make thread spawning this a litmus plugin call. 
+ NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb; + NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i); + mb(); + + if(launch_klmirqd_thread(0, &NV_DEVICE_REG[i].callback) != 0) { + TRACE("Failed to create klmirqd thread for GPU %d\n", i); } +#endif } - return found; + + return(1); } -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD -void pai_check_priority_increase(struct task_struct *t, int reg_device_id) + +/* The following code is full of nasty race conditions... */ +/* spawning of klimirqd threads can race with init_nv_device_reg()!!!! */ +static int shutdown_nv_device_reg(void) { - unsigned long flags; - nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; + TRACE("Shutting down nv device registration.\n"); + +#ifdef CONFIG_LITMUS_SOFTIRQD + { + int i; + nv_device_registry_t *reg; - if(reg->max_prio_owner != t) { + for (i = 0; i < NV_DEVICE_NUM; ++i) { - raw_spin_lock_irqsave(®->lock, flags); + TRACE("Shutting down GPU %d.\n", i); - if(reg->max_prio_owner != t) { - if(litmus->compare(t, reg->max_prio_owner)) { - litmus->change_prio_pai_tasklet(reg->max_prio_owner, t); - reg->max_prio_owner = t; + reg = &NV_DEVICE_REG[i]; + + if (reg->thread && reg->ready) { + kill_klmirqd_thread(reg->thread); + + /* assume that all goes according to plan... */ + reg->thread = NULL; + reg->ready = 0; } - } - raw_spin_unlock_irqrestore(®->lock, flags); + while (!binheap_empty(®->owners)) { + binheap_delete_root(®->owners, struct rt_param, gpu_owner_node); + } + } } +#endif + + return(1); } -void pai_check_priority_decrease(struct task_struct *t, int reg_device_id) +/* use to get the owner of nv_device_id. */ +struct task_struct* get_nv_max_device_owner(u32 target_device_id) { - unsigned long flags; - nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; + struct task_struct *owner = NULL; + nv_device_registry_t *reg; - if(reg->max_prio_owner == t) { + BUG_ON(target_device_id >= NV_DEVICE_NUM); - raw_spin_lock_irqsave(®->lock, flags); + reg = &NV_DEVICE_REG[target_device_id]; - if(reg->max_prio_owner == t) { - reg->max_prio_owner = find_hp_owner(reg, NULL); - if(reg->max_prio_owner != t) { - litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); - } - } + if (!binheap_empty(®->owners)) { + struct task_struct *hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + TRACE_CUR("hp: %s/%d\n", hp->comm, hp->pid); + } - raw_spin_unlock_irqrestore(®->lock, flags); + return(owner); +} + +#ifdef CONFIG_LITMUS_SOFTIRQD +struct task_struct* get_nv_klmirqd_thread(u32 target_device_id) +{ + struct task_struct *klmirqd = NULL; + nv_device_registry_t *reg; + + BUG_ON(target_device_id >= NV_DEVICE_NUM); + + reg = &NV_DEVICE_REG[target_device_id]; + + if(likely(reg->ready)) { + klmirqd = reg->thread; } + + return klmirqd; } #endif -static int __reg_nv_device(int reg_device_id, struct task_struct *t) + + + + +#ifdef CONFIG_LITMUS_SOFTIRQD +static int gpu_klmirqd_increase_priority(struct task_struct *klmirqd, struct task_struct *hp) { - int ret = 0; - int i; - struct task_struct *old_max = NULL; - unsigned long flags; - nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; + int retval = 0; - if(test_bit(reg_device_id, &tsk_rt(t)->held_gpus)) { - // TODO: check if taks is already registered. - return ret; // assume already registered. 
- } + TRACE_CUR("Increasing priority of nv klmirqd: %s/%d.\n", klmirqd->comm, klmirqd->pid); + /* the klmirqd thread should never attempt to hold a litmus-level real-time + * so nested support is not required */ + retval = litmus->__increase_prio(klmirqd, hp); - raw_spin_lock_irqsave(®->lock, flags); + return retval; +} + +static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct task_struct *hp) +{ + int retval = 0; - if(reg->nr_owners < NV_MAX_SIMULT_USERS) { - TRACE_TASK(t, "registers GPU %d\n", reg_device_id); - for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { - if(reg->owners[i] == NULL) { - reg->owners[i] = t; + TRACE_CUR("Decreasing priority of nv klmirqd: %s/%d.\n", klmirqd->comm, klmirqd->pid); - //if(edf_higher_prio(t, reg->max_prio_owner)) { - if(litmus->compare(t, reg->max_prio_owner)) { - old_max = reg->max_prio_owner; - reg->max_prio_owner = t; + /* the klmirqd thread should never attempt to hold a litmus-level real-time + * so nested support is not required */ + retval = litmus->__decrease_prio(klmirqd, hp); -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD - litmus->change_prio_pai_tasklet(old_max, t); + return retval; +} #endif - } + + + + +/* call when an aux_owner becomes real-time */ +long enable_gpu_owner(struct task_struct *t) +{ + long retval = 0; +// unsigned long flags; + int gpu; + nv_device_registry_t *reg; #ifdef CONFIG_LITMUS_SOFTIRQD - down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem); + struct task_struct *hp; #endif - ++(reg->nr_owners); - break; - } - } + if (!tsk_rt(t)->held_gpus) { + TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid); + return -1; } - else - { - TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); - //ret = -EBUSY; + + BUG_ON(!is_realtime(t)); + + gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + + if (binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { + TRACE_CUR("task %s/%d is already active on GPU %d\n", t->comm, t->pid, gpu); + goto out; } - raw_spin_unlock_irqrestore(®->lock, flags); + /* update the registration (and maybe klmirqd) */ + reg = &NV_DEVICE_REG[gpu]; - __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); +// raw_spin_lock_irqsave(®->lock, flags); - return(ret); + binheap_add(&tsk_rt(t)->gpu_owner_node, ®->owners, + struct rt_param, gpu_owner_node); + + +#ifdef CONFIG_LITMUS_SOFTIRQD + hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + + if (hp == t) { + /* we're the new hp */ + TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu); + + retval = gpu_klmirqd_increase_priority(reg->thread, (tsk_rt(hp)->inh_task)? 
tsk_rt(hp)->inh_task : hp); + } +#endif + +// raw_spin_unlock_irqsave(®->lock, flags); + +out: + return retval; } -static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) +/* call when an aux_owner exits real-time */ +long disable_gpu_owner(struct task_struct *t) { - int ret = 0; - int i; - unsigned long flags; - nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id]; + long retval = 0; +// unsigned long flags; + int gpu; + nv_device_registry_t *reg; #ifdef CONFIG_LITMUS_SOFTIRQD - struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id); + struct task_struct *hp; + struct task_struct *new_hp = NULL; #endif - if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) { - return ret; + if (!tsk_rt(t)->held_gpus) { + TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid); + return -1; } - raw_spin_lock_irqsave(®->lock, flags); + BUG_ON(!is_realtime(t)); - TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id); + gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + + if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { + TRACE_CUR("task %s/%d is not active on GPU %d\n", t->comm, t->pid, gpu); + goto out; + } + + TRACE_CUR("task %s/%d exiting from GPU %d.\n", t->comm, t->pid, gpu); + + + reg = &NV_DEVICE_REG[gpu]; + +// raw_spin_lock_irqsave(®->lock, flags); - for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { - if(reg->owners[i] == t) { -#ifdef CONFIG_LITMUS_SOFTIRQD - flush_pending(klmirqd_th, t); -#endif - if(reg->max_prio_owner == t) { - reg->max_prio_owner = find_hp_owner(reg, t); -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD - litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); -#endif - } #ifdef CONFIG_LITMUS_SOFTIRQD - up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem); + hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + + binheap_delete(&tsk_rt(t)->gpu_owner_node, ®->owners); + + + if (!binheap_empty(®->owners)) { + new_hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + } + + if (hp == t && new_hp != t) { + struct task_struct *to_inh = NULL; + + TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu); + + if (new_hp) { + to_inh = (tsk_rt(new_hp)->inh_task) ? tsk_rt(new_hp)->inh_task : new_hp; + } + + retval = gpu_klmirqd_decrease_priority(reg->thread, to_inh); + } +#else + binheap_delete(&tsk_rt(t)->gpu_owner_node, ®->owners); #endif - reg->owners[i] = NULL; - --(reg->nr_owners); +// raw_spin_unlock_irqsave(®->lock, flags); + + +out: + return retval; +} + + + + + + + + + + +int gpu_owner_increase_priority(struct task_struct *t) +{ + int retval = 0; + int gpu; + nv_device_registry_t *reg; + + struct task_struct *hp = NULL; + struct task_struct *hp_eff = NULL; + + BUG_ON(!is_realtime(t)); + BUG_ON(!tsk_rt(t)->held_gpus); + + gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + + if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { + WARN_ON(!is_running(t)); + TRACE_CUR("gpu klmirqd may not inherit from %s/%d on GPU %d\n", + t->comm, t->pid, gpu); + goto out; + } + + + + + TRACE_CUR("task %s/%d on GPU %d increasing priority.\n", t->comm, t->pid, gpu); + reg = &NV_DEVICE_REG[gpu]; + + hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + hp_eff = effective_priority(hp); + + if (hp != t) { /* our position in the heap may have changed. hp is already at the root. 
*/ + binheap_decrease(&tsk_rt(t)->gpu_owner_node, ®->owners); + } + + hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + + if (effective_priority(hp) != hp_eff) { /* the eff. prio. of hp has changed */ + hp_eff = effective_priority(hp); + TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu); + + retval = gpu_klmirqd_increase_priority(reg->thread, hp_eff); + } + +out: + return retval; +} + + +int gpu_owner_decrease_priority(struct task_struct *t) +{ + int retval = 0; + int gpu; + nv_device_registry_t *reg; + + struct task_struct *hp = NULL; + struct task_struct *hp_eff = NULL; - break; + BUG_ON(!is_realtime(t)); + BUG_ON(!tsk_rt(t)->held_gpus); + + gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); + + if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { + WARN_ON(!is_running(t)); + TRACE_CUR("aux tasks may not inherit from %s/%d on GPU %d\n", + t->comm, t->pid, gpu); + goto out; + } + + TRACE_CUR("task %s/%d on GPU %d decresing priority.\n", t->comm, t->pid, gpu); + reg = &NV_DEVICE_REG[gpu]; + + hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + hp_eff = effective_priority(hp); + binheap_delete(&tsk_rt(t)->gpu_owner_node, ®->owners); + binheap_add(&tsk_rt(t)->gpu_owner_node, ®->owners, + struct rt_param, gpu_owner_node); + + if (hp == t) { /* t was originally the hp */ + struct task_struct *new_hp = + container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), + struct task_struct, rt_param); + if (effective_priority(new_hp) != hp_eff) { /* eff prio. of hp has changed */ + hp_eff = effective_priority(new_hp); + TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu); + retval = gpu_klmirqd_decrease_priority(reg->thread, hp_eff); } } - raw_spin_unlock_irqrestore(®->lock, flags); +out: + return retval; +} + + + + + + + + + +static int __reg_nv_device(int reg_device_id, struct task_struct *t) +{ + __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); + + return(0); +} +static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) +{ __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); - return(ret); + return(0); } @@ -596,55 +841,213 @@ int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) return(ret); } -/* use to get the owner of nv_device_id. 
*/ -struct task_struct* get_nv_max_device_owner(u32 target_device_id) -{ - struct task_struct *owner = NULL; - BUG_ON(target_device_id >= NV_DEVICE_NUM); - owner = NV_DEVICE_REG[target_device_id].max_prio_owner; - return(owner); -} -void lock_nv_registry(u32 target_device_id, unsigned long* flags) -{ - BUG_ON(target_device_id >= NV_DEVICE_NUM); - if(in_interrupt()) - TRACE("Locking registry for %d.\n", target_device_id); - else - TRACE_CUR("Locking registry for %d.\n", target_device_id); - raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags); -} -void unlock_nv_registry(u32 target_device_id, unsigned long* flags) -{ - BUG_ON(target_device_id >= NV_DEVICE_NUM); - if(in_interrupt()) - TRACE("Unlocking registry for %d.\n", target_device_id); - else - TRACE_CUR("Unlocking registry for %d.\n", target_device_id); - raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags); -} -//void increment_nv_int_count(u32 device) + + + + + + + + + + +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +//void pai_check_priority_increase(struct task_struct *t, int reg_device_id) +//{ +// unsigned long flags; +// nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; +// +// +// +// if(reg->max_prio_owner != t) { +// +// raw_spin_lock_irqsave(®->lock, flags); +// +// if(reg->max_prio_owner != t) { +// if(litmus->compare(t, reg->max_prio_owner)) { +// litmus->change_prio_pai_tasklet(reg->max_prio_owner, t); +// reg->max_prio_owner = t; +// } +// } +// +// raw_spin_unlock_irqrestore(®->lock, flags); +// } +//} +// +// +//void pai_check_priority_decrease(struct task_struct *t, int reg_device_id) //{ // unsigned long flags; -// struct task_struct* owner; +// nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; // -// lock_nv_registry(device, &flags); +// if(reg->max_prio_owner == t) { // -// owner = NV_DEVICE_REG[device].device_owner; -// if(owner) +// raw_spin_lock_irqsave(®->lock, flags); +// +// if(reg->max_prio_owner == t) { +// reg->max_prio_owner = find_hp_owner(reg, NULL); +// if(reg->max_prio_owner != t) { +// litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); +// } +// } +// +// raw_spin_unlock_irqrestore(®->lock, flags); +// } +//} +#endif + + + + + +//static int __reg_nv_device(int reg_device_id, struct task_struct *t) +//{ +// int ret = 0; +// int i; +// struct task_struct *old_max = NULL; +// +// +// raw_spin_lock_irqsave(®->lock, flags); +// +// if(reg->nr_owners < NV_MAX_SIMULT_USERS) { +// TRACE_TASK(t, "registers GPU %d\n", reg_device_id); +// for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { +// if(reg->owners[i] == NULL) { +// reg->owners[i] = t; +// +// //if(edf_higher_prio(t, reg->max_prio_owner)) { +// if(litmus->compare(t, reg->max_prio_owner)) { +// old_max = reg->max_prio_owner; +// reg->max_prio_owner = t; +// +//#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +// litmus->change_prio_pai_tasklet(old_max, t); +//#endif +// } +// +//#ifdef CONFIG_LITMUS_SOFTIRQD +// down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem); +//#endif +// ++(reg->nr_owners); +// +// break; +// } +// } +// } +// else // { -// atomic_inc(&tsk_rt(owner)->nv_int_count); +// TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); +// //ret = -EBUSY; // } // -// unlock_nv_registry(device, &flags); +// raw_spin_unlock_irqrestore(®->lock, flags); +// +// __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); +// +// return(ret); +//} +// +//static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) +//{ +// int ret = 0; +// int i; +// unsigned long flags; +// 
nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id]; +// +//#ifdef CONFIG_LITMUS_SOFTIRQD +// struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id); +//#endif +// +// if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) { +// return ret; +// } +// +// raw_spin_lock_irqsave(®->lock, flags); +// +// TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id); +// +// for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { +// if(reg->owners[i] == t) { +//#ifdef CONFIG_LITMUS_SOFTIRQD +// flush_pending(klmirqd_th, t); +//#endif +// if(reg->max_prio_owner == t) { +// reg->max_prio_owner = find_hp_owner(reg, t); +//#ifdef CONFIG_LITMUS_PAI_SOFTIRQD +// litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); +//#endif +// } +// +//#ifdef CONFIG_LITMUS_SOFTIRQD +// up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem); +//#endif +// +// reg->owners[i] = NULL; +// --(reg->nr_owners); +// +// break; +// } +// } +// +// raw_spin_unlock_irqrestore(®->lock, flags); +// +// __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); +// +// return(ret); +//} +// +// +//int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) +//{ +// int ret; +// +// if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) +// { +// if(reg_action) +// ret = __reg_nv_device(reg_device_id, t); +// else +// ret = __clear_reg_nv_device(reg_device_id, t); +// } +// else +// { +// ret = -ENODEV; +// } +// +// return(ret); +//} + + + +//void lock_nv_registry(u32 target_device_id, unsigned long* flags) +//{ +// BUG_ON(target_device_id >= NV_DEVICE_NUM); +// +// if(in_interrupt()) +// TRACE("Locking registry for %d.\n", target_device_id); +// else +// TRACE_CUR("Locking registry for %d.\n", target_device_id); +// +// raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags); +//} +// +//void unlock_nv_registry(u32 target_device_id, unsigned long* flags) +//{ +// BUG_ON(target_device_id >= NV_DEVICE_NUM); +// +// if(in_interrupt()) +// TRACE("Unlocking registry for %d.\n", target_device_id); +// else +// TRACE_CUR("Unlocking registry for %d.\n", target_device_id); +// +// raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags); //} -//EXPORT_SYMBOL(increment_nv_int_count); diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 44c8336c5061..84aafca78cde 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -44,6 +44,7 @@ #include #include +#include #ifdef CONFIG_LITMUS_LOCKING #include @@ -75,7 +76,6 @@ #ifdef CONFIG_LITMUS_PAI_SOFTIRQD #include -#include #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -118,14 +118,6 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); #define test_will_schedule(cpu) \ (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) -#ifdef CONFIG_LITMUS_PAI_SOFTIRQD -struct tasklet_head -{ - struct tasklet_struct *head; - struct tasklet_struct **tail; -}; -#endif - /* * In C-EDF there is a cedf domain _per_ cluster * The number of clusters is dynamically determined accordingly to the @@ -1038,6 +1030,13 @@ static void cedf_task_wake_up(struct task_struct *task) } #endif +#ifdef CONFIG_LITMUS_NVIDIA + if (tsk_rt(task)->held_gpus && !tsk_rt(task)->hide_from_gpu) { + TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", task->comm, task->pid); + disable_gpu_owner(task); + } +#endif + cedf_job_arrival(task); raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); } @@ -1064,6 +1063,14 @@ static void cedf_task_block(struct task_struct *t) } #endif +#ifdef CONFIG_LITMUS_NVIDIA + if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) { + + 
TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); + enable_gpu_owner(t); + } +#endif + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); BUG_ON(!is_realtime(t)); @@ -1092,6 +1099,13 @@ static void cedf_task_exit(struct task_struct * t) } #endif +#ifdef CONFIG_LITMUS_NVIDIA + /* make sure we clean up on our way out */ + if(tsk_rt(t)->held_gpus) { + disable_gpu_owner(t); + } +#endif + unlink(t); if (tsk_rt(t)->scheduled_on != NO_CPU) { cpu_entry_t *cpu; @@ -1208,6 +1222,13 @@ static int __increase_priority_inheritance(struct task_struct* t, aux_task_owner_increase_priority(t); } #endif + +#ifdef CONFIG_LITMUS_NVIDIA + /* propagate to gpu klmirqd */ + if (tsk_rt(t)->held_gpus) { + gpu_owner_increase_priority(t); + } +#endif } #ifdef CONFIG_LITMUS_NESTED_LOCKING } @@ -1237,16 +1258,6 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str __increase_priority_inheritance(t, prio_inh); -#ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klmirqd != NULL) - { - TRACE_TASK(t, "%s/%d inherits a new priority!\n", - tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - - __increase_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); - } -#endif - raw_spin_unlock(&cluster->cluster_lock); #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) @@ -1320,6 +1331,13 @@ static int __decrease_priority_inheritance(struct task_struct* t, } #endif +#ifdef CONFIG_LITMUS_NVIDIA + /* propagate to gpu */ + if (tsk_rt(t)->held_gpus) { + gpu_owner_decrease_priority(t); + } +#endif + #ifdef CONFIG_LITMUS_NESTED_LOCKING } else { @@ -1346,16 +1364,6 @@ static void decrease_priority_inheritance(struct task_struct* t, raw_spin_lock(&cluster->cluster_lock); __decrease_priority_inheritance(t, prio_inh); -#ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klmirqd != NULL) - { - TRACE_TASK(t, "%s/%d decreases in priority!\n", - tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - - __decrease_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); - } -#endif - raw_spin_unlock(&cluster->cluster_lock); #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) @@ -1371,73 +1379,6 @@ static void decrease_priority_inheritance(struct task_struct* t, } - - - -#ifdef CONFIG_LITMUS_SOFTIRQD -/* called with IRQs off */ -static void increase_priority_inheritance_klmirqd(struct task_struct* klmirqd, - struct task_struct* old_owner, - struct task_struct* new_owner) -{ - cedf_domain_t* cluster = task_cpu_cluster(klmirqd); - - BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); - - raw_spin_lock(&cluster->cluster_lock); - - if(old_owner != new_owner) - { - if(old_owner) - { - // unreachable? - tsk_rt(old_owner)->cur_klmirqd = NULL; - } - - TRACE_TASK(klmirqd, "giving ownership to %s/%d.\n", - new_owner->comm, new_owner->pid); - - tsk_rt(new_owner)->cur_klmirqd = klmirqd; - } - - __decrease_priority_inheritance(klmirqd, NULL); // kludge to clear out cur prio. - - __increase_priority_inheritance(klmirqd, - (tsk_rt(new_owner)->inh_task == NULL) ? 
- new_owner : - tsk_rt(new_owner)->inh_task); - - raw_spin_unlock(&cluster->cluster_lock); -} - - -/* called with IRQs off */ -static void decrease_priority_inheritance_klmirqd(struct task_struct* klmirqd, - struct task_struct* old_owner, - struct task_struct* new_owner) -{ - cedf_domain_t* cluster = task_cpu_cluster(klmirqd); - - BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); - - raw_spin_lock(&cluster->cluster_lock); - - TRACE_TASK(klmirqd, "priority restored\n"); - - __decrease_priority_inheritance(klmirqd, new_owner); - - tsk_rt(old_owner)->cur_klmirqd = NULL; - - raw_spin_unlock(&cluster->cluster_lock); -} -#endif // CONFIG_LITMUS_SOFTIRQD - - - - - - - #ifdef CONFIG_LITMUS_NESTED_LOCKING /* called with IRQs off */ @@ -1836,33 +1777,7 @@ static long cedf_activate_plugin(void) } #ifdef CONFIG_LITMUS_SOFTIRQD - { - /* distribute the daemons evenly across the clusters. */ - int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC); - int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters; - int left_over = NR_LITMUS_SOFTIRQD % num_clusters; - - int daemon = 0; - for(i = 0; i < num_clusters; ++i) - { - int num_on_this_cluster = num_daemons_per_cluster; - if(left_over) - { - ++num_on_this_cluster; - --left_over; - } - - for(j = 0; j < num_on_this_cluster; ++j) - { - // first CPU of this cluster - affinity[daemon++] = i*cluster_size; - } - } - - spawn_klmirqd(affinity); - - kfree(affinity); - } + init_klmirqd(); #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -1906,10 +1821,6 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { #ifdef CONFIG_LITMUS_AFFINITY_LOCKING .allocate_aff_obs = cedf_allocate_affinity_observer, #endif -#ifdef CONFIG_LITMUS_SOFTIRQD - .increase_prio_klmirqd = increase_priority_inheritance_klmirqd, - .decrease_prio_klmirqd = decrease_priority_inheritance_klmirqd, -#endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet, .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet, diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index d52be9325044..f27c104ea027 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -26,6 +26,7 @@ #include #include +#include #ifdef CONFIG_LITMUS_LOCKING #include @@ -50,7 +51,6 @@ #ifdef CONFIG_LITMUS_PAI_SOFTIRQD #include -#include #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -156,12 +156,6 @@ static raw_spinlock_t* gsnedf_get_dgl_spinlock(struct task_struct *t) #endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD -struct tasklet_head -{ - struct tasklet_struct *head; - struct tasklet_struct **tail; -}; - struct tasklet_head gsnedf_pending_tasklets; #endif @@ -938,13 +932,6 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) sched_state_task_picked(); -#if 0 - if (next && is_realtime(next) && tsk_rt(next)->is_aux_task && !tsk_rt(next)->inh_task) { - TRACE_TASK(next, "is aux with no inheritance. 
preventing it from actually running.\n"); - next = NULL; - } -#endif - raw_spin_unlock(&gsnedf_lock); #ifdef WANT_ALL_SCHED_EVENTS @@ -1056,6 +1043,13 @@ static void gsnedf_task_wake_up(struct task_struct *task) } #endif +#ifdef CONFIG_LITMUS_NVIDIA + if (tsk_rt(task)->held_gpus && !tsk_rt(task)->hide_from_gpu) { + TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", task->comm, task->pid); + disable_gpu_owner(task); + } +#endif + gsnedf_job_arrival(task); raw_spin_unlock_irqrestore(&gsnedf_lock, flags); } @@ -1079,6 +1073,14 @@ static void gsnedf_task_block(struct task_struct *t) } #endif +#ifdef CONFIG_LITMUS_NVIDIA + if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) { + + TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); + enable_gpu_owner(t); + } +#endif + raw_spin_unlock_irqrestore(&gsnedf_lock, flags); BUG_ON(!is_realtime(t)); @@ -1106,6 +1108,13 @@ static void gsnedf_task_exit(struct task_struct * t) } #endif +#ifdef CONFIG_LITMUS_NVIDIA + /* make sure we clean up on our way out */ + if(tsk_rt(t)->held_gpus) { + disable_gpu_owner(t); + } +#endif + unlink(t); if (tsk_rt(t)->scheduled_on != NO_CPU) { gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; @@ -1154,7 +1163,6 @@ static int __increase_priority_inheritance(struct task_struct* t, #ifdef CONFIG_LITMUS_NESTED_LOCKING /* this sanity check allows for weaker locking in protocols */ - /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) */ if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { #endif TRACE_TASK(t, "inherits priority from %s/%d\n", @@ -1218,6 +1226,14 @@ static int __increase_priority_inheritance(struct task_struct* t, aux_task_owner_increase_priority(t); } #endif + +#ifdef CONFIG_LITMUS_NVIDIA + /* propagate to gpu klmirqd */ + if (tsk_rt(t)->held_gpus) { + gpu_owner_increase_priority(t); + } +#endif + } #ifdef CONFIG_LITMUS_NESTED_LOCKING } @@ -1247,16 +1263,6 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str success = __increase_priority_inheritance(t, prio_inh); -#ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klmirqd != NULL) - { - TRACE_TASK(t, "%s/%d inherits a new priority!\n", - tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - - __increase_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); - } -#endif - raw_spin_unlock(&gsnedf_lock); #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) @@ -1330,6 +1336,14 @@ static int __decrease_priority_inheritance(struct task_struct* t, } #endif +#ifdef CONFIG_LITMUS_NVIDIA + /* propagate to gpu */ + if (tsk_rt(t)->held_gpus) { + gpu_owner_decrease_priority(t); + } +#endif + + #ifdef CONFIG_LITMUS_NESTED_LOCKING } else { @@ -1357,16 +1371,6 @@ static void decrease_priority_inheritance(struct task_struct* t, success = __decrease_priority_inheritance(t, prio_inh); -#ifdef CONFIG_LITMUS_SOFTIRQD - if(tsk_rt(t)->cur_klmirqd != NULL) - { - TRACE_TASK(t, "%s/%d decreases in priority!\n", - tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid); - - __decrease_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh); - } -#endif - raw_spin_unlock(&gsnedf_lock); #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) @@ -1382,62 +1386,6 @@ static void decrease_priority_inheritance(struct task_struct* t, } -#ifdef CONFIG_LITMUS_SOFTIRQD -/* called with IRQs off */ -static void increase_priority_inheritance_klmirqd(struct task_struct* klmirqd, - struct task_struct* old_owner, - struct task_struct* new_owner) -{ - 
BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); - - raw_spin_lock(&gsnedf_lock); - - if(old_owner != new_owner) - { - if(old_owner) - { - // unreachable? - tsk_rt(old_owner)->cur_klmirqd = NULL; - } - - TRACE_TASK(klmirqd, "giving ownership to %s/%d.\n", - new_owner->comm, new_owner->pid); - - tsk_rt(new_owner)->cur_klmirqd = klmirqd; - } - - __decrease_priority_inheritance(klmirqd, NULL); // kludge to clear out cur prio. - - __increase_priority_inheritance(klmirqd, - (tsk_rt(new_owner)->inh_task == NULL) ? - new_owner : - tsk_rt(new_owner)->inh_task); - - raw_spin_unlock(&gsnedf_lock); -} - - -/* called with IRQs off */ -static void decrease_priority_inheritance_klmirqd(struct task_struct* klmirqd, - struct task_struct* old_owner, - struct task_struct* new_owner) -{ - BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread)); - - raw_spin_lock(&gsnedf_lock); - - TRACE_TASK(klmirqd, "priority restored\n"); - - __decrease_priority_inheritance(klmirqd, new_owner); - - tsk_rt(old_owner)->cur_klmirqd = NULL; - - raw_spin_unlock(&gsnedf_lock); -} -#endif - - - #ifdef CONFIG_LITMUS_NESTED_LOCKING @@ -1923,7 +1871,7 @@ static long gsnedf_activate_plugin(void) #endif #ifdef CONFIG_LITMUS_SOFTIRQD - spawn_klmirqd(NULL); + init_klmirqd(); #endif #ifdef CONFIG_LITMUS_NVIDIA @@ -1965,10 +1913,6 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { #ifdef CONFIG_LITMUS_AFFINITY_LOCKING .allocate_aff_obs = gsnedf_allocate_affinity_observer, #endif -#ifdef CONFIG_LITMUS_SOFTIRQD - .increase_prio_klmirqd = increase_priority_inheritance_klmirqd, - .decrease_prio_klmirqd = decrease_priority_inheritance_klmirqd, -#endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet, .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet, diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index cda67e0f6bc8..30c216fd6fdc 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -167,18 +167,6 @@ static int litmus_dummy___decrease_prio(struct task_struct* t, struct task_struc } #endif -#ifdef CONFIG_LITMUS_SOFTIRQD -static void litmus_dummy_increase_prio_klmirqd(struct task_struct* klmirqd, - struct task_struct* old_owner, - struct task_struct* new_owner) -{ -} - -static void litmus_dummy_decrease_prio_klmirqd(struct task_struct* klmirqd, - struct task_struct* old_owner) -{ -} -#endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t) @@ -263,10 +251,6 @@ struct sched_plugin linux_sched_plugin = { .nested_decrease_prio = litmus_dummy_nested_decrease_prio, .__compare = litmus_dummy___compare, #endif -#ifdef CONFIG_LITMUS_SOFTIRQD - .increase_prio_klmirqd = litmus_dummy_increase_prio_klmirqd, - .decrease_prio_klmirqd = litmus_dummy_decrease_prio_klmirqd, -#endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet, .change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet, @@ -327,10 +311,6 @@ int register_sched_plugin(struct sched_plugin* plugin) CHECK(nested_decrease_prio); CHECK(__compare); #endif -#ifdef CONFIG_LITMUS_SOFTIRQD - CHECK(increase_prio_klmirqd); - CHECK(decrease_prio_klmirqd); -#endif #ifdef CONFIG_LITMUS_PAI_SOFTIRQD CHECK(enqueue_pai_tasklet); CHECK(change_prio_pai_tasklet); -- cgit v1.2.2 From 9207c7f874e7754391fdf184187fc763455466c5 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Wed, 12 Dec 2012 14:45:17 -0500 Subject: Cluster assignment of nv klmirqd threads. 
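This patch adds a per-plugin hook, map_gpu_to_cpu() (type default_cpu_for_gpu_t), that init_nv_device_reg() calls to choose the CPU on which each GPU's klmirqd interrupt thread is launched: C-EDF assigns GPUs to its CPU clusters in contiguous groups and returns the first CPU of the owning cluster, while GSN-EDF always returns CPU 0. For illustration only (not part of the applied diff), the minimal user-space sketch below mirrors the C-EDF mapping policy; the constants NUM_CPU_CLUSTERS, CPUS_PER_CLUSTER, and NUM_GPUS and the name example_map_gpu_to_cpu are hypothetical stand-ins for C-EDF's runtime cluster data, and it assumes CPUs are numbered contiguously by cluster.

#include <stdio.h>

/* Hypothetical stand-ins for C-EDF's runtime cluster configuration and
 * CONFIG_NV_DEVICE_NUM; the real values are computed at plugin activation. */
#define NUM_CPU_CLUSTERS 4
#define CPUS_PER_CLUSTER 2
#define NUM_GPUS         8

/* Mirrors the policy in this patch: GPUs are grouped onto CPU clusters in
 * contiguous ranges, and each GPU's interrupt thread defaults to the first
 * CPU of its cluster. */
static int example_map_gpu_to_cpu(int gpu)
{
	int gpu_cluster_size = NUM_GPUS / NUM_CPU_CLUSTERS; /* GPUs per cluster */
	int cpu_cluster = gpu / gpu_cluster_size;           /* owning cluster */
	return cpu_cluster * CPUS_PER_CLUSTER;              /* first CPU of that cluster */
}

int main(void)
{
	int gpu;
	for (gpu = 0; gpu < NUM_GPUS; gpu++)
		printf("GPU %d -> default CPU %d\n", gpu, example_map_gpu_to_cpu(gpu));
	return 0;
}

With 8 GPUs and 4 clusters of 2 CPUs, GPUs 0-1 map to CPU 0, GPUs 2-3 to CPU 2, and so on, matching the gpu / gpu_cluster_size computation in cedf_map_gpu_to_cpu() in the diff that follows.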
--- include/litmus/sched_plugin.h | 8 ++++++++ litmus/nvidia_info.c | 15 +++++++++------ litmus/sched_cedf.c | 22 ++++++++++++++++++++++ litmus/sched_gsn_edf.c | 10 +++++++++- litmus/sched_plugin.c | 14 ++++++++++++++ 5 files changed, 62 insertions(+), 7 deletions(-) diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index a13d1a2992fe..cfa218504d75 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -102,6 +102,10 @@ typedef int (*__higher_prio_t)(struct task_struct* a, comparison_mode_t a_mod, struct task_struct* b, comparison_mode_t b_mod); #endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) +typedef int (*default_cpu_for_gpu_t)(int gpu, int num_gpus); +#endif + /********************* sys call backends ********************/ /* This function causes the caller to sleep until the next release */ @@ -165,6 +169,10 @@ struct sched_plugin { change_prio_pai_tasklet_t change_prio_pai_tasklet; run_tasklets_t run_tasklets; #endif + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + default_cpu_for_gpu_t map_gpu_to_cpu; +#endif } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index b29f4d3f0dac..0b39dcc84115 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -449,13 +449,16 @@ static int init_nv_device_reg(void) INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order); #ifdef CONFIG_LITMUS_SOFTIRQD - // TODO: Make thread spawning this a litmus plugin call. - NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb; - NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i); - mb(); + { + int default_cpu = litmus->map_gpu_to_cpu(i, NV_DEVICE_NUM); + + NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb; + NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i); + mb(); - if(launch_klmirqd_thread(0, &NV_DEVICE_REG[i].callback) != 0) { - TRACE("Failed to create klmirqd thread for GPU %d\n", i); + if(launch_klmirqd_thread(default_cpu, &NV_DEVICE_REG[i].callback) != 0) { + TRACE("Failed to create klmirqd thread for GPU %d\n", i); + } } #endif } diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 84aafca78cde..35ea1544ce69 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -1652,6 +1652,25 @@ static void cleanup_cedf(void) } } +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) +static int cedf_map_gpu_to_cpu(int gpu, int num_gpus) +{ + /* TODO: Some sort of smart clustering on the PCIe bus topology */ + int num_gpu_clusters = num_clusters; + unsigned int gpu_cluster_size = num_gpus / num_gpu_clusters; + int cpu_cluster = gpu / gpu_cluster_size; + int default_cpu = cedf[cpu_cluster].cpus[0]->cpu; // first CPU in given cluster + + if(num_gpus % num_gpu_clusters != 0) { + TRACE("GPU clusters are of non-uniform size!\n"); + } + + TRACE("CPU %d is default for GPU %d interrupt threads.\n", default_cpu, gpu); + + return default_cpu; +} +#endif + static long cedf_activate_plugin(void) { int i, j, cpu, ccpu, cpu_count; @@ -1826,6 +1845,9 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet, .run_tasklets = cedf_run_tasklets, #endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + .map_gpu_to_cpu = cedf_map_gpu_to_cpu, +#endif }; static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index f27c104ea027..1b5d8d73dc16 100644 --- 
a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -1835,7 +1835,12 @@ UNSUPPORTED_AFF_OBS: #endif - +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) +static int gsnedf_map_gpu_to_cpu(int gpu, int num_gpus) +{ + return 0; // CPU_0 is default in all cases. +} +#endif static long gsnedf_activate_plugin(void) @@ -1918,6 +1923,9 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet, .run_tasklets = gsnedf_run_tasklets, #endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + .map_gpu_to_cpu = gsnedf_map_gpu_to_cpu, +#endif }; diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 30c216fd6fdc..ea89f5fedcab 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -222,6 +222,13 @@ static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs, } #endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) +static int litmus_dummy_map_gpu_to_cpu(int gpu, int num_gpus) +{ + return 0; +} +#endif + /* The default scheduler plugin. It doesn't do anything and lets Linux do its * job. @@ -262,6 +269,9 @@ struct sched_plugin linux_sched_plugin = { #ifdef CONFIG_LITMUS_AFFINITY_LOCKING .allocate_aff_obs = litmus_dummy_allocate_aff_obs, #endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + .map_gpu_to_cpu = litmus_dummy_map_gpu_to_cpu, +#endif .admit_task = litmus_dummy_admit_task }; @@ -322,6 +332,10 @@ int register_sched_plugin(struct sched_plugin* plugin) #ifdef CONFIG_LITMUS_AFFINITY_LOCKING CHECK(allocate_aff_obs); #endif +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + CHECK(map_gpu_to_cpu); +#endif + CHECK(admit_task); if (!plugin->release_at) -- cgit v1.2.2 From 2ccc2c4cc981a68e703082e6e32f5483ad87b61c Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Wed, 12 Dec 2012 16:38:55 -0500 Subject: Use num_online_gpus() Note that num_online_gpus() merely reports the staticly configured maximum number of available GPUs. Will make dynamic in the future. --- include/litmus/nvidia_info.h | 6 ++++-- include/litmus/sched_plugin.h | 2 +- litmus/nvidia_info.c | 8 ++++---- litmus/sched_cedf.c | 27 +++++++++++++++++++-------- litmus/sched_gsn_edf.c | 2 +- litmus/sched_plugin.c | 2 +- 6 files changed, 30 insertions(+), 17 deletions(-) diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h index 6f354c8b00ac..8c2a5524512e 100644 --- a/include/litmus/nvidia_info.h +++ b/include/litmus/nvidia_info.h @@ -8,6 +8,10 @@ #define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM +/* TODO: Make this a function that checks the PCIe bus or maybe proc settings */ +#define num_online_gpus() (NV_DEVICE_NUM) + + /* Functions used for decoding NVIDIA blobs. 
*/ int init_nvidia_info(void); @@ -21,8 +25,6 @@ void dump_nvidia_info(const struct tasklet_struct *t); u32 get_tasklet_nv_device_num(const struct tasklet_struct *t); u32 get_work_nv_device_num(const struct work_struct *t); - - /* Functions for figuring out the priority of GPU-using tasks */ struct task_struct* get_nv_max_device_owner(u32 target_device_id); diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index cfa218504d75..78004381a6cc 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -103,7 +103,7 @@ typedef int (*__higher_prio_t)(struct task_struct* a, comparison_mode_t a_mod, #endif #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) -typedef int (*default_cpu_for_gpu_t)(int gpu, int num_gpus); +typedef int (*default_cpu_for_gpu_t)(int gpu); #endif diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 0b39dcc84115..7883296a7a18 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -444,13 +444,13 @@ static int init_nv_device_reg(void) mb(); - for(i = 0; i < NV_DEVICE_NUM; ++i) { + for(i = 0; i < num_online_gpus(); ++i) { raw_spin_lock_init(&NV_DEVICE_REG[i].lock); INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order); #ifdef CONFIG_LITMUS_SOFTIRQD { - int default_cpu = litmus->map_gpu_to_cpu(i, NV_DEVICE_NUM); + int default_cpu = litmus->map_gpu_to_cpu(i); NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb; NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i); @@ -478,7 +478,7 @@ static int shutdown_nv_device_reg(void) int i; nv_device_registry_t *reg; - for (i = 0; i < NV_DEVICE_NUM; ++i) { + for (i = 0; i < num_online_gpus(); ++i) { TRACE("Shutting down GPU %d.\n", i); @@ -829,7 +829,7 @@ int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) { int ret; - if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) + if((reg_device_id < num_online_gpus()) && (reg_device_id >= 0)) { if(reg_action) ret = __reg_nv_device(reg_device_id, t); diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 35ea1544ce69..46de8041cf59 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -157,6 +157,13 @@ static unsigned int cluster_size; static int clusters_allocated = 0; + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) +static int num_gpu_clusters; +static unsigned int gpu_cluster_size; +#endif + + #ifdef CONFIG_LITMUS_DGL_SUPPORT static raw_spinlock_t* cedf_get_dgl_spinlock(struct task_struct *t) { @@ -1653,18 +1660,11 @@ static void cleanup_cedf(void) } #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) -static int cedf_map_gpu_to_cpu(int gpu, int num_gpus) +static int cedf_map_gpu_to_cpu(int gpu) { - /* TODO: Some sort of smart clustering on the PCIe bus topology */ - int num_gpu_clusters = num_clusters; - unsigned int gpu_cluster_size = num_gpus / num_gpu_clusters; int cpu_cluster = gpu / gpu_cluster_size; int default_cpu = cedf[cpu_cluster].cpus[0]->cpu; // first CPU in given cluster - if(num_gpus % num_gpu_clusters != 0) { - TRACE("GPU clusters are of non-uniform size!\n"); - } - TRACE("CPU %d is default for GPU %d interrupt threads.\n", default_cpu, gpu); return default_cpu; @@ -1717,6 +1717,17 @@ static long cedf_activate_plugin(void) printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", num_clusters, cluster_size); + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) + num_gpu_clusters = min(num_clusters, num_online_gpus()); + gpu_cluster_size = num_online_gpus() / 
num_gpu_clusters; + + if (((num_online_gpus() % gpu_cluster_size) != 0) || + (num_gpu_clusters != num_clusters)) { + printk(KERN_WARNING "C-EDF: GPUs not uniformly distributed among CPU clusters.\n"); + } +#endif + /* initialize clusters */ cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); for (i = 0; i < num_clusters; i++) { diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 1b5d8d73dc16..4ac573a6f0f7 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -1836,7 +1836,7 @@ UNSUPPORTED_AFF_OBS: #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) -static int gsnedf_map_gpu_to_cpu(int gpu, int num_gpus) +static int gsnedf_map_gpu_to_cpu(int gpu) { return 0; // CPU_0 is default in all cases. } diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index ea89f5fedcab..76ff892122aa 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -223,7 +223,7 @@ static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs, #endif #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) -static int litmus_dummy_map_gpu_to_cpu(int gpu, int num_gpus) +static int litmus_dummy_map_gpu_to_cpu(int gpu) { return 0; } -- cgit v1.2.2 From 4ea2c9490eaf9df55ccbfe6f4c56518fc4bdce8f Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 13 Dec 2012 11:44:23 -0500 Subject: Fix klmirqd plugin switching to not panic. --- litmus/Kconfig | 13 ------------- litmus/litmus.c | 23 ++++++++++------------- litmus/litmus_softirq.c | 27 +++++++++++++++++++++------ litmus/nvidia_info.c | 5 ++++- 4 files changed, 35 insertions(+), 33 deletions(-) diff --git a/litmus/Kconfig b/litmus/Kconfig index 9aeae659ae32..8ca66b4d687c 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -443,19 +443,6 @@ config NV_DEVICE_NUM Should be (<= to the number of CPUs) and (<= to the number of GPUs) in your system. -config NV_MAX_SIMULT_USERS - int "Maximum number of threads sharing a GPU simultanously" - depends on LITMUS_NVIDIA - range 1 3 - default "2" - help - Should be at least equal to the #copy_engines + #execution_engines - of the GPUs in your system. - - Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?) - Consumer Fermi/Kepler GPUs = 2 (GTX-4xx thru -6xx) - Older = 1 (ex. GTX-2xx) - choice prompt "CUDA/Driver Version Support" default CUDA_5_0 diff --git a/litmus/litmus.c b/litmus/litmus.c index fa244ba53e22..f98aa9d778a2 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -445,8 +445,8 @@ long __litmus_admit_task(struct task_struct* tsk) #endif #ifdef CONFIG_LITMUS_SOFTIRQD /* not an interrupt thread by default */ - tsk_rt(tsk)->is_interrupt_thread = 0; - tsk_rt(tsk)->klmirqd_info = NULL; + //tsk_rt(tsk)->is_interrupt_thread = 0; + //tsk_rt(tsk)->klmirqd_info = NULL; #endif retval = litmus->admit_task(tsk); @@ -523,11 +523,6 @@ static void synch_on_plugin_switch(void* info) cpu_relax(); } -/* Switching a plugin in use is tricky. - * We must watch out that no real-time tasks exists - * (and that none is created in parallel) and that the plugin is not - * currently in use on any processor (in theory). 
- */ int switch_sched_plugin(struct sched_plugin* plugin) { //unsigned long flags; @@ -535,20 +530,21 @@ int switch_sched_plugin(struct sched_plugin* plugin) BUG_ON(!plugin); +#ifdef CONFIG_LITMUS_SOFTIRQD + if (!klmirqd_is_dead()) { + kill_klmirqd(); + } +#endif + /* forbid other cpus to use the plugin */ atomic_set(&cannot_use_plugin, 1); /* send IPI to force other CPUs to synch with us */ smp_call_function(synch_on_plugin_switch, NULL, 0); /* wait until all other CPUs have started synch */ - while (atomic_read(&cannot_use_plugin) < num_online_cpus()) + while (atomic_read(&cannot_use_plugin) < num_online_cpus()) { cpu_relax(); - -#ifdef CONFIG_LITMUS_SOFTIRQD - if (!klmirqd_is_dead()) { - kill_klmirqd(); } -#endif /* stop task transitions */ //raw_spin_lock_irqsave(&task_transition_lock, flags); @@ -571,6 +567,7 @@ int switch_sched_plugin(struct sched_plugin* plugin) out: //raw_spin_unlock_irqrestore(&task_transition_lock, flags); atomic_set(&cannot_use_plugin, 0); + return ret; } diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index 44e2d38ad982..9c5ecab5e8d9 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c @@ -79,6 +79,7 @@ void kill_klmirqd(void) { unsigned long flags; struct list_head *pos; + struct list_head *q; raw_spin_lock_irqsave(&klmirqd_state.lock, flags); @@ -86,7 +87,7 @@ void kill_klmirqd(void) klmirqd_state.shuttingdown = 1; - list_for_each(pos, &klmirqd_state.threads) { + list_for_each_safe(pos, q, &klmirqd_state.threads) { struct klmirqd_info* info = list_entry(pos, struct klmirqd_info, klmirqd_reg); if(info->terminating != 1) @@ -96,7 +97,9 @@ void kill_klmirqd(void) flush_pending(info->klmirqd); /* signal termination */ + raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); kthread_stop(info->klmirqd); + raw_spin_lock_irqsave(&klmirqd_state.lock, flags); } } @@ -219,7 +222,7 @@ int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb) #define KLMIRQD_SLICE_NR_JIFFIES 1 #define KLMIRQD_SLICE_NS ((NSEC_PER_SEC / HZ) * KLMIRQD_SLICE_NR_JIFFIES) -static int set_litmus_daemon_sched(struct task_struct* tsk) +static int become_litmus_daemon(struct task_struct* tsk) { int ret = 0; @@ -249,6 +252,16 @@ static int set_litmus_daemon_sched(struct task_struct* tsk) return ret; } +static int become_normal_daemon(struct task_struct* tsk) +{ + int ret = 0; + + struct sched_param param = { .sched_priority = 0}; + sched_setscheduler_nocheck(tsk, SCHED_NORMAL, ¶m); + + return ret; +} + static int register_klmirqd(struct task_struct* tsk) { int retval = 0; @@ -318,6 +331,7 @@ static int unregister_klmirqd(struct task_struct* tsk) /* remove the entry in the klmirqd thread list */ list_del(&info->klmirqd_reg); + mb(); --klmirqd_state.nr_threads; /* remove link to klmirqd info from thread */ @@ -687,7 +701,7 @@ static int run_klmirqd(void* callback) struct klmirqd_info* info = NULL; klmirqd_callback_t* cb = (klmirqd_callback_t*)(callback); - retval = set_litmus_daemon_sched(current); + retval = become_litmus_daemon(current); if (retval != 0) { TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__); goto failed; @@ -696,7 +710,7 @@ static int run_klmirqd(void* callback) retval = register_klmirqd(current); if (retval != 0) { TRACE_CUR("%s: Failed to become a klmirqd thread.\n", __FUNCTION__); - goto failed; + goto failed_sched_normal; } if (cb && cb->func) { @@ -781,9 +795,10 @@ failed_unregister: /* remove our registration from klmirqd */ unregister_klmirqd(current); -failed: - litmus_exit_task(current); +failed_sched_normal: + 
become_normal_daemon(current); +failed: return retval; } diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 7883296a7a18..3d38b168d9ba 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -322,7 +322,10 @@ int init_nvidia_info(void) else { TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); - return(-1); + + init_nv_device_reg(); + return(0); +// return(-1); } } -- cgit v1.2.2 From a3e1d14976fbb0859ad91afdbea13786255648da Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 13 Dec 2012 13:12:59 -0500 Subject: blah --- litmus/litmus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litmus/litmus.c b/litmus/litmus.c index f98aa9d778a2..4ee1c6ca7801 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -647,7 +647,7 @@ int sys_kill(int pid, int sig); static void sysrq_handle_kill_rt_tasks(int key) { - struct task_struct *t; + struct task_struct *t; // test read_lock(&tasklist_lock); for_each_process(t) { if (is_realtime(t)) { -- cgit v1.2.2 From bb9b9d2075a717ea77cb83c30d55aed366bececf Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 13 Dec 2012 13:13:21 -0500 Subject: test2 --- litmus/litmus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litmus/litmus.c b/litmus/litmus.c index 4ee1c6ca7801..f98aa9d778a2 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -647,7 +647,7 @@ int sys_kill(int pid, int sig); static void sysrq_handle_kill_rt_tasks(int key) { - struct task_struct *t; // test + struct task_struct *t; read_lock(&tasklist_lock); for_each_process(t) { if (is_realtime(t)) { -- cgit v1.2.2 From 8f4bc19471bd49f4dcf6ab20254b7c71ec12e4e2 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 13 Dec 2012 17:15:17 -0500 Subject: Fix several klmirqd bugs. 1) Deadlock in litmus_task_exit()-- added litmus_pre_task_exit() to be called without the Linux runqueue lock held. 2) Prioritization of base-prio klmirqd/aux threads vs. normal real-time tasks. 3) Initialization of gpu owner binheap node moved to *after* memset(0) of rt_params. 4) Exit path of klmirqd threads. --- include/litmus/litmus.h | 2 ++ kernel/sched.c | 3 ++ litmus/Kconfig | 8 +++++ litmus/edf_common.c | 20 ++++++++++-- litmus/litmus.c | 36 +++++++++++--------- litmus/litmus_softirq.c | 4 +++ litmus/nvidia_info.c | 87 +++++++++++++++++++++++++++++++++++++++++++++---- litmus/sched_cedf.c | 6 ++-- 8 files changed, 138 insertions(+), 28 deletions(-) diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index 711b88e2b3d1..54f33e835682 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -38,6 +38,8 @@ void litmus_exec(void); void exit_litmus(struct task_struct *dead_tsk); long litmus_admit_task(struct task_struct *tsk); + +void litmus_pre_exit_task(struct task_struct *tsk); // called before litmus_exit_task, but without run queue locks held void litmus_exit_task(struct task_struct *tsk); #define is_realtime(t) ((t)->policy == SCHED_LITMUS) diff --git a/kernel/sched.c b/kernel/sched.c index 840f87bce097..a1f10984adb3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5288,6 +5288,9 @@ recheck: if (retval) return retval; } + else if (p->policy == SCHED_LITMUS) { + litmus_pre_exit_task(p); + } /* * make sure no PI-waiters arrive (or leave) while we are diff --git a/litmus/Kconfig b/litmus/Kconfig index 8ca66b4d687c..b704e893e9be 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -470,6 +470,14 @@ config CUDA_3_2 endchoice +config LITMUS_NV_KLMIRQD_DEBUG + bool "Raise fake sporadic tasklets to test nv klimirqd threads." 
+ depends on LITMUS_NVIDIA && LITMUS_SOFTIRQD + default n + help + Causes tasklets to be sporadically dispatched to waiting klmirqd + threads. + endmenu endmenu diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 27b728a55669..255e4f36e413 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c @@ -119,8 +119,15 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) goto aux_tie_break; } else { + // make the aux thread lowest priority real-time task - int temp = (first_lo_aux) ? !is_realtime(second) : !is_realtime(first); + int temp = 0; + if (first_lo_aux && is_realtime(second)) { +// temp = 0; + } + else if(second_lo_aux && is_realtime(first)) { + temp = 1; + } TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp); return temp; } @@ -149,8 +156,15 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second) goto klmirqd_tie_break; } else { - // make the klmirqd thread (second) lowest priority real-time task - int temp = (first_lo_klmirqd) ? !is_realtime(second) : !is_realtime(first); + // make the klmirqd thread the lowest-priority real-time task + // but (above low-prio aux tasks and Linux tasks) + int temp = 0; + if (first_lo_klmirqd && is_realtime(second)) { +// temp = 0; + } + else if(second_lo_klmirqd && is_realtime(first)) { + temp = 1; + } TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp); return temp; } diff --git a/litmus/litmus.c b/litmus/litmus.c index f98aa9d778a2..1aada57176de 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -355,8 +355,8 @@ static void reinit_litmus_state(struct task_struct* p, int restore) ctrl_page = p->rt_param.ctrl_page; } -#ifdef CONFIG_LITMUS_NESTED_LOCKING - prio_order = p->rt_param.hp_blocked_tasks.compare; +#ifdef CONFIG_LITMUS_NVIDIA + WARN_ON(p->rt_param.held_gpus != 0); #endif #ifdef CONFIG_LITMUS_LOCKING @@ -367,15 +367,7 @@ static void reinit_litmus_state(struct task_struct* p, int restore) #endif #ifdef CONFIG_LITMUS_NESTED_LOCKING -// WARN_ON(p->rt_param.blocked_lock); -// WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks)); -#endif - - -#ifdef CONFIG_LITMUS_NVIDIA - WARN_ON(p->rt_param.held_gpus != 0); - - INIT_BINHEAP_NODE(&p->rt_param.gpu_owner_node); + prio_order = p->rt_param.hp_blocked_tasks.compare; #endif /* Cleanup everything else. */ @@ -384,8 +376,9 @@ static void reinit_litmus_state(struct task_struct* p, int restore) #ifdef CONFIG_REALTIME_AUX_TASKS /* also clear out the aux_data. the !restore case is only called on * fork (initial thread creation). */ - if (!restore) + if (!restore) { memset(&p->aux_data, 0, sizeof(p->aux_data)); + } #endif /* Restore preserved fields. 
*/ @@ -394,6 +387,10 @@ static void reinit_litmus_state(struct task_struct* p, int restore) p->rt_param.ctrl_page = ctrl_page; } +#ifdef CONFIG_LITMUS_NVIDIA + INIT_BINHEAP_NODE(&p->rt_param.gpu_owner_node); +#endif + #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) init_gpu_affinity_state(p); #endif @@ -494,15 +491,20 @@ out: return retval; } -void litmus_exit_task(struct task_struct* tsk) +void litmus_pre_exit_task(struct task_struct* tsk) { if (is_realtime(tsk)) { - sched_trace_task_completion(tsk, 1); - if (tsk_rt(tsk)->rsrc_exit_cb) { int ret = tsk_rt(tsk)->rsrc_exit_cb(tsk); WARN_ON(ret != 0); } + } +} + +void litmus_exit_task(struct task_struct* tsk) +{ + if (is_realtime(tsk)) { + sched_trace_task_completion(tsk, 1); litmus->task_exit(tsk); @@ -637,8 +639,10 @@ void exit_litmus(struct task_struct *dead_tsk) } /* main cleanup only for RT tasks */ - if (is_realtime(dead_tsk)) + if (is_realtime(dead_tsk)) { + litmus_pre_exit_task(dead_tsk); /* todo: double check that no Linux rq lock is held */ litmus_exit_task(dead_tsk); + } } diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index 9c5ecab5e8d9..be06405021c5 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c @@ -1163,3 +1163,7 @@ int __litmus_schedule_work(struct work_struct *w, struct task_struct* klmirqd_th } EXPORT_SYMBOL(__litmus_schedule_work); + + + + diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 3d38b168d9ba..059a7e7ac715 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -258,6 +258,8 @@ void dump_nvidia_info(const struct tasklet_struct *t) static struct module* nvidia_mod = NULL; + + #if 0 static int nvidia_ready_module_notify(struct notifier_block *self, unsigned long val, void *data) @@ -390,6 +392,10 @@ typedef struct { struct task_struct* thread; int ready:1; /* todo: make threads check for the ready flag */ #endif + +#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG + struct tasklet_struct nv_klmirqd_dbg_tasklet; +#endif }nv_device_registry_t; @@ -397,8 +403,6 @@ static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; - - #ifdef CONFIG_LITMUS_SOFTIRQD static int nvidia_klmirqd_cb(void *arg) { @@ -417,6 +421,63 @@ static int nvidia_klmirqd_cb(void *arg) } #endif +#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG +struct nv_klmirqd_dbg_timer_struct +{ + struct hrtimer timer; +}; + +static struct nv_klmirqd_dbg_timer_struct nv_klmirqd_dbg_timer; + +static void nv_klmirqd_arm_dbg_timer(lt_t relative_time) +{ + lt_t when_to_fire = litmus_clock() + relative_time; + + TRACE("next nv tasklet in %d ns\n", relative_time); + + __hrtimer_start_range_ns(&nv_klmirqd_dbg_timer.timer, + ns_to_ktime(when_to_fire), + 0, + HRTIMER_MODE_ABS_PINNED, + 0); +} + +static void nv_klmirqd_dbg_tasklet_func(unsigned long arg) +{ + lt_t now = litmus_clock(); + nv_device_registry_t *reg = (nv_device_registry_t*)arg; + int gpunum = reg - &NV_DEVICE_REG[0]; + + TRACE("nv klmirqd routine invoked for GPU %d!\n", gpunum); + + /* set up the next timer */ + nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms. 
+} + + +static enum hrtimer_restart nvklmirqd_timer_func(struct hrtimer *timer) +{ + lt_t now = litmus_clock(); + int gpu = (int)(now % num_online_gpus()); + nv_device_registry_t *reg; + + TRACE("nvklmirqd_timer invoked!\n"); + + reg = &NV_DEVICE_REG[gpu]; + + if (reg->thread && reg->ready) { + TRACE("Adding a tasklet for GPU %d\n", gpu); + litmus_tasklet_schedule(®->nv_klmirqd_dbg_tasklet, reg->thread); + } + else { + TRACE("nv klmirqd is not ready!\n"); + nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms. + } + + return HRTIMER_NORESTART; +} +#endif + static int gpu_owner_max_priority_order(struct binheap_node *a, struct binheap_node *b) @@ -451,6 +512,10 @@ static int init_nv_device_reg(void) raw_spin_lock_init(&NV_DEVICE_REG[i].lock); INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order); +#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG + tasklet_init(&NV_DEVICE_REG[i].nv_klmirqd_dbg_tasklet, nv_klmirqd_dbg_tasklet_func, (unsigned long)&NV_DEVICE_REG[i]); +#endif + #ifdef CONFIG_LITMUS_SOFTIRQD { int default_cpu = litmus->map_gpu_to_cpu(i); @@ -466,6 +531,12 @@ static int init_nv_device_reg(void) #endif } +#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG + hrtimer_init(&nv_klmirqd_dbg_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + nv_klmirqd_dbg_timer.timer.function = nvklmirqd_timer_func; + nv_klmirqd_arm_dbg_timer(NSEC_PER_MSEC * 1000); +#endif + return(1); } @@ -578,7 +649,7 @@ static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct tas -/* call when an aux_owner becomes real-time */ +/* call when an gpu owner becomes real-time */ long enable_gpu_owner(struct task_struct *t) { long retval = 0; @@ -631,7 +702,7 @@ out: return retval; } -/* call when an aux_owner exits real-time */ +/* call when an gpu owner exits real-time */ long disable_gpu_owner(struct task_struct *t) { long retval = 0; @@ -773,9 +844,9 @@ int gpu_owner_decrease_priority(struct task_struct *t) gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); - if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { + if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { WARN_ON(!is_running(t)); - TRACE_CUR("aux tasks may not inherit from %s/%d on GPU %d\n", + TRACE_CUR("nv klmirqd may not inherit from %s/%d on GPU %d\n", t->comm, t->pid, gpu); goto out; } @@ -862,6 +933,10 @@ int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) + + + + diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 46de8041cf59..a454832b2aa8 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -1717,17 +1717,17 @@ static long cedf_activate_plugin(void) printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", num_clusters, cluster_size); - + #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) num_gpu_clusters = min(num_clusters, num_online_gpus()); gpu_cluster_size = num_online_gpus() / num_gpu_clusters; - + if (((num_online_gpus() % gpu_cluster_size) != 0) || (num_gpu_clusters != num_clusters)) { printk(KERN_WARNING "C-EDF: GPUs not uniformly distributed among CPU clusters.\n"); } #endif - + /* initialize clusters */ cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); for (i = 0; i < num_clusters; i++) { -- cgit v1.2.2 From fa1229f9776c7ecc99baa187e0b485ebdbfdd78c Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 13 Dec 2012 18:39:27 -0500 Subject: Allow klmirqd threads to be given names. 
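
For illustration, a minimal caller of the renamed interface might look like the
sketch below (the callback and CPU arguments are placeholders; the real
equivalent is the nvidia_info.c hunk in this patch). Passing name == NULL keeps
the old auto-generated "klmirqd_th%d" names, and cpu == -1 requests no affinity:

    #include <linux/kernel.h>           /* snprintf() */
    #include <litmus/litmus_softirq.h>  /* launch_klmirqd_thread(), MAX_KLMIRQD_NAME_LEN */

    /* Callback assumed to be initialized elsewhere (illustrative only). */
    static klmirqd_callback_t my_callback;

    static int start_named_klmirqd(int gpu, int cpu)
    {
        char name[MAX_KLMIRQD_NAME_LEN + 1];

        /* At most 31 characters; when cpu != -1 the launch path appends
         * "/<cpu>" to the thread name. */
        snprintf(name, sizeof(name), "nvklmirqd%d", gpu);

        return launch_klmirqd_thread(name, cpu, &my_callback);
    }
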
--- include/litmus/litmus_softirq.h | 7 +++- litmus/Kconfig | 3 +- litmus/litmus.c | 12 +++--- litmus/litmus_softirq.c | 84 +++++++++++++++++++++++++++++------------ litmus/nvidia_info.c | 5 ++- litmus/sched_gsn_edf.c | 2 +- 6 files changed, 80 insertions(+), 33 deletions(-) diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h index 52e3f7e74ab1..cfef08187464 100644 --- a/include/litmus/litmus_softirq.h +++ b/include/litmus/litmus_softirq.h @@ -57,8 +57,13 @@ typedef struct data will be initialized. cpu == -1 for no affinity + + provide a name at most 31 (32, + null terminator) characters long. + name == NULL for a default name. (all names are appended with + base-CPU affinity) */ -int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb); +#define MAX_KLMIRQD_NAME_LEN 31 +int launch_klmirqd_thread(char* name, int cpu, klmirqd_callback_t* cb); /* Flushes all pending work out to the OS for regular diff --git a/litmus/Kconfig b/litmus/Kconfig index b704e893e9be..c05405094ea4 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -476,7 +476,8 @@ config LITMUS_NV_KLMIRQD_DEBUG default n help Causes tasklets to be sporadically dispatched to waiting klmirqd - threads. + threads. WARNING! Kernel panic may occur if you switch between + LITMUS plugins! endmenu diff --git a/litmus/litmus.c b/litmus/litmus.c index 1aada57176de..1b4b9d25dbdc 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -556,14 +556,16 @@ int switch_sched_plugin(struct sched_plugin* plugin) ret = litmus->deactivate_plugin(); if (0 != ret) goto out; - ret = plugin->activate_plugin(); + + litmus = plugin; // switch + mb(); // make sure it's seen everywhere. + ret = litmus->activate_plugin(); if (0 != ret) { printk(KERN_INFO "Can't activate %s (%d).\n", - plugin->plugin_name, ret); - plugin = &linux_sched_plugin; + litmus->plugin_name, ret); + litmus = &linux_sched_plugin; } - printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); - litmus = plugin; + printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", litmus->plugin_name); } else ret = -EBUSY; out: diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index be06405021c5..464a78d780ad 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c @@ -34,7 +34,7 @@ struct klmirqd_registration struct list_head threads; }; -static atomic_t klmirqd_id_gen = ATOMIC_INIT(0); +static atomic_t klmirqd_id_gen = ATOMIC_INIT(-1); static struct klmirqd_registration klmirqd_state; @@ -136,12 +136,11 @@ void kill_klmirqd_thread(struct task_struct* klmirqd_thread) raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); } - - struct klmirqd_launch_data { int cpu_affinity; klmirqd_callback_t* cb; + char name[MAX_KLMIRQD_NAME_LEN+1]; struct work_struct work; }; @@ -156,47 +155,76 @@ static void __launch_klmirqd_thread(struct work_struct *work) struct klmirqd_launch_data* launch_data = container_of(work, struct klmirqd_launch_data, work); - TRACE("%s: Creating klmirqd thread\n", __FUNCTION__); + TRACE("Creating klmirqd thread\n"); + - id = atomic_inc_return(&klmirqd_id_gen); if (launch_data->cpu_affinity != -1) { - thread = kthread_create( - run_klmirqd, - /* treat the affinity as a pointer, we'll cast it back later */ - (void*)launch_data->cb, - "klmirqd_th%d/%d", - id, - launch_data->cpu_affinity); + if (launch_data->name[0] == '\0') { + id = atomic_inc_return(&klmirqd_id_gen); + TRACE("Launching klmirqd_th%d/%d\n", id, launch_data->cpu_affinity); + + thread = kthread_create( + run_klmirqd, + /* treat the affinity as a pointer, 
we'll cast it back later */ + (void*)launch_data->cb, + "klmirqd_th%d/%d", + id, + launch_data->cpu_affinity); + } + else { + TRACE("Launching %s/%d\n", launch_data->name, launch_data->cpu_affinity); + + thread = kthread_create( + run_klmirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)launch_data->cb, + "%s/%d", + launch_data->name, + launch_data->cpu_affinity); + } /* litmus will put is in the right cluster. */ kthread_bind(thread, launch_data->cpu_affinity); - - TRACE("%s: Launching klmirqd_th%d/%d\n", __FUNCTION__, id, launch_data->cpu_affinity); } else { - thread = kthread_create( - run_klmirqd, - /* treat the affinity as a pointer, we'll cast it back later */ - (void*)launch_data->cb, - "klmirqd_th%d", - id); - - TRACE("%s: Launching klmirqd_th%d\n", __FUNCTION__, id); + if (launch_data->name[0] == '\0') { + id = atomic_inc_return(&klmirqd_id_gen); + TRACE("Launching klmirqd_th%d\n", id); + + thread = kthread_create( + run_klmirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)launch_data->cb, + "klmirqd_th%d", + id); + + } + else { + TRACE("Launching %s\n", launch_data->name); + + thread = kthread_create( + run_klmirqd, + /* treat the affinity as a pointer, we'll cast it back later */ + (void*)launch_data->cb, + launch_data->name); + } + + } if (thread) { wake_up_process(thread); } else { - TRACE("Could not create klmirqd/%d thread!\n", id); + TRACE("Could not create thread!\n"); } kfree(launch_data); } -int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb) +int launch_klmirqd_thread(char* name, int cpu, klmirqd_callback_t* cb) { struct klmirqd_launch_data* delayed_launch; @@ -211,6 +239,14 @@ int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb) delayed_launch->cpu_affinity = cpu; delayed_launch->cb = cb; INIT_WORK(&delayed_launch->work, __launch_klmirqd_thread); + + if(name) { + snprintf(delayed_launch->name, MAX_KLMIRQD_NAME_LEN+1, "%s", name); + } + else { + delayed_launch->name[0] = '\0'; + } + schedule_work(&delayed_launch->work); return 0; diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 059a7e7ac715..5a63fb732e8b 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -496,6 +496,7 @@ static int gpu_owner_max_priority_order(struct binheap_node *a, static int init_nv_device_reg(void) { int i; + char name[MAX_KLMIRQD_NAME_LEN+1]; #ifdef CONFIG_LITMUS_SOFTIRQD if (!klmirqd_is_ready()) { @@ -520,11 +521,13 @@ static int init_nv_device_reg(void) { int default_cpu = litmus->map_gpu_to_cpu(i); + snprintf(name, MAX_KLMIRQD_NAME_LEN, "nvklmirqd%d", i); + NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb; NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i); mb(); - if(launch_klmirqd_thread(default_cpu, &NV_DEVICE_REG[i].callback) != 0) { + if(launch_klmirqd_thread(name, default_cpu, &NV_DEVICE_REG[i].callback) != 0) { TRACE("Failed to create klmirqd thread for GPU %d\n", i); } } diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 4ac573a6f0f7..7eb44fee1861 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -1838,7 +1838,7 @@ UNSUPPORTED_AFF_OBS: #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) static int gsnedf_map_gpu_to_cpu(int gpu) { - return 0; // CPU_0 is default in all cases. + return -1; // No CPU affinity needed. } #endif -- cgit v1.2.2 From 642eadd6b82daaeeb3247c2417bf58d113639a1c Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Tue, 8 Jan 2013 18:19:43 -0500 Subject: Extend non-rt support to sync-releases. 
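
With this change, sys_wait_for_ts_release() no longer rejects non-real-time
callers, so a plain SCHED_NORMAL helper can block on the same synchronous-release
barrier as the real-time task set. A hypothetical userspace sketch (assuming the
liblitmus wrapper wait_for_ts_release() around this syscall; the helper thread
itself is illustrative, not part of this patch):

    #include <litmus.h>   /* liblitmus: wait_for_ts_release() */

    /* A SCHED_NORMAL measurement/logging helper that starts in sync with
     * the real-time task set. */
    void *trace_helper(void *arg)
    {
        /* Previously this returned -EPERM for non-real-time callers; now
         * the helper blocks here and is woken at the synchronous release. */
        wait_for_ts_release();

        /* ... begin logging alongside the newly released real-time tasks ... */
        return arg;
    }
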
Note that non-rt tasks are released immediately. That is, the 'delay' provided to the release_ts() system-call is ignored for non-rt tasks. --- litmus/sync.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/litmus/sync.c b/litmus/sync.c index bf75fde5450b..9fb6366f002f 100644 --- a/litmus/sync.c +++ b/litmus/sync.c @@ -65,7 +65,11 @@ static long do_release_ts(lt_t start) struct __wait_queue, task_list)->private; task_count++; - litmus->release_at(t, start + t->rt_param.task_params.phase); + /* RT tasks can be delayed. Non-RT tasks are released + immediately. */ + if (is_realtime(t)) { + litmus->release_at(t, start + t->rt_param.task_params.phase); + } sched_trace_task_release(t); } @@ -80,10 +84,8 @@ static long do_release_ts(lt_t start) asmlinkage long sys_wait_for_ts_release(void) { long ret = -EPERM; - struct task_struct *t = current; - if (is_realtime(t)) - ret = do_wait_for_ts_release(); + ret = do_wait_for_ts_release(); return ret; } @@ -97,8 +99,11 @@ asmlinkage long sys_release_ts(lt_t __user *__delay) /* FIXME: check capabilities... */ ret = copy_from_user(&delay, __delay, sizeof(delay)); - if (ret == 0) + if (ret == 0) { + /* Note: Non-rt tasks that participate in a sync release cannot be + delayed. They will be released immediately. */ ret = do_release_ts(litmus_clock() + delay); + } return ret; } -- cgit v1.2.2 From 1235a665a5e00dc762e6646c01381b3ed5019d86 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Wed, 9 Jan 2013 17:00:54 -0500 Subject: Enable sched_trace log injection from userspace --- include/litmus/rt_param.h | 22 ++++++++++++++ include/litmus/unistd_32.h | 6 ++-- include/litmus/unistd_64.h | 5 +++- litmus/litmus.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 3 deletions(-) diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index c8ee64569dbb..43daaf84101d 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -47,6 +47,28 @@ typedef enum { AUX_FUTURE = (AUX_CURRENT<<2) } aux_flags_t; +/* mirror of st_event_record_type_t + * Assume all are UNsupported, unless otherwise stated. */ +typedef enum { + ST_INJECT_NAME = 1, /* supported */ + ST_INJECT_PARAM, /* supported */ + ST_INJECT_RELEASE, /* supported */ + ST_INJECT_ASSIGNED, + ST_INJECT_SWITCH_TO, + ST_INJECT_SWITCH_AWAY, + ST_INJECT_COMPLETION, /* supported */ + ST_INJECT_BLOCK, + ST_INJECT_RESUME, + ST_INJECT_ACTION, + ST_INJECT_SYS_RELEASE, /* supported */ +} sched_trace_injection_events_t; + +struct st_inject_args { + lt_t release; + lt_t deadline; + unsigned int job_no; +}; + /* We use the common priority interpretation "lower index == higher priority", * which is commonly used in fixed-priority schedulability analysis papers. 
* So, a numerically lower priority value implies higher scheduling priority, diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index 7265ffadf555..d1fe84a5d574 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h @@ -20,6 +20,8 @@ #define __NR_litmus_dgl_lock __LSC(12) #define __NR_litmus_dgl_unlock __LSC(13) -#define __NR_set_aux_tasks _LSC(14) +#define __NR_set_aux_tasks __LSC(14) -#define NR_litmus_syscalls 15 +#define __NR_sched_trace_event __LSC(15) + +#define NR_litmus_syscalls 16 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index 51e730124dde..75f9fcb897f5 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -37,4 +37,7 @@ __SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock) #define __NR_set_aux_tasks __LSC(14) __SYSCALL(__NR_set_aux_tasks, sys_set_aux_tasks) -#define NR_litmus_syscalls 15 +#define __NR_sched_trace_event __LSC(15) +__SYSCALL(__NR_sched_trace_event, sys_sched_trace_event) + +#define NR_litmus_syscalls 16 diff --git a/litmus/litmus.c b/litmus/litmus.c index 1b4b9d25dbdc..6a1095aa7725 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -310,6 +310,79 @@ asmlinkage long sys_null_call(cycles_t __user *ts) return ret; } + +asmlinkage long sys_sched_trace_event(int event, struct st_inject_args __user *__args) +{ + long retval = 0; + struct task_struct* t = current; + + struct st_inject_args args; + + if (is_realtime(t)) { + printk(KERN_WARNING "Only non-real-time tasks may inject sched_trace events.\n"); + retval = -EINVAL; + goto out; + } + + if (__args && copy_from_user(&args, __args, sizeof(args))) { + retval = -EFAULT; + goto out; + } + + switch(event) { + /*************************************/ + /* events that don't need parameters */ + /*************************************/ + case ST_INJECT_NAME: + sched_trace_task_name(t); + break; + case ST_INJECT_PARAM: + /* presumes sporadic_task_ns() has already been called + * and valid data has been initialized even if the calling + * task is SCHED_NORMAL. */ + sched_trace_task_param(t); + break; + + /*******************************/ + /* events that need parameters */ + /*******************************/ + case ST_INJECT_COMPLETION: + if (!__args) { + retval = -EINVAL; + goto out; + } + + /* slam in the data */ + t->rt_param.job_params.job_no = args.job_no; + + sched_trace_task_completion(t, 0); + break; + case ST_INJECT_RELEASE: + if (!__args) { + retval = -EINVAL; + goto out; + } + + /* slam in the data */ + tsk_rt(t)->job_params.release = args.release; + tsk_rt(t)->job_params.deadline = args.deadline; + + sched_trace_task_release(t); + break; + + /**********************/ + /* unsupported events */ + /**********************/ + default: + retval = -EINVAL; + break; + } + +out: + return retval; +} + + #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) void init_gpu_affinity_state(struct task_struct* p) { -- cgit v1.2.2
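For reference, a userspace caller of the new injection syscall might look like
the following sketch. The mirrored definitions are taken from the hunks above;
the syscall number is an assumption the build must supply, since
__NR_sched_trace_event is __LSC(15) in litmus/unistd_{32,64}.h and is not
exported by glibc, and lt_t is treated here as a 64-bit nanosecond timestamp.

    #define _GNU_SOURCE
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_sched_trace_event
    #error "define __NR_sched_trace_event to match litmus/unistd_{32,64}.h"
    #endif

    /* Userspace mirrors of struct st_inject_args and the supported
     * ST_INJECT_* codes from include/litmus/rt_param.h. */
    struct st_inject_args {
        uint64_t release;
        uint64_t deadline;
        unsigned int job_no;
    };

    enum {
        ST_INJECT_NAME = 1,
        ST_INJECT_PARAM = 2,
        ST_INJECT_RELEASE = 3,
        ST_INJECT_COMPLETION = 7,
    };

    /* Inject a release and a completion record for the current job.  The
     * caller must be a non-real-time task; real-time callers get -EINVAL. */
    int inject_job(uint64_t release_ns, uint64_t deadline_ns, unsigned int job_no)
    {
        struct st_inject_args args = {
            .release  = release_ns,
            .deadline = deadline_ns,
            .job_no   = job_no,
        };

        if (syscall(__NR_sched_trace_event, ST_INJECT_RELEASE, &args))
            return -1;
        return syscall(__NR_sched_trace_event, ST_INJECT_COMPLETION, &args);
    }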