author    Glenn Elliott <gelliott@cs.unc.edu>  2011-06-02 16:06:05 -0400
committer Glenn Elliott <gelliott@cs.unc.edu>  2011-06-02 16:06:05 -0400
commit    3d5537c160c1484e8d562b9828baf679cc53f67a (patch)
tree      b595364f1b0f94ac2426c8315bc5967debc7bbb0
parent    7d754596756240fa918b94cd0c3011c77a638987 (diff)
Full patch for klitirqd with Nvidia GPU support.
-rw-r--r--  arch/x86/kernel/irq.c                  |   14
-rw-r--r--  arch/x86/kernel/syscall_table_32.S     |    1
-rw-r--r--  include/linux/completion.h             |    1
-rw-r--r--  include/linux/interrupt.h              |    9
-rw-r--r--  include/linux/mutex.h                  |   10
-rw-r--r--  include/linux/semaphore.h              |    9
-rw-r--r--  include/linux/workqueue.h              |   18
-rw-r--r--  include/litmus/affinity.h              |   78
-rw-r--r--  include/litmus/fdso.h                  |    6
-rw-r--r--  include/litmus/litmus.h                |    1
-rw-r--r--  include/litmus/litmus_softirq.h        |  199
-rw-r--r--  include/litmus/nvidia_info.h           |   37
-rw-r--r--  include/litmus/preempt.h               |    1
-rw-r--r--  include/litmus/rt_param.h              |   44
-rw-r--r--  include/litmus/sched_plugin.h          |   22
-rw-r--r--  include/litmus/sched_trace.h           |  174
-rw-r--r--  include/litmus/sched_trace_external.h  |   42
-rw-r--r--  include/litmus/unistd_32.h             |    3
-rw-r--r--  include/litmus/unistd_64.h             |    5
-rw-r--r--  kernel/lockdep.c                       |    3
-rw-r--r--  kernel/mutex.c                         |  141
-rw-r--r--  kernel/sched.c                         |   23
-rw-r--r--  kernel/semaphore.c                     |   13
-rw-r--r--  kernel/softirq.c                       |  278
-rw-r--r--  kernel/workqueue.c                     |   70
-rw-r--r--  litmus/Kconfig                         |   89
-rw-r--r--  litmus/Makefile                        |    4
-rw-r--r--  litmus/affinity.c                      |   49
-rw-r--r--  litmus/edf_common.c                    |    6
-rw-r--r--  litmus/fdso.c                          |    1
-rw-r--r--  litmus/litmus.c                        |   82
-rw-r--r--  litmus/litmus_proc.c                   |   17
-rw-r--r--  litmus/litmus_softirq.c                | 1579
-rw-r--r--  litmus/locking.c                       |    1
-rw-r--r--  litmus/nvidia_info.c                   |  526
-rw-r--r--  litmus/preempt.c                       |    7
-rw-r--r--  litmus/sched_cedf.c                    |  852
-rw-r--r--  litmus/sched_gsn_edf.c                 |  756
-rw-r--r--  litmus/sched_litmus.c                  |    2
-rw-r--r--  litmus/sched_plugin.c                  |   29
-rw-r--r--  litmus/sched_task_trace.c              |  216
-rw-r--r--  litmus/sched_trace_external.c          |   45
42 files changed, 5325 insertions(+), 138 deletions(-)
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 91fd0c70a18a..50abbc6b7429 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -8,6 +8,10 @@
8#include <linux/smp.h> 8#include <linux/smp.h>
9#include <linux/ftrace.h> 9#include <linux/ftrace.h>
10 10
11#ifdef CONFIG_LITMUS_NVIDIA
12#include <litmus/sched_trace.h>
13#endif
14
11#include <asm/apic.h> 15#include <asm/apic.h>
12#include <asm/io_apic.h> 16#include <asm/io_apic.h>
13#include <asm/irq.h> 17#include <asm/irq.h>
@@ -244,7 +248,17 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
244 __func__, smp_processor_id(), vector, irq); 248 __func__, smp_processor_id(), vector, irq);
245 } 249 }
246 250
251//#ifndef CONFIG_LITMUS_NVIDIA
247 irq_exit(); 252 irq_exit();
253//#else
254 /* skip softirqs if we're tracing an interrupt top-half */
 255 /* comment out the if-statement below to trace with bottom halves enabled. */
256 //if(!is_interrupt_tracing_active())
257// irq_exit();
258
259
260// sched_trace_nv_interrupt_end();
261//#endif
248 262
249 set_irq_regs(old_regs); 263 set_irq_regs(old_regs);
250 return 1; 264 return 1;
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 37702905f658..b5ddae40cee2 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -352,3 +352,4 @@ ENTRY(sys_call_table)
352 .long sys_wait_for_ts_release 352 .long sys_wait_for_ts_release
353 .long sys_release_ts 353 .long sys_release_ts
354 .long sys_null_call 354 .long sys_null_call
355 .long sys_register_nv_device
diff --git a/include/linux/completion.h b/include/linux/completion.h
index c63950e8a863..3ce20dd3086e 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -76,6 +76,7 @@ static inline void init_completion(struct completion *x)
76 init_waitqueue_head(&x->wait); 76 init_waitqueue_head(&x->wait);
77} 77}
78 78
79extern void __wait_for_completion_locked(struct completion *);
79extern void wait_for_completion(struct completion *); 80extern void wait_for_completion(struct completion *);
80extern int wait_for_completion_interruptible(struct completion *x); 81extern int wait_for_completion_interruptible(struct completion *x);
81extern int wait_for_completion_killable(struct completion *x); 82extern int wait_for_completion_killable(struct completion *x);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a0384a4d1e6f..5d22f5342376 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -459,6 +459,10 @@ struct tasklet_struct
459 atomic_t count; 459 atomic_t count;
460 void (*func)(unsigned long); 460 void (*func)(unsigned long);
461 unsigned long data; 461 unsigned long data;
462
463#ifdef CONFIG_LITMUS_SOFTIRQD
464 struct task_struct *owner;
465#endif
462}; 466};
463 467
464#define DECLARE_TASKLET(name, func, data) \ 468#define DECLARE_TASKLET(name, func, data) \
@@ -496,6 +500,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t)
496#define tasklet_unlock(t) do { } while (0) 500#define tasklet_unlock(t) do { } while (0)
497#endif 501#endif
498 502
503extern void ___tasklet_schedule(struct tasklet_struct *t);
499extern void __tasklet_schedule(struct tasklet_struct *t); 504extern void __tasklet_schedule(struct tasklet_struct *t);
500 505
501static inline void tasklet_schedule(struct tasklet_struct *t) 506static inline void tasklet_schedule(struct tasklet_struct *t)
@@ -504,6 +509,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t)
504 __tasklet_schedule(t); 509 __tasklet_schedule(t);
505} 510}
506 511
512extern void ___tasklet_hi_schedule(struct tasklet_struct *t);
507extern void __tasklet_hi_schedule(struct tasklet_struct *t); 513extern void __tasklet_hi_schedule(struct tasklet_struct *t);
508 514
509static inline void tasklet_hi_schedule(struct tasklet_struct *t) 515static inline void tasklet_hi_schedule(struct tasklet_struct *t)
@@ -512,6 +518,7 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t)
512 __tasklet_hi_schedule(t); 518 __tasklet_hi_schedule(t);
513} 519}
514 520
521extern void ___tasklet_hi_schedule_first(struct tasklet_struct *t);
515extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); 522extern void __tasklet_hi_schedule_first(struct tasklet_struct *t);
516 523
517/* 524/*
@@ -541,7 +548,7 @@ static inline void tasklet_disable(struct tasklet_struct *t)
541} 548}
542 549
543static inline void tasklet_enable(struct tasklet_struct *t) 550static inline void tasklet_enable(struct tasklet_struct *t)
544{ 551{
545 smp_mb__before_atomic_dec(); 552 smp_mb__before_atomic_dec();
546 atomic_dec(&t->count); 553 atomic_dec(&t->count);
547} 554}
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index f363bc8fdc74..9f3199571994 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -126,6 +126,15 @@ static inline int mutex_is_locked(struct mutex *lock)
126 return atomic_read(&lock->count) != 1; 126 return atomic_read(&lock->count) != 1;
127} 127}
128 128
 129/* Return non-zero to abort; only the 'pre' side-effect may abort the operation. */
130typedef int (*side_effect_t)(unsigned long);
131extern void mutex_lock_sfx(struct mutex *lock,
132 side_effect_t pre, unsigned long pre_arg,
133 side_effect_t post, unsigned long post_arg);
134extern void mutex_unlock_sfx(struct mutex *lock,
135 side_effect_t pre, unsigned long pre_arg,
136 side_effect_t post, unsigned long post_arg);
137
129/* 138/*
130 * See kernel/mutex.c for detailed documentation of these APIs. 139 * See kernel/mutex.c for detailed documentation of these APIs.
131 * Also see Documentation/mutex-design.txt. 140 * Also see Documentation/mutex-design.txt.
@@ -145,6 +154,7 @@ extern void mutex_lock(struct mutex *lock);
145extern int __must_check mutex_lock_interruptible(struct mutex *lock); 154extern int __must_check mutex_lock_interruptible(struct mutex *lock);
146extern int __must_check mutex_lock_killable(struct mutex *lock); 155extern int __must_check mutex_lock_killable(struct mutex *lock);
147 156
157
148# define mutex_lock_nested(lock, subclass) mutex_lock(lock) 158# define mutex_lock_nested(lock, subclass) mutex_lock(lock)
149# define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) 159# define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
150# define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) 160# define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
index 5310d27abd2a..69e3f57661ec 100644
--- a/include/linux/semaphore.h
+++ b/include/linux/semaphore.h
@@ -49,4 +49,13 @@ extern int __must_check down_trylock(struct semaphore *sem);
49extern int __must_check down_timeout(struct semaphore *sem, long jiffies); 49extern int __must_check down_timeout(struct semaphore *sem, long jiffies);
50extern void up(struct semaphore *sem); 50extern void up(struct semaphore *sem);
51 51
52extern void __down(struct semaphore *sem);
53extern void __up(struct semaphore *sem);
54
55struct semaphore_waiter {
56 struct list_head list;
57 struct task_struct *task;
58 int up;
59};
60
52#endif /* __LINUX_SEMAPHORE_H */ 61#endif /* __LINUX_SEMAPHORE_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 25e02c941bac..5fecfb375eeb 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -83,6 +83,9 @@ struct work_struct {
83#ifdef CONFIG_LOCKDEP 83#ifdef CONFIG_LOCKDEP
84 struct lockdep_map lockdep_map; 84 struct lockdep_map lockdep_map;
85#endif 85#endif
86#ifdef CONFIG_LITMUS_SOFTIRQD
87 struct task_struct *owner;
88#endif
86}; 89};
87 90
88#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU) 91#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
@@ -115,11 +118,25 @@ struct execute_work {
115#define __WORK_INIT_LOCKDEP_MAP(n, k) 118#define __WORK_INIT_LOCKDEP_MAP(n, k)
116#endif 119#endif
117 120
121#ifdef CONFIG_LITMUS_SOFTIRQD
122#define __WORK_INIT_OWNER() \
123 .owner = NULL,
124
125#define PREPARE_OWNER(_work, _owner) \
126 do { \
127 (_work)->owner = (_owner); \
128 } while(0)
129#else
130#define __WORK_INIT_OWNER()
131#define PREPARE_OWNER(_work, _owner)
132#endif
133
118#define __WORK_INITIALIZER(n, f) { \ 134#define __WORK_INITIALIZER(n, f) { \
119 .data = WORK_DATA_STATIC_INIT(), \ 135 .data = WORK_DATA_STATIC_INIT(), \
120 .entry = { &(n).entry, &(n).entry }, \ 136 .entry = { &(n).entry, &(n).entry }, \
121 .func = (f), \ 137 .func = (f), \
122 __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ 138 __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \
139 __WORK_INIT_OWNER() \
123 } 140 }
124 141
125#define __DELAYED_WORK_INITIALIZER(n, f) { \ 142#define __DELAYED_WORK_INITIALIZER(n, f) { \
@@ -327,6 +344,7 @@ extern void flush_workqueue(struct workqueue_struct *wq);
327extern void flush_scheduled_work(void); 344extern void flush_scheduled_work(void);
328extern void flush_delayed_work(struct delayed_work *work); 345extern void flush_delayed_work(struct delayed_work *work);
329 346
347extern int __schedule_work(struct work_struct *work);
330extern int schedule_work(struct work_struct *work); 348extern int schedule_work(struct work_struct *work);
331extern int schedule_work_on(int cpu, struct work_struct *work); 349extern int schedule_work_on(int cpu, struct work_struct *work);
332extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay); 350extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
diff --git a/include/litmus/affinity.h b/include/litmus/affinity.h
new file mode 100644
index 000000000000..877b4099c6e2
--- /dev/null
+++ b/include/litmus/affinity.h
@@ -0,0 +1,78 @@
1#ifndef __LITMUS_AFFINITY_H
2#define __LITMUS_AFFINITY_H
3
4#include <linux/cpumask.h>
5
6/*
7 L1 (instr) = depth 0
8 L1 (data) = depth 1
9 L2 = depth 2
10 L3 = depth 3
11 */
12#define NUM_CACHE_LEVELS 4
13
14struct neighborhood
15{
16 unsigned int size[NUM_CACHE_LEVELS];
17 cpumask_var_t neighbors[NUM_CACHE_LEVELS];
18};
19
 20/* topology info is stored redundantly in a big array for fast lookups */
21extern struct neighborhood neigh_info[NR_CPUS];
22
23void init_topology(void); /* called by Litmus module's _init_litmus() */
24
25/* Works like:
26void get_nearest_available_cpu(cpu_entry_t* nearest, cpu_entry_t* start, cpu_entry_t* entries, int release_master)
27
28Set release_master = -1 for no RM.
29 */
30#define get_nearest_available_cpu(nearest, start, entries, release_master) \
31{ \
32 (nearest) = NULL; \
33 if(!(start)->linked) \
34 { \
35 (nearest) = (start); \
36 } \
37 else \
38 { \
39 int __level; \
40 int __cpu; \
41 struct neighborhood* __neighbors = &neigh_info[(start)->cpu]; \
42 \
43 for(__level = 0; (__level < NUM_CACHE_LEVELS) && !(nearest); ++__level) \
44 { \
45 if(__neighbors->size[__level] > 1) \
46 { \
47 for_each_cpu(__cpu, __neighbors->neighbors[__level]) \
48 { \
49 if(__cpu != (release_master)) \
50 { \
51 cpu_entry_t* __entry = &per_cpu((entries), __cpu); \
52 if(!__entry->linked) \
53 { \
54 (nearest) = __entry; \
55 break; \
56 } \
57 } \
58 } \
59 } \
60 else if(__neighbors->size[__level] == 0) \
61 { \
62 break; \
63 } \
64 } \
65 } \
66 \
67 if((nearest)) \
68 { \
69 TRACE("P%d is closest available CPU to P%d\n", (nearest)->cpu, (start)->cpu); \
70 } \
71 else \
72 { \
73 TRACE("Could not find an available CPU close to P%d\n", \
74 (start)->cpu); \
75 } \
76}
77
78#endif
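
For context, a hedged usage sketch (not part of the patch) of how a global plugin might invoke this macro when placing a newly ready job. cpu_entry_t, gsnedf_cpu_entries, link_task_to_cpu(), and requeue() are stand-ins for the plugin's own definitions.

/* Hedged sketch: pick the cache-nearest idle CPU for a waking job.
 * All helper names below are plugin-internal stand-ins. */
static void example_place_job(struct task_struct *t,
                              cpu_entry_t *last_cpu,
                              int release_master)
{
	cpu_entry_t *target = NULL;

	/* searches increasingly distant cache levels around last_cpu */
	get_nearest_available_cpu(target, last_cpu,
	                          gsnedf_cpu_entries, release_master);

	if (target)
		link_task_to_cpu(t, target);  /* run t on the nearby idle CPU */
	else
		requeue(t);                   /* no idle CPU found; wait in the ready queue */
}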
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
index caf2a1e6918c..c740e8fc3e88 100644
--- a/include/litmus/fdso.h
+++ b/include/litmus/fdso.h
@@ -18,9 +18,10 @@ typedef enum {
18 MIN_OBJ_TYPE = 0, 18 MIN_OBJ_TYPE = 0,
19 19
20 FMLP_SEM = 0, 20 FMLP_SEM = 0,
21 SRP_SEM = 1, 21 KFMLP_SEM = 1,
22 SRP_SEM = 2,
22 23
23 MAX_OBJ_TYPE = 1 24 MAX_OBJ_TYPE = SRP_SEM
24} obj_type_t; 25} obj_type_t;
25 26
26struct inode_obj_id { 27struct inode_obj_id {
@@ -64,6 +65,7 @@ static inline void* od_lookup(int od, obj_type_t type)
64} 65}
65 66
66#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM)) 67#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
68#define lookup_kfmlp_sem(od)((struct pi_semaphore*) od_lookup(od, KFMLP_SEM))
67#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM)) 69#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
68#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID)) 70#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
69 71
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index e7769ca36ec0..3df242bf272f 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -26,6 +26,7 @@ static inline int in_list(struct list_head* list)
26 ); 26 );
27} 27}
28 28
29
29struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq); 30struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
30 31
31#define NO_CPU 0xffffffff 32#define NO_CPU 0xffffffff
diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h
new file mode 100644
index 000000000000..34287f3cbb8d
--- /dev/null
+++ b/include/litmus/litmus_softirq.h
@@ -0,0 +1,199 @@
1#ifndef __LITMUS_SOFTIRQ_H
2#define __LITMUS_SOFTIRQ_H
3
4#include <linux/interrupt.h>
5#include <linux/workqueue.h>
6
 7/*
 8 Threaded tasklet handling for Litmus. Tasklets
 9 are scheduled with the priority of the tasklet's
 10 owner---that is, the RT task on whose behalf the
 11 tasklet runs.
 12
 13 Tasklets are currently scheduled in FIFO order with
 14 NO priority inheritance for "blocked" tasklets.
 15
 16 klitirqd assumes the priority of the owner of the
 17 tasklet when the tasklet is next to execute.
 18
 19 Currently, hi-tasklets are scheduled before
 20 low-tasklets, regardless of the priority of the
 21 low-tasklets. Likewise, low-tasklets are scheduled
 22 before work queue objects. This priority inversion
 23 probably needs to be fixed, though it is not an issue
 24 for our work with GPUs, since GPUs (and their associated
 25 klitirqds) are owned for exclusive time periods, so no
 26 inversions can occur.
 27 */
28
29
30
31#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD
32
33/* Spawns NR_LITMUS_SOFTIRQD klitirqd daemons.
 34 Actual launch of threads is deferred to kworker's
35 workqueue, so daemons will likely not be immediately
36 running when this function returns, though the required
37 data will be initialized.
38
39 @affinity_set: an array expressing the processor affinity
40 for each of the NR_LITMUS_SOFTIRQD daemons. May be set
41 to NULL for global scheduling.
42
43 - Examples -
44 8-CPU system with two CPU clusters:
45 affinity[] = {0, 0, 0, 0, 3, 3, 3, 3}
 46 NOTE: Daemons are not actually bound to the specified CPU, but rather
 47 to the cluster in which the CPU resides.
48
49 8-CPU system, partitioned:
50 affinity[] = {0, 1, 2, 3, 4, 5, 6, 7}
51
52 FIXME: change array to a CPU topology or array of cpumasks
53
54 */
55void spawn_klitirqd(int* affinity);
56
57
58/* Raises a flag to tell klitirqds to terminate.
59 Termination is async, so some threads may be running
60 after function return. */
61void kill_klitirqd(void);
62
63
 64/* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqd threads are ready
 65 to handle tasklets; 0 otherwise. */
66int klitirqd_is_ready(void);
67
 68/* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqd threads are ready
 69 to handle tasklets; 0 otherwise. */
70int klitirqd_is_dead(void);
71
72/* Flushes all pending work out to the OS for regular
73 * tasklet/work processing of the specified 'owner'
74 *
75 * PRECOND: klitirqd_thread must have a clear entry
76 * in the GPU registry, otherwise this call will become
77 * a no-op as work will loop back to the klitirqd_thread.
78 *
79 * Pass NULL for owner to flush ALL pending items.
80 */
81void flush_pending(struct task_struct* klitirqd_thread,
82 struct task_struct* owner);
83
84struct task_struct* get_klitirqd(unsigned int k_id);
85
86
87extern int __litmus_tasklet_schedule(
88 struct tasklet_struct *t,
89 unsigned int k_id);
90
91/* schedule a tasklet on klitirqd #k_id */
92static inline int litmus_tasklet_schedule(
93 struct tasklet_struct *t,
94 unsigned int k_id)
95{
96 int ret = 0;
97 if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
98 ret = __litmus_tasklet_schedule(t, k_id);
99 return(ret);
100}
101
102/* for use by __tasklet_schedule() */
103static inline int _litmus_tasklet_schedule(
104 struct tasklet_struct *t,
105 unsigned int k_id)
106{
107 return(__litmus_tasklet_schedule(t, k_id));
108}
109
110
111
112
113extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t,
114 unsigned int k_id);
115
116/* schedule a hi tasklet on klitirqd #k_id */
117static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t,
118 unsigned int k_id)
119{
120 int ret = 0;
121 if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
122 ret = __litmus_tasklet_hi_schedule(t, k_id);
123 return(ret);
124}
125
126/* for use by __tasklet_hi_schedule() */
127static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t,
128 unsigned int k_id)
129{
130 return(__litmus_tasklet_hi_schedule(t, k_id));
131}
132
133
134
135
136
137extern int __litmus_tasklet_hi_schedule_first(
138 struct tasklet_struct *t,
139 unsigned int k_id);
140
141/* schedule a hi tasklet on klitirqd #k_id on next go-around */
142/* PRECONDITION: Interrupts must be disabled. */
143static inline int litmus_tasklet_hi_schedule_first(
144 struct tasklet_struct *t,
145 unsigned int k_id)
146{
147 int ret = 0;
148 if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
149 ret = __litmus_tasklet_hi_schedule_first(t, k_id);
150 return(ret);
151}
152
153/* for use by __tasklet_hi_schedule_first() */
154static inline int _litmus_tasklet_hi_schedule_first(
155 struct tasklet_struct *t,
156 unsigned int k_id)
157{
158 return(__litmus_tasklet_hi_schedule_first(t, k_id));
159}
160
161
162
163//////////////
164
165extern int __litmus_schedule_work(
166 struct work_struct* w,
167 unsigned int k_id);
168
169static inline int litmus_schedule_work(
170 struct work_struct* w,
171 unsigned int k_id)
172{
173 return(__litmus_schedule_work(w, k_id));
174}
175
176
177
178///////////// mutex operations for client threads.
179
180void down_and_set_stat(struct task_struct* t,
181 enum klitirqd_sem_status to_set,
182 struct mutex* sem);
183
184void __down_and_reset_and_set_stat(struct task_struct* t,
185 enum klitirqd_sem_status to_reset,
186 enum klitirqd_sem_status to_set,
187 struct mutex* sem);
188
189void up_and_set_stat(struct task_struct* t,
190 enum klitirqd_sem_status to_set,
191 struct mutex* sem);
192
193
194
195void release_klitirqd_lock(struct task_struct* t);
196
197int reacquire_klitirqd_lock(struct task_struct* t);
198
199#endif
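
To make the intended call pattern concrete, a hedged sketch (not part of the patch) of how driver glue code might hand a bottom half to a klitirqd daemon once the owning real-time task is known. example_defer_bottom_half() and MY_KLITIRQD_ID are illustrative names, not symbols introduced by this patch.

#include <linux/interrupt.h>
#include <litmus/litmus_softirq.h>

/* Hedged sketch: defer a bottom half to klitirqd #MY_KLITIRQD_ID so it is
 * executed at the priority of 'owner'. MY_KLITIRQD_ID is illustrative. */
static void example_defer_bottom_half(struct tasklet_struct *t,
                                      struct task_struct *owner)
{
	t->owner = owner;                 /* field added by this patch */

	if (klitirqd_is_ready()) {
		/* returns 0 if the tasklet was already scheduled */
		litmus_tasklet_schedule(t, MY_KLITIRQD_ID);
	} else {
		/* daemons not running yet: fall back to the normal softirq path */
		tasklet_schedule(t);
	}
}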
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
new file mode 100644
index 000000000000..579301d77cf5
--- /dev/null
+++ b/include/litmus/nvidia_info.h
@@ -0,0 +1,37 @@
1#ifndef __LITMUS_NVIDIA_H
2#define __LITMUS_NVIDIA_H
3
4#include <linux/interrupt.h>
5
6
7#include <litmus/litmus_softirq.h>
8
9
10#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD
11
12int init_nvidia_info(void);
13
14int is_nvidia_func(void* func_addr);
15
16void dump_nvidia_info(const struct tasklet_struct *t);
17
18
 19// Returns the Nvidia device # associated with the given tasklet or work_struct.
20u32 get_tasklet_nv_device_num(const struct tasklet_struct *t);
21u32 get_work_nv_device_num(const struct work_struct *t);
22
23
24int init_nv_device_reg(void);
25//int get_nv_device_id(struct task_struct* owner);
26
27
28int reg_nv_device(int reg_device_id, int register_device);
29
30struct task_struct* get_nv_device_owner(u32 target_device_id);
31
32void lock_nv_registry(u32 reg_device_id, unsigned long* flags);
33void unlock_nv_registry(u32 reg_device_id, unsigned long* flags);
34
35void increment_nv_int_count(u32 device);
36
37#endif
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
index 260c6fe17986..244924f93c48 100644
--- a/include/litmus/preempt.h
+++ b/include/litmus/preempt.h
@@ -26,6 +26,7 @@ const char* sched_state_name(int s);
26 (x), #x, __FUNCTION__); \ 26 (x), #x, __FUNCTION__); \
27 } while (0); 27 } while (0);
28 28
29//#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) /* ignore */
29#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \ 30#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \
30 TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \ 31 TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \
31 cpu, (x), sched_state_name(x), \ 32 cpu, (x), sched_state_name(x), \
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 5de422c742f6..53af3ce1d955 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -69,6 +69,8 @@ struct control_page {
69/* don't export internal data structures to user space (liblitmus) */ 69/* don't export internal data structures to user space (liblitmus) */
70#ifdef __KERNEL__ 70#ifdef __KERNEL__
71 71
72#include <linux/semaphore.h>
73
72struct _rt_domain; 74struct _rt_domain;
73struct bheap_node; 75struct bheap_node;
74struct release_heap; 76struct release_heap;
@@ -94,6 +96,14 @@ struct rt_job {
94 96
95struct pfair_param; 97struct pfair_param;
96 98
99enum klitirqd_sem_status
100{
101 NEED_TO_REACQUIRE,
102 REACQUIRING,
103 NOT_HELD,
104 HELD
105};
106
97/* RT task parameters for scheduling extensions 107/* RT task parameters for scheduling extensions
98 * These parameters are inherited during clone and therefore must 108 * These parameters are inherited during clone and therefore must
99 * be explicitly set up before the task set is launched. 109 * be explicitly set up before the task set is launched.
@@ -108,6 +118,38 @@ struct rt_param {
108 /* is the task present? (true if it can be scheduled) */ 118 /* is the task present? (true if it can be scheduled) */
109 unsigned int present:1; 119 unsigned int present:1;
110 120
121#ifdef CONFIG_LITMUS_SOFTIRQD
122 /* proxy threads have minimum priority by default */
123 unsigned int is_proxy_thread:1;
124
125 /* pointer to klitirqd currently working on this
126 task_struct's behalf. only set by the task pointed
127 to by klitirqd.
128
129 ptr only valid if is_proxy_thread == 0
130 */
131 struct task_struct* cur_klitirqd;
132
 133 /* Used to enforce mutual exclusion between job
 134 * execution and klitirqd execution. A job must always hold
 135 * its klitirqd_sem to execute. A klitirqd instance
 136 * must hold the semaphore before executing on behalf
 137 * of a job.
138 */
139 //struct semaphore klitirqd_sem;
140 struct mutex klitirqd_sem;
141
142 /* status of held klitirqd_sem, even if the held klitirqd_sem is from
143 another task (only proxy threads do this though).
144 */
145 atomic_t klitirqd_sem_stat;
146#endif
147
148#ifdef CONFIG_LITMUS_NVIDIA
149 /* number of top-half interrupts handled on behalf of current job */
150 atomic_t nv_int_count;
151#endif
152
111#ifdef CONFIG_LITMUS_LOCKING 153#ifdef CONFIG_LITMUS_LOCKING
112 /* Is the task being priority-boosted by a locking protocol? */ 154 /* Is the task being priority-boosted by a locking protocol? */
113 unsigned int priority_boosted:1; 155 unsigned int priority_boosted:1;
@@ -128,7 +170,7 @@ struct rt_param {
128 * an increased task priority. 170 * an increased task priority.
129 */ 171 */
130 struct task_struct* inh_task; 172 struct task_struct* inh_task;
131 173
132#ifdef CONFIG_NP_SECTION 174#ifdef CONFIG_NP_SECTION
133 /* For the FMLP under PSN-EDF, it is required to make the task 175 /* For the FMLP under PSN-EDF, it is required to make the task
134 * non-preemptive from kernel space. In order not to interfere with 176 * non-preemptive from kernel space. In order not to interfere with
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index 6e7cabdddae8..df50930d14a0 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -29,7 +29,6 @@ typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
29 */ 29 */
30typedef void (*finish_switch_t)(struct task_struct *prev); 30typedef void (*finish_switch_t)(struct task_struct *prev);
31 31
32
33/********************* task state changes ********************/ 32/********************* task state changes ********************/
34 33
35/* Called to setup a new real-time task. 34/* Called to setup a new real-time task.
@@ -58,6 +57,17 @@ typedef void (*task_exit_t) (struct task_struct *);
58typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type, 57typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
59 void* __user config); 58 void* __user config);
60 59
60/* Called to change inheritance levels of given task */
61typedef void (*set_prio_inh_t)(struct task_struct* t,
62 struct task_struct* prio_inh);
63typedef void (*clear_prio_inh_t)(struct task_struct* t);
64
65
66typedef void (*set_prio_inh_klitirq_t)(struct task_struct* klitirqd,
67 struct task_struct* old_owner,
68 struct task_struct* new_owner);
69typedef void (*clear_prio_inh_klitirqd_t)(struct task_struct* klitirqd,
70 struct task_struct* old_owner);
61 71
62/********************* sys call backends ********************/ 72/********************* sys call backends ********************/
63/* This function causes the caller to sleep until the next release */ 73/* This function causes the caller to sleep until the next release */
@@ -88,7 +98,7 @@ struct sched_plugin {
88 /* task state changes */ 98 /* task state changes */
89 admit_task_t admit_task; 99 admit_task_t admit_task;
90 100
91 task_new_t task_new; 101 task_new_t task_new;
92 task_wake_up_t task_wake_up; 102 task_wake_up_t task_wake_up;
93 task_block_t task_block; 103 task_block_t task_block;
94 task_exit_t task_exit; 104 task_exit_t task_exit;
@@ -96,6 +106,14 @@ struct sched_plugin {
96#ifdef CONFIG_LITMUS_LOCKING 106#ifdef CONFIG_LITMUS_LOCKING
97 /* locking protocols */ 107 /* locking protocols */
98 allocate_lock_t allocate_lock; 108 allocate_lock_t allocate_lock;
109
110 set_prio_inh_t set_prio_inh;
111 clear_prio_inh_t clear_prio_inh;
112#endif
113
114#ifdef CONFIG_LITMUS_SOFTIRQD
115 set_prio_inh_klitirq_t set_prio_inh_klitirqd;
116 clear_prio_inh_klitirqd_t clear_prio_inh_klitirqd;
99#endif 117#endif
100} __attribute__ ((__aligned__(SMP_CACHE_BYTES))); 118} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
101 119
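
For orientation, a hedged sketch of how a plugin might populate the new hooks in its sched_plugin descriptor; the gsnedf_* handlers are placeholders for the real implementations this patch adds in litmus/sched_gsn_edf.c and litmus/sched_cedf.c.

/* Hedged sketch: wiring the new inheritance callbacks into a plugin
 * descriptor. The gsnedf_* names are placeholders. */
static struct sched_plugin example_plugin = {
	.plugin_name             = "EXAMPLE",
	/* ... the usual task-state and scheduling callbacks ... */
#ifdef CONFIG_LITMUS_LOCKING
	.allocate_lock           = gsnedf_allocate_lock,
	.set_prio_inh            = gsnedf_set_prio_inh,
	.clear_prio_inh          = gsnedf_clear_prio_inh,
#endif
#ifdef CONFIG_LITMUS_SOFTIRQD
	.set_prio_inh_klitirqd   = gsnedf_set_prio_inh_klitirqd,
	.clear_prio_inh_klitirqd = gsnedf_clear_prio_inh_klitirqd,
#endif
};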
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
index 7ca34cb13881..1486c778aff8 100644
--- a/include/litmus/sched_trace.h
+++ b/include/litmus/sched_trace.h
@@ -11,12 +11,12 @@ struct st_trace_header {
11 u8 cpu; /* On which CPU was it recorded? */ 11 u8 cpu; /* On which CPU was it recorded? */
12 u16 pid; /* PID of the task. */ 12 u16 pid; /* PID of the task. */
13 u32 job; /* The job sequence number. */ 13 u32 job; /* The job sequence number. */
14}; 14} __attribute__((packed));
15 15
16#define ST_NAME_LEN 16 16#define ST_NAME_LEN 16
17struct st_name_data { 17struct st_name_data {
18 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */ 18 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */
19}; 19} __attribute__((packed));
20 20
21struct st_param_data { /* regular params */ 21struct st_param_data { /* regular params */
22 u32 wcet; 22 u32 wcet;
@@ -25,30 +25,29 @@ struct st_param_data { /* regular params */
25 u8 partition; 25 u8 partition;
26 u8 class; 26 u8 class;
27 u8 __unused[2]; 27 u8 __unused[2];
28}; 28} __attribute__((packed));
29 29
30struct st_release_data { /* A job is was/is going to be released. */ 30struct st_release_data { /* A job is was/is going to be released. */
31 u64 release; /* What's the release time? */ 31 u64 release; /* What's the release time? */
32 u64 deadline; /* By when must it finish? */ 32 u64 deadline; /* By when must it finish? */
33}; 33} __attribute__((packed));
34 34
35struct st_assigned_data { /* A job was asigned to a CPU. */ 35struct st_assigned_data { /* A job was asigned to a CPU. */
36 u64 when; 36 u64 when;
37 u8 target; /* Where should it execute? */ 37 u8 target; /* Where should it execute? */
38 u8 __unused[7]; 38 u8 __unused[7];
39}; 39} __attribute__((packed));
40 40
41struct st_switch_to_data { /* A process was switched to on a given CPU. */ 41struct st_switch_to_data { /* A process was switched to on a given CPU. */
42 u64 when; /* When did this occur? */ 42 u64 when; /* When did this occur? */
43 u32 exec_time; /* Time the current job has executed. */ 43 u32 exec_time; /* Time the current job has executed. */
44 u8 __unused[4]; 44 u8 __unused[4];
45 45} __attribute__((packed));
46};
47 46
48struct st_switch_away_data { /* A process was switched away from on a given CPU. */ 47struct st_switch_away_data { /* A process was switched away from on a given CPU. */
49 u64 when; 48 u64 when;
50 u64 exec_time; 49 u64 exec_time;
51}; 50} __attribute__((packed));
52 51
53struct st_completion_data { /* A job completed. */ 52struct st_completion_data { /* A job completed. */
54 u64 when; 53 u64 when;
@@ -56,35 +55,92 @@ struct st_completion_data { /* A job completed. */
56 * next task automatically; set to 0 otherwise. 55 * next task automatically; set to 0 otherwise.
57 */ 56 */
58 u8 __uflags:7; 57 u8 __uflags:7;
59 u8 __unused[7]; 58 u16 nv_int_count;
60}; 59 u8 __unused[5];
60} __attribute__((packed));
61 61
62struct st_block_data { /* A task blocks. */ 62struct st_block_data { /* A task blocks. */
63 u64 when; 63 u64 when;
64 u64 __unused; 64 u64 __unused;
65}; 65} __attribute__((packed));
66 66
67struct st_resume_data { /* A task resumes. */ 67struct st_resume_data { /* A task resumes. */
68 u64 when; 68 u64 when;
69 u64 __unused; 69 u64 __unused;
70}; 70} __attribute__((packed));
71 71
72struct st_action_data { 72struct st_action_data {
73 u64 when; 73 u64 when;
74 u8 action; 74 u8 action;
75 u8 __unused[7]; 75 u8 __unused[7];
76}; 76} __attribute__((packed));
77 77
78struct st_sys_release_data { 78struct st_sys_release_data {
79 u64 when; 79 u64 when;
80 u64 release; 80 u64 release;
81}; 81} __attribute__((packed));
82
83
84struct st_tasklet_release_data {
85 u64 when;
86 u64 __unused;
87} __attribute__((packed));
88
89struct st_tasklet_begin_data {
90 u64 when;
91 u16 exe_pid;
92 u8 __unused[6];
93} __attribute__((packed));
94
95struct st_tasklet_end_data {
96 u64 when;
97 u16 exe_pid;
98 u8 flushed;
99 u8 __unused[5];
100} __attribute__((packed));
101
102
103struct st_work_release_data {
104 u64 when;
105 u64 __unused;
106} __attribute__((packed));
107
108struct st_work_begin_data {
109 u64 when;
110 u16 exe_pid;
111 u8 __unused[6];
112} __attribute__((packed));
113
114struct st_work_end_data {
115 u64 when;
116 u16 exe_pid;
117 u8 flushed;
118 u8 __unused[5];
119} __attribute__((packed));
120
121struct st_effective_priority_change_data {
122 u64 when;
123 u16 inh_pid;
124 u8 __unused[6];
125} __attribute__((packed));
126
127struct st_nv_interrupt_begin_data {
128 u64 when;
129 u32 device;
130 u8 __unused[4];
131} __attribute__((packed));
132
133struct st_nv_interrupt_end_data {
134 u64 when;
135 u32 device;
136 u8 __unused[4];
137} __attribute__((packed));
82 138
83#define DATA(x) struct st_ ## x ## _data x; 139#define DATA(x) struct st_ ## x ## _data x;
84 140
85typedef enum { 141typedef enum {
86 ST_NAME = 1, /* Start at one, so that we can spot 142 ST_NAME = 1, /* Start at one, so that we can spot
87 * uninitialized records. */ 143 * uninitialized records. */
88 ST_PARAM, 144 ST_PARAM,
89 ST_RELEASE, 145 ST_RELEASE,
90 ST_ASSIGNED, 146 ST_ASSIGNED,
@@ -94,7 +150,16 @@ typedef enum {
94 ST_BLOCK, 150 ST_BLOCK,
95 ST_RESUME, 151 ST_RESUME,
96 ST_ACTION, 152 ST_ACTION,
97 ST_SYS_RELEASE 153 ST_SYS_RELEASE,
154 ST_TASKLET_RELEASE,
155 ST_TASKLET_BEGIN,
156 ST_TASKLET_END,
157 ST_WORK_RELEASE,
158 ST_WORK_BEGIN,
159 ST_WORK_END,
160 ST_EFF_PRIO_CHANGE,
161 ST_NV_INTERRUPT_BEGIN,
162 ST_NV_INTERRUPT_END,
98} st_event_record_type_t; 163} st_event_record_type_t;
99 164
100struct st_event_record { 165struct st_event_record {
@@ -113,8 +178,17 @@ struct st_event_record {
113 DATA(resume); 178 DATA(resume);
114 DATA(action); 179 DATA(action);
115 DATA(sys_release); 180 DATA(sys_release);
181 DATA(tasklet_release);
182 DATA(tasklet_begin);
183 DATA(tasklet_end);
184 DATA(work_release);
185 DATA(work_begin);
186 DATA(work_end);
187 DATA(effective_priority_change);
188 DATA(nv_interrupt_begin);
189 DATA(nv_interrupt_end);
116 } data; 190 } data;
117}; 191} __attribute__((packed));
118 192
119#undef DATA 193#undef DATA
120 194
@@ -129,6 +203,8 @@ struct st_event_record {
129 ft_event1(id, callback, task) 203 ft_event1(id, callback, task)
130#define SCHED_TRACE2(id, callback, task, xtra) \ 204#define SCHED_TRACE2(id, callback, task, xtra) \
131 ft_event2(id, callback, task, xtra) 205 ft_event2(id, callback, task, xtra)
206#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) \
207 ft_event3(id, callback, task, xtra1, xtra2)
132 208
133/* provide prototypes; needed on sparc64 */ 209/* provide prototypes; needed on sparc64 */
134#ifndef NO_TASK_TRACE_DECLS 210#ifndef NO_TASK_TRACE_DECLS
@@ -155,12 +231,45 @@ feather_callback void do_sched_trace_action(unsigned long id,
155feather_callback void do_sched_trace_sys_release(unsigned long id, 231feather_callback void do_sched_trace_sys_release(unsigned long id,
156 lt_t* start); 232 lt_t* start);
157 233
234
235feather_callback void do_sched_trace_tasklet_release(unsigned long id,
236 struct task_struct* owner);
237feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
238 struct task_struct* owner);
239feather_callback void do_sched_trace_tasklet_end(unsigned long id,
240 struct task_struct* owner,
241 unsigned long flushed);
242
243feather_callback void do_sched_trace_work_release(unsigned long id,
244 struct task_struct* owner);
245feather_callback void do_sched_trace_work_begin(unsigned long id,
246 struct task_struct* owner,
247 struct task_struct* exe);
248feather_callback void do_sched_trace_work_end(unsigned long id,
249 struct task_struct* owner,
250 struct task_struct* exe,
251 unsigned long flushed);
252
253feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
254 struct task_struct* task,
255 struct task_struct* inh);
256
257feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
258 u32 device);
259feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id,
260 unsigned long unused);
261
262
263/* returns true if we're tracing an interrupt on current CPU */
264/* int is_interrupt_tracing_active(void); */
265
158#endif 266#endif
159 267
160#else 268#else
161 269
162#define SCHED_TRACE(id, callback, task) /* no tracing */ 270#define SCHED_TRACE(id, callback, task) /* no tracing */
163#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ 271#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
272#define SCHED_TRACE3(id, callback, task, xtra1, xtra2)
164 273
165#endif 274#endif
166 275
@@ -193,6 +302,35 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
193 SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when) 302 SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when)
194 303
195 304
305#define sched_trace_tasklet_release(t) \
306 SCHED_TRACE(SCHED_TRACE_BASE_ID + 11, do_sched_trace_tasklet_release, t)
307
308#define sched_trace_tasklet_begin(t) \
309 SCHED_TRACE(SCHED_TRACE_BASE_ID + 12, do_sched_trace_tasklet_begin, t)
310
311#define sched_trace_tasklet_end(t, flushed) \
312 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 13, do_sched_trace_tasklet_end, t, flushed)
313
314
315#define sched_trace_work_release(t) \
316 SCHED_TRACE(SCHED_TRACE_BASE_ID + 14, do_sched_trace_work_release, t)
317
318#define sched_trace_work_begin(t, e) \
319 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 15, do_sched_trace_work_begin, t, e)
320
321#define sched_trace_work_end(t, e, flushed) \
322 SCHED_TRACE3(SCHED_TRACE_BASE_ID + 16, do_sched_trace_work_end, t, e, flushed)
323
324
325#define sched_trace_eff_prio_change(t, inh) \
326 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 17, do_sched_trace_eff_prio_change, t, inh)
327
328
329#define sched_trace_nv_interrupt_begin(d) \
330 SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d)
331#define sched_trace_nv_interrupt_end() \
332 SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, 0ul)
333
196#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ 334#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
197 335
198#endif /* __KERNEL__ */ 336#endif /* __KERNEL__ */
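
As a concrete reading of the new events, a hedged sketch (not part of the patch) of how a klitirqd-style executor might bracket a deferred tasklet with the begin/end tracepoints; litmus/litmus_softirq.c presumably does something along these lines.

#include <linux/interrupt.h>
#include <litmus/sched_trace.h>

/* Hedged sketch: trace execution of a tasklet that was deferred on
 * behalf of t->owner. The second argument of sched_trace_tasklet_end()
 * is the 'flushed' flag (0 = actually executed). */
static void example_run_deferred_tasklet(struct tasklet_struct *t)
{
	sched_trace_tasklet_begin(t->owner);
	t->func(t->data);                        /* run the bottom half */
	sched_trace_tasklet_end(t->owner, 0ul);
}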
diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h
new file mode 100644
index 000000000000..c2c872639880
--- /dev/null
+++ b/include/litmus/sched_trace_external.h
@@ -0,0 +1,42 @@
1/*
 2 * sched_trace_external.h -- record scheduler events to a byte stream for offline analysis.
3 */
4#ifndef _LINUX_SCHED_TRACE_EXTERNAL_H_
5#define _LINUX_SCHED_TRACE_EXTERNAL_H_
6
7extern void __sched_trace_tasklet_begin_external(struct task_struct* t);
8static inline void sched_trace_tasklet_begin_external(struct task_struct* t)
9{
10 __sched_trace_tasklet_begin_external(t);
11}
12
13extern void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed);
14static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
15{
16 __sched_trace_tasklet_end_external(t, flushed);
17}
18
19extern void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e);
20static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
21{
22 __sched_trace_work_begin_external(t, e);
23}
24
25extern void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f);
26static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
27{
28 __sched_trace_work_end_external(t, e, f);
29}
30
31extern void __sched_trace_nv_interrupt_begin_external(u32 device);
32static inline void sched_trace_nv_interrupt_begin_external(u32 device)
33{
34 __sched_trace_nv_interrupt_begin_external(device);
35}
36
37extern void __sched_trace_nv_interrupt_end_external(void);
38static inline void sched_trace_nv_interrupt_end_external(void)
39{
40 __sched_trace_nv_interrupt_end_external();
41}
42#endif
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
index 94264c27d9ac..c6efc4c40af2 100644
--- a/include/litmus/unistd_32.h
+++ b/include/litmus/unistd_32.h
@@ -17,5 +17,6 @@
17#define __NR_wait_for_ts_release __LSC(9) 17#define __NR_wait_for_ts_release __LSC(9)
18#define __NR_release_ts __LSC(10) 18#define __NR_release_ts __LSC(10)
19#define __NR_null_call __LSC(11) 19#define __NR_null_call __LSC(11)
20#define __NR_register_nv_device __LSC(12)
20 21
21#define NR_litmus_syscalls 12 22#define NR_litmus_syscalls 13
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
index d5ced0d2642c..b44a7c33bdf8 100644
--- a/include/litmus/unistd_64.h
+++ b/include/litmus/unistd_64.h
@@ -29,5 +29,8 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
29__SYSCALL(__NR_release_ts, sys_release_ts) 29__SYSCALL(__NR_release_ts, sys_release_ts)
30#define __NR_null_call __LSC(11) 30#define __NR_null_call __LSC(11)
31__SYSCALL(__NR_null_call, sys_null_call) 31__SYSCALL(__NR_null_call, sys_null_call)
32#define __NR_register_nv_device __LSC(12)
33__SYSCALL(__NR_register_nv_device, sys_register_nv_device)
32 34
33#define NR_litmus_syscalls 12 35
36#define NR_litmus_syscalls 13
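
For user space, a hedged sketch of what a call to the new syscall might look like, assuming it forwards its two arguments to reg_nv_device(reg_device_id, register_device) as declared in include/litmus/nvidia_info.h; the example program and the include path are illustrative.

/* Hedged user-space sketch: register for NV device 0 before submitting
 * GPU work, unregister afterwards. Assumes the second argument is a
 * register(1)/unregister(0) flag, mirroring reg_nv_device(). */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <litmus/unistd_64.h>   /* __NR_register_nv_device (illustrative include path) */

int main(void)
{
	if (syscall(__NR_register_nv_device, 0, 1) < 0)
		perror("register_nv_device");

	/* ... issue GPU work; its interrupts are now attributed to this task ... */

	if (syscall(__NR_register_nv_device, 0, 0) < 0)
		perror("unregister_nv_device");

	return 0;
}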
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index f2852a510232..ebff2cf715c5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -530,7 +530,7 @@ static void print_lock(struct held_lock *hlock)
530 print_ip_sym(hlock->acquire_ip); 530 print_ip_sym(hlock->acquire_ip);
531} 531}
532 532
533static void lockdep_print_held_locks(struct task_struct *curr) 533void lockdep_print_held_locks(struct task_struct *curr)
534{ 534{
535 int i, depth = curr->lockdep_depth; 535 int i, depth = curr->lockdep_depth;
536 536
@@ -546,6 +546,7 @@ static void lockdep_print_held_locks(struct task_struct *curr)
546 print_lock(curr->held_locks + i); 546 print_lock(curr->held_locks + i);
547 } 547 }
548} 548}
549EXPORT_SYMBOL(lockdep_print_held_locks);
549 550
550static void print_kernel_version(void) 551static void print_kernel_version(void)
551{ 552{
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 200407c1502f..435685ecd068 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -496,3 +496,144 @@ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
496 return 1; 496 return 1;
497} 497}
498EXPORT_SYMBOL(atomic_dec_and_mutex_lock); 498EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
499
500
501
502
503
504
505
506
507//__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_);
508
509void mutex_lock_sfx(struct mutex *lock,
510 side_effect_t pre, unsigned long pre_arg,
511 side_effect_t post, unsigned long post_arg)
512{
513 long state = TASK_UNINTERRUPTIBLE;
514 unsigned int subclass = 0;
515 unsigned long ip = _RET_IP_;
516
517
518 struct task_struct *task = current;
519 struct mutex_waiter waiter;
520 unsigned long flags;
521
522 preempt_disable();
523 mutex_acquire(&lock->dep_map, subclass, 0, ip);
524
525 spin_lock_mutex(&lock->wait_lock, flags);
526
527 if(pre)
528 {
529 if(unlikely(pre(pre_arg)))
530 {
 531 // note: aborting here will confuse lockdep's CONFIG_PROVE_LOCKING...
532 spin_unlock_mutex(&lock->wait_lock, flags);
533 preempt_enable();
534 return;
535 }
536 }
537
538 debug_mutex_lock_common(lock, &waiter);
539 debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
540
541 /* add waiting tasks to the end of the waitqueue (FIFO): */
542 list_add_tail(&waiter.list, &lock->wait_list);
543 waiter.task = task;
544
545 if (atomic_xchg(&lock->count, -1) == 1)
546 goto done;
547
548 lock_contended(&lock->dep_map, ip);
549
550 for (;;) {
551 /*
552 * Lets try to take the lock again - this is needed even if
553 * we get here for the first time (shortly after failing to
554 * acquire the lock), to make sure that we get a wakeup once
555 * it's unlocked. Later on, if we sleep, this is the
556 * operation that gives us the lock. We xchg it to -1, so
557 * that when we release the lock, we properly wake up the
558 * other waiters:
559 */
560 if (atomic_xchg(&lock->count, -1) == 1)
561 break;
562
563 __set_task_state(task, state);
564
 565 /* didn't get the lock, go to sleep: */
566 spin_unlock_mutex(&lock->wait_lock, flags);
567 preempt_enable_no_resched();
568 schedule();
569 preempt_disable();
570 spin_lock_mutex(&lock->wait_lock, flags);
571 }
572
573done:
574 lock_acquired(&lock->dep_map, ip);
575 /* got the lock - rejoice! */
576 mutex_remove_waiter(lock, &waiter, current_thread_info());
577 mutex_set_owner(lock);
578
579 /* set it to 0 if there are no waiters left: */
580 if (likely(list_empty(&lock->wait_list)))
581 atomic_set(&lock->count, 0);
582
583 if(post)
584 post(post_arg);
585
586 spin_unlock_mutex(&lock->wait_lock, flags);
587
588 debug_mutex_free_waiter(&waiter);
589 preempt_enable();
590
591 //return 0;
592}
593EXPORT_SYMBOL(mutex_lock_sfx);
594
595
596
597//__mutex_unlock_common_slowpath(lock_count, 1);
598
599void mutex_unlock_sfx(struct mutex *lock,
600 side_effect_t pre, unsigned long pre_arg,
601 side_effect_t post, unsigned long post_arg)
602{
603 //struct mutex *lock = container_of(lock_count, struct mutex, count);
604 unsigned long flags;
605
606 spin_lock_mutex(&lock->wait_lock, flags);
607
608 if(pre)
609 pre(pre_arg);
610
611 //mutex_release(&lock->dep_map, nested, _RET_IP_);
612 mutex_release(&lock->dep_map, 1, _RET_IP_);
613 debug_mutex_unlock(lock);
614
615 /*
 616 * Some architectures leave the lock unlocked in the fastpath failure
 617 * case, others need to leave it locked. In the latter case we have to
 618 * unlock it here
619 */
620 if (__mutex_slowpath_needs_to_unlock())
621 atomic_set(&lock->count, 1);
622
623 if (!list_empty(&lock->wait_list)) {
624 /* get the first entry from the wait-list: */
625 struct mutex_waiter *waiter =
626 list_entry(lock->wait_list.next,
627 struct mutex_waiter, list);
628
629 debug_mutex_wake_waiter(lock, waiter);
630
631 wake_up_process(waiter->task);
632 }
633
634 if(post)
635 post(post_arg);
636
637 spin_unlock_mutex(&lock->wait_lock, flags);
638}
639EXPORT_SYMBOL(mutex_unlock_sfx);
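
To illustrate the contract of the side-effect variants, a hedged sketch of a caller (not part of the patch): both hooks run while the mutex's internal wait_lock spinlock is held, so they must not sleep, and a non-zero return from the 'pre' hook of mutex_lock_sfx() abandons the acquisition. The my_device structure and field names are illustrative.

#include <linux/mutex.h>

/* Illustrative structure for the sketch below. */
struct my_device {
	struct mutex lock;
	int dead;           /* set when the device is torn down */
	int lock_aborted;   /* set by the pre hook so the caller can tell */
};

/* Pre hook: runs under lock->wait_lock; returning non-zero makes
 * mutex_lock_sfx() bail out without acquiring the mutex. */
static int abort_if_dead(unsigned long arg)
{
	struct my_device *dev = (struct my_device *)arg;
	dev->lock_aborted = dev->dead;
	return dev->dead;
}

static void example_use(struct my_device *dev)
{
	dev->lock_aborted = 0;
	mutex_lock_sfx(&dev->lock, abort_if_dead, (unsigned long)dev, NULL, 0);
	if (dev->lock_aborted)
		return;                 /* lock was never taken */

	/* ... critical section ... */

	mutex_unlock_sfx(&dev->lock, NULL, 0, NULL, 0);
}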
diff --git a/kernel/sched.c b/kernel/sched.c
index c5d775079027..3162605ffc91 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -82,6 +82,10 @@
82#include <litmus/sched_trace.h> 82#include <litmus/sched_trace.h>
83#include <litmus/trace.h> 83#include <litmus/trace.h>
84 84
85#ifdef CONFIG_LITMUS_SOFTIRQD
86#include <litmus/litmus_softirq.h>
87#endif
88
85static void litmus_tick(struct rq*, struct task_struct*); 89static void litmus_tick(struct rq*, struct task_struct*);
86 90
87#define CREATE_TRACE_POINTS 91#define CREATE_TRACE_POINTS
@@ -3789,6 +3793,7 @@ pick_next_task(struct rq *rq)
3789 } 3793 }
3790} 3794}
3791 3795
3796
3792/* 3797/*
3793 * schedule() is the main scheduler function. 3798 * schedule() is the main scheduler function.
3794 */ 3799 */
@@ -3807,6 +3812,10 @@ need_resched:
3807 rcu_note_context_switch(cpu); 3812 rcu_note_context_switch(cpu);
3808 prev = rq->curr; 3813 prev = rq->curr;
3809 3814
3815#ifdef CONFIG_LITMUS_SOFTIRQD
3816 release_klitirqd_lock(prev);
3817#endif
3818
3810 release_kernel_lock(prev); 3819 release_kernel_lock(prev);
3811need_resched_nonpreemptible: 3820need_resched_nonpreemptible:
3812 TS_SCHED_START; 3821 TS_SCHED_START;
@@ -3882,15 +3891,20 @@ need_resched_nonpreemptible:
3882 3891
3883 if (sched_state_validate_switch() || unlikely(reacquire_kernel_lock(prev))) 3892 if (sched_state_validate_switch() || unlikely(reacquire_kernel_lock(prev)))
3884 goto need_resched_nonpreemptible; 3893 goto need_resched_nonpreemptible;
3885 3894
3886 preempt_enable_no_resched(); 3895 preempt_enable_no_resched();
3896
3887 if (need_resched()) 3897 if (need_resched())
3888 goto need_resched; 3898 goto need_resched;
3889 3899
3900 reacquire_klitirqd_lock(prev);
3901
3890 srp_ceiling_block(); 3902 srp_ceiling_block();
3891} 3903}
3892EXPORT_SYMBOL(schedule); 3904EXPORT_SYMBOL(schedule);
3893 3905
3906
3907
3894#ifdef CONFIG_MUTEX_SPIN_ON_OWNER 3908#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
3895/* 3909/*
3896 * Look out! "owner" is an entirely speculative pointer 3910 * Look out! "owner" is an entirely speculative pointer
@@ -4051,6 +4065,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
4051 } 4065 }
4052} 4066}
4053 4067
4068
4054/** 4069/**
4055 * __wake_up - wake up threads blocked on a waitqueue. 4070 * __wake_up - wake up threads blocked on a waitqueue.
4056 * @q: the waitqueue 4071 * @q: the waitqueue
@@ -4236,6 +4251,12 @@ void __sched wait_for_completion(struct completion *x)
4236} 4251}
4237EXPORT_SYMBOL(wait_for_completion); 4252EXPORT_SYMBOL(wait_for_completion);
4238 4253
4254void __sched __wait_for_completion_locked(struct completion *x)
4255{
4256 do_wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
4257}
4258EXPORT_SYMBOL(__wait_for_completion_locked);
4259
4239/** 4260/**
4240 * wait_for_completion_timeout: - waits for completion of a task (w/timeout) 4261 * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
4241 * @x: holds the state of this particular completion 4262 * @x: holds the state of this particular completion
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index 94a62c0d4ade..c947a046a6d7 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -33,11 +33,11 @@
33#include <linux/spinlock.h> 33#include <linux/spinlock.h>
34#include <linux/ftrace.h> 34#include <linux/ftrace.h>
35 35
36static noinline void __down(struct semaphore *sem); 36noinline void __down(struct semaphore *sem);
37static noinline int __down_interruptible(struct semaphore *sem); 37static noinline int __down_interruptible(struct semaphore *sem);
38static noinline int __down_killable(struct semaphore *sem); 38static noinline int __down_killable(struct semaphore *sem);
39static noinline int __down_timeout(struct semaphore *sem, long jiffies); 39static noinline int __down_timeout(struct semaphore *sem, long jiffies);
40static noinline void __up(struct semaphore *sem); 40noinline void __up(struct semaphore *sem);
41 41
42/** 42/**
43 * down - acquire the semaphore 43 * down - acquire the semaphore
@@ -190,11 +190,13 @@ EXPORT_SYMBOL(up);
190 190
191/* Functions for the contended case */ 191/* Functions for the contended case */
192 192
193/*
193struct semaphore_waiter { 194struct semaphore_waiter {
194 struct list_head list; 195 struct list_head list;
195 struct task_struct *task; 196 struct task_struct *task;
196 int up; 197 int up;
197}; 198};
199 */
198 200
199/* 201/*
200 * Because this function is inlined, the 'state' parameter will be 202 * Because this function is inlined, the 'state' parameter will be
@@ -233,10 +235,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
233 return -EINTR; 235 return -EINTR;
234} 236}
235 237
236static noinline void __sched __down(struct semaphore *sem) 238noinline void __sched __down(struct semaphore *sem)
237{ 239{
238 __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); 240 __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
239} 241}
242EXPORT_SYMBOL(__down);
243
240 244
241static noinline int __sched __down_interruptible(struct semaphore *sem) 245static noinline int __sched __down_interruptible(struct semaphore *sem)
242{ 246{
@@ -253,7 +257,7 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)
253 return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); 257 return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies);
254} 258}
255 259
256static noinline void __sched __up(struct semaphore *sem) 260noinline void __sched __up(struct semaphore *sem)
257{ 261{
258 struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, 262 struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
259 struct semaphore_waiter, list); 263 struct semaphore_waiter, list);
@@ -261,3 +265,4 @@ static noinline void __sched __up(struct semaphore *sem)
261 waiter->up = 1; 265 waiter->up = 1;
262 wake_up_process(waiter->task); 266 wake_up_process(waiter->task);
263} 267}
 268EXPORT_SYMBOL(__up);
\ No newline at end of file
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 07b4f1b1a73a..be4b8fab3637 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -29,6 +29,14 @@
29#include <trace/events/irq.h> 29#include <trace/events/irq.h>
30 30
31#include <asm/irq.h> 31#include <asm/irq.h>
32
33#include <litmus/litmus.h>
34#include <litmus/sched_trace.h>
35
36#ifdef CONFIG_LITMUS_NVIDIA
37#include <litmus/nvidia_info.h>
38#endif
39
32/* 40/*
33 - No shared variables, all the data are CPU local. 41 - No shared variables, all the data are CPU local.
34 - If a softirq needs serialization, let it serialize itself 42 - If a softirq needs serialization, let it serialize itself
@@ -54,7 +62,7 @@ EXPORT_SYMBOL(irq_stat);
54 62
55static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; 63static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
56 64
57static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); 65static DEFINE_PER_CPU(struct task_struct *, ksoftirqd) = NULL;
58 66
59char *softirq_to_name[NR_SOFTIRQS] = { 67char *softirq_to_name[NR_SOFTIRQS] = {
60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", 68 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
@@ -177,6 +185,7 @@ void local_bh_enable_ip(unsigned long ip)
177} 185}
178EXPORT_SYMBOL(local_bh_enable_ip); 186EXPORT_SYMBOL(local_bh_enable_ip);
179 187
188
180/* 189/*
181 * We restart softirq processing MAX_SOFTIRQ_RESTART times, 190 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
182 * and we fall back to softirqd after that. 191 * and we fall back to softirqd after that.
@@ -187,34 +196,30 @@ EXPORT_SYMBOL(local_bh_enable_ip);
187 * should not be able to lock up the box. 196 * should not be able to lock up the box.
188 */ 197 */
189#define MAX_SOFTIRQ_RESTART 10 198#define MAX_SOFTIRQ_RESTART 10
190 199static void ____do_softirq(void)
191asmlinkage void __do_softirq(void)
192{ 200{
193 struct softirq_action *h;
194 __u32 pending; 201 __u32 pending;
195 int max_restart = MAX_SOFTIRQ_RESTART; 202
203 struct softirq_action *h;
196 int cpu; 204 int cpu;
197 205
198 pending = local_softirq_pending(); 206 pending = local_softirq_pending();
207
199 account_system_vtime(current); 208 account_system_vtime(current);
200 209
201 __local_bh_disable((unsigned long)__builtin_return_address(0));
202 lockdep_softirq_enter();
203
204 cpu = smp_processor_id(); 210 cpu = smp_processor_id();
205restart:
206 /* Reset the pending bitmask before enabling irqs */
207 set_softirq_pending(0);
208 211
212 set_softirq_pending(0);
213
209 local_irq_enable(); 214 local_irq_enable();
210 215
211 h = softirq_vec; 216 h = softirq_vec;
212 217
213 do { 218 do {
214 if (pending & 1) { 219 if (pending & 1) {
215 int prev_count = preempt_count(); 220 int prev_count = preempt_count();
216 kstat_incr_softirqs_this_cpu(h - softirq_vec); 221 kstat_incr_softirqs_this_cpu(h - softirq_vec);
217 222
218 trace_softirq_entry(h, softirq_vec); 223 trace_softirq_entry(h, softirq_vec);
219 h->action(h); 224 h->action(h);
220 trace_softirq_exit(h, softirq_vec); 225 trace_softirq_exit(h, softirq_vec);
@@ -226,26 +231,70 @@ restart:
226 h->action, prev_count, preempt_count()); 231 h->action, prev_count, preempt_count());
227 preempt_count() = prev_count; 232 preempt_count() = prev_count;
228 } 233 }
229 234
230 rcu_bh_qs(cpu); 235 rcu_bh_qs(cpu);
231 } 236 }
232 h++; 237 h++;
233 pending >>= 1; 238 pending >>= 1;
234 } while (pending); 239 } while (pending);
235 240
236 local_irq_disable(); 241 local_irq_disable();
242}
243
244static void ___do_softirq(void)
245{
246 __u32 pending;
247
248 //struct softirq_action *h;
249 int max_restart = MAX_SOFTIRQ_RESTART;
250 //int cpu;
251
252 pending = local_softirq_pending();
253
254restart:
255 ____do_softirq();
237 256
238 pending = local_softirq_pending(); 257 pending = local_softirq_pending();
239 if (pending && --max_restart) 258 if (pending && --max_restart)
240 goto restart; 259 goto restart;
241 260
242 if (pending) 261 if (pending)
262 {
243 wakeup_softirqd(); 263 wakeup_softirqd();
264 }
265}
244 266
267asmlinkage void __do_softirq(void)
268{
269#ifdef LITMUS_THREAD_ALL_SOFTIRQ
270 /* Skip straight to wakeup_softirqd() if we're using
 271 LITMUS_THREAD_ALL_SOFTIRQ (unless truly high-priority softirqs are pending). */
272 struct task_struct *tsk = __get_cpu_var(ksoftirqd);
273
274 if(tsk)
275 {
276 __u32 pending = local_softirq_pending();
277 const __u32 high_prio_softirq = (1<<HI_SOFTIRQ) | (1<<TIMER_SOFTIRQ) | (1<<HRTIMER_SOFTIRQ);
278 if(pending && !(pending & high_prio_softirq))
279 {
280 wakeup_softirqd();
281 return;
282 }
283 }
284#endif
285
286 /*
287 * 'immediate' softirq execution:
288 */
289 __local_bh_disable((unsigned long)__builtin_return_address(0));
290 lockdep_softirq_enter();
291
292 ___do_softirq();
293
245 lockdep_softirq_exit(); 294 lockdep_softirq_exit();
246 295
247 account_system_vtime(current); 296 account_system_vtime(current);
248 _local_bh_enable(); 297 _local_bh_enable();
249} 298}
250 299
251#ifndef __ARCH_HAS_DO_SOFTIRQ 300#ifndef __ARCH_HAS_DO_SOFTIRQ
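
Aside: the LITMUS_THREAD_ALL_SOFTIRQ branch above reduces to a single bitmask test: defer everything to ksoftirqd unless one of the high-priority softirq classes (HI, TIMER, HRTIMER) is pending. The stand-alone sketch below models that decision in user space; the softirq bit positions are assumptions of the sketch, not values copied from the kernel headers.

/* sketch: models the deferral test in __do_softirq() above (user space) */
#include <stdio.h>
#include <stdint.h>

/* assumed softirq bit positions; the kernel's enum is authoritative */
enum { HI_SOFTIRQ = 0, TIMER_SOFTIRQ = 1, NET_RX_SOFTIRQ = 3, HRTIMER_SOFTIRQ = 8 };

/* defer to ksoftirqd only if something is pending and none of it is high-priority */
static int defer_to_ksoftirqd(uint32_t pending)
{
    const uint32_t high_prio = (1u << HI_SOFTIRQ) |
                               (1u << TIMER_SOFTIRQ) |
                               (1u << HRTIMER_SOFTIRQ);
    return pending && !(pending & high_prio);
}

int main(void)
{
    printf("NET_RX only   -> defer=%d\n", defer_to_ksoftirqd(1u << NET_RX_SOFTIRQ));
    printf("TIMER pending -> defer=%d\n", defer_to_ksoftirqd(1u << TIMER_SOFTIRQ));
    printf("nothing       -> defer=%d\n", defer_to_ksoftirqd(0));
    return 0;
}
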
@@ -357,8 +406,64 @@ struct tasklet_head
357static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); 406static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
358static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); 407static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
359 408
409
360void __tasklet_schedule(struct tasklet_struct *t) 410void __tasklet_schedule(struct tasklet_struct *t)
361{ 411{
412#ifdef CONFIG_LITMUS_NVIDIA
413 if(is_nvidia_func(t->func))
414 {
415 u32 nvidia_device = get_tasklet_nv_device_num(t);
416 // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
417 // __FUNCTION__, nvidia_device,litmus_clock());
418
419 unsigned long flags;
420 struct task_struct* device_owner;
421
422 lock_nv_registry(nvidia_device, &flags);
423
424 device_owner = get_nv_device_owner(nvidia_device);
425
426 if(device_owner==NULL)
427 {
428 t->owner = NULL;
429 }
430 else
431 {
432 if(is_realtime(device_owner))
433 {
434 TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
435 __FUNCTION__, nvidia_device,litmus_clock());
436 TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
437 __FUNCTION__,device_owner->pid,nvidia_device);
438
439 t->owner = device_owner;
440 sched_trace_tasklet_release(t->owner);
441 if(likely(_litmus_tasklet_schedule(t,nvidia_device)))
442 {
443 unlock_nv_registry(nvidia_device, &flags);
444 return;
445 }
446 else
447 {
448 t->owner = NULL; /* fall through to normal scheduling */
449 }
450 }
451 else
452 {
453 t->owner = NULL;
454 }
455 }
456 unlock_nv_registry(nvidia_device, &flags);
457 }
458#endif
459
460 ___tasklet_schedule(t);
461}
462EXPORT_SYMBOL(__tasklet_schedule);
463
464
465void ___tasklet_schedule(struct tasklet_struct *t)
466{
362 unsigned long flags; 467 unsigned long flags;
363 468
364 local_irq_save(flags); 469 local_irq_save(flags);
@@ -368,11 +473,65 @@ void __tasklet_schedule(struct tasklet_struct *t)
368 raise_softirq_irqoff(TASKLET_SOFTIRQ); 473 raise_softirq_irqoff(TASKLET_SOFTIRQ);
369 local_irq_restore(flags); 474 local_irq_restore(flags);
370} 475}
476EXPORT_SYMBOL(___tasklet_schedule);
371 477
372EXPORT_SYMBOL(__tasklet_schedule);
373 478
374void __tasklet_hi_schedule(struct tasklet_struct *t) 479void __tasklet_hi_schedule(struct tasklet_struct *t)
375{ 480{
481#ifdef CONFIG_LITMUS_NVIDIA
482 if(is_nvidia_func(t->func))
483 {
484 u32 nvidia_device = get_tasklet_nv_device_num(t);
485 // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
486 // __FUNCTION__, nvidia_device,litmus_clock());
487
488 unsigned long flags;
489 struct task_struct* device_owner;
490
491 lock_nv_registry(nvidia_device, &flags);
492
493 device_owner = get_nv_device_owner(nvidia_device);
494
495 if(device_owner==NULL)
496 {
497 t->owner = NULL;
498 }
499 else
500 {
501 if( is_realtime(device_owner))
502 {
503 TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n",
504 __FUNCTION__, nvidia_device,litmus_clock());
505 TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
506 __FUNCTION__,device_owner->pid,nvidia_device);
507
508 t->owner = device_owner;
509 sched_trace_tasklet_release(t->owner);
510 if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device)))
511 {
512 unlock_nv_registry(nvidia_device, &flags);
513 return;
514 }
515 else
516 {
517 t->owner = NULL; /* fall through to normal scheduling */
518 }
519 }
520 else
521 {
522 t->owner = NULL;
523 }
524 }
525 unlock_nv_registry(nvidia_device, &flags);
526 }
527#endif
528
529 ___tasklet_hi_schedule(t);
530}
531EXPORT_SYMBOL(__tasklet_hi_schedule);
532
533void ___tasklet_hi_schedule(struct tasklet_struct* t)
534{
376 unsigned long flags; 535 unsigned long flags;
377 536
378 local_irq_save(flags); 537 local_irq_save(flags);
@@ -382,19 +541,72 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
382 raise_softirq_irqoff(HI_SOFTIRQ); 541 raise_softirq_irqoff(HI_SOFTIRQ);
383 local_irq_restore(flags); 542 local_irq_restore(flags);
384} 543}
385 544EXPORT_SYMBOL(___tasklet_hi_schedule);
386EXPORT_SYMBOL(__tasklet_hi_schedule);
387 545
388void __tasklet_hi_schedule_first(struct tasklet_struct *t) 546void __tasklet_hi_schedule_first(struct tasklet_struct *t)
389{ 547{
390 BUG_ON(!irqs_disabled()); 548 BUG_ON(!irqs_disabled());
549#ifdef CONFIG_LITMUS_NVIDIA
550 if(is_nvidia_func(t->func))
551 {
552 u32 nvidia_device = get_tasklet_nv_device_num(t);
553 // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
554 // __FUNCTION__, nvidia_device,litmus_clock());
555 unsigned long flags;
556 struct task_struct* device_owner;
557
558 lock_nv_registry(nvidia_device, &flags);
559
560 device_owner = get_nv_device_owner(nvidia_device);
561
562 if(device_owner==NULL)
563 {
564 t->owner = NULL;
565 }
566 else
567 {
568 if(is_realtime(device_owner))
569 {
570 TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
571 __FUNCTION__, nvidia_device,litmus_clock());
572
573 TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
574 __FUNCTION__,device_owner->pid,nvidia_device);
575
576 t->owner = device_owner;
577 sched_trace_tasklet_release(t->owner);
578 if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device)))
579 {
580 unlock_nv_registry(nvidia_device, &flags);
581 return;
582 }
583 else
584 {
585 t->owner = NULL; /* fall through to normal scheduling */
586 }
587 }
588 else
589 {
590 t->owner = NULL;
591 }
592 }
593 unlock_nv_registry(nvidia_device, &flags);
594 }
595#endif
596
597 ___tasklet_hi_schedule_first(t);
598}
599EXPORT_SYMBOL(__tasklet_hi_schedule_first);
600
601void ___tasklet_hi_schedule_first(struct tasklet_struct* t)
602{
603 BUG_ON(!irqs_disabled());
391 604
392 t->next = __get_cpu_var(tasklet_hi_vec).head; 605 t->next = __get_cpu_var(tasklet_hi_vec).head;
393 __get_cpu_var(tasklet_hi_vec).head = t; 606 __get_cpu_var(tasklet_hi_vec).head = t;
394 __raise_softirq_irqoff(HI_SOFTIRQ); 607 __raise_softirq_irqoff(HI_SOFTIRQ);
395} 608}
396 609EXPORT_SYMBOL(___tasklet_hi_schedule_first);
397EXPORT_SYMBOL(__tasklet_hi_schedule_first);
398 610
399static void tasklet_action(struct softirq_action *a) 611static void tasklet_action(struct softirq_action *a)
400{ 612{
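
Aside: the same owner-lookup pattern is repeated in __tasklet_schedule(), __tasklet_hi_schedule() and __tasklet_hi_schedule_first() above: look up the task registered for the interrupting GPU and, only if that task is real-time, tag the tasklet with it and try to divert it to klitirqd; on any failure the owner is cleared and the stock ___tasklet_*() path is used. The self-contained sketch below condenses that decision; every type and helper is a user-space stand-in, not a kernel symbol.

/* sketch: condensed model of the klitirqd diversion decision (user space) */
#include <stdio.h>
#include <stddef.h>

struct task { int pid; int is_realtime; };
struct tasklet { struct task *owner; };

/* stand-in for lock_nv_registry()/get_nv_device_owner() */
static struct task rt_owner = { .pid = 1234, .is_realtime = 1 };
static struct task *lookup_device_owner(unsigned int device)
{
    return (device == 0) ? &rt_owner : NULL;   /* pretend only GPU 0 is registered */
}

/* stand-in for _litmus_tasklet_schedule(); nonzero means klitirqd accepted it */
static int hand_to_klitirqd(struct tasklet *t, unsigned int device)
{
    (void)device;
    printf("diverted tasklet of owner %d to klitirqd\n", t->owner->pid);
    return 1;
}

/* returns 1 if diverted; 0 means the caller should use the normal tasklet path */
static int try_divert_to_klitirqd(struct tasklet *t, unsigned int device)
{
    struct task *owner = lookup_device_owner(device);

    if (owner == NULL || !owner->is_realtime) {
        t->owner = NULL;              /* no RT owner: stock path */
        return 0;
    }
    t->owner = owner;                 /* inherit the owner's priority */
    if (hand_to_klitirqd(t, device))
        return 1;
    t->owner = NULL;                  /* rejected: fall back to Linux */
    return 0;
}

int main(void)
{
    struct tasklet t = { .owner = NULL };
    if (!try_divert_to_klitirqd(&t, 1))
        printf("GPU 1 has no RT owner: normal ___tasklet_schedule() path\n");
    try_divert_to_klitirqd(&t, 0);
    return 0;
}
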
@@ -450,6 +662,7 @@ static void tasklet_hi_action(struct softirq_action *a)
450 if (!atomic_read(&t->count)) { 662 if (!atomic_read(&t->count)) {
451 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) 663 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
452 BUG(); 664 BUG();
665
453 t->func(t->data); 666 t->func(t->data);
454 tasklet_unlock(t); 667 tasklet_unlock(t);
455 continue; 668 continue;
@@ -473,8 +686,13 @@ void tasklet_init(struct tasklet_struct *t,
473 t->next = NULL; 686 t->next = NULL;
474 t->state = 0; 687 t->state = 0;
475 atomic_set(&t->count, 0); 688 atomic_set(&t->count, 0);
689
476 t->func = func; 690 t->func = func;
477 t->data = data; 691 t->data = data;
692
693#ifdef CONFIG_LITMUS_SOFTIRQD
694 t->owner = NULL;
695#endif
478} 696}
479 697
480EXPORT_SYMBOL(tasklet_init); 698EXPORT_SYMBOL(tasklet_init);
@@ -489,6 +707,7 @@ void tasklet_kill(struct tasklet_struct *t)
489 yield(); 707 yield();
490 } while (test_bit(TASKLET_STATE_SCHED, &t->state)); 708 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
491 } 709 }
710
492 tasklet_unlock_wait(t); 711 tasklet_unlock_wait(t);
493 clear_bit(TASKLET_STATE_SCHED, &t->state); 712 clear_bit(TASKLET_STATE_SCHED, &t->state);
494} 713}
@@ -694,6 +913,8 @@ void __init softirq_init(void)
694 913
695static int run_ksoftirqd(void * __bind_cpu) 914static int run_ksoftirqd(void * __bind_cpu)
696{ 915{
916 unsigned long flags;
917
697 set_current_state(TASK_INTERRUPTIBLE); 918 set_current_state(TASK_INTERRUPTIBLE);
698 919
699 while (!kthread_should_stop()) { 920 while (!kthread_should_stop()) {
@@ -712,7 +933,11 @@ static int run_ksoftirqd(void * __bind_cpu)
712 don't process */ 933 don't process */
713 if (cpu_is_offline((long)__bind_cpu)) 934 if (cpu_is_offline((long)__bind_cpu))
714 goto wait_to_die; 935 goto wait_to_die;
715 do_softirq(); 936
937 local_irq_save(flags);
938 ____do_softirq();
939 local_irq_restore(flags);
940
716 preempt_enable_no_resched(); 941 preempt_enable_no_resched();
717 cond_resched(); 942 cond_resched();
718 preempt_disable(); 943 preempt_disable();
@@ -760,6 +985,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
760 for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) { 985 for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
761 if (*i == t) { 986 if (*i == t) {
762 *i = t->next; 987 *i = t->next;
988
763 /* If this was the tail element, move the tail ptr */ 989 /* If this was the tail element, move the tail ptr */
764 if (*i == NULL) 990 if (*i == NULL)
765 per_cpu(tasklet_vec, cpu).tail = i; 991 per_cpu(tasklet_vec, cpu).tail = i;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f77afd939229..8139208eaee1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -47,6 +47,13 @@
47 47
48#include "workqueue_sched.h" 48#include "workqueue_sched.h"
49 49
50#ifdef CONFIG_LITMUS_NVIDIA
51#include <litmus/litmus.h>
52#include <litmus/sched_trace.h>
53#include <litmus/nvidia_info.h>
54#endif
55
56
50enum { 57enum {
51 /* global_cwq flags */ 58 /* global_cwq flags */
52 GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ 59 GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
@@ -1010,9 +1017,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
1010 work_flags |= WORK_STRUCT_DELAYED; 1017 work_flags |= WORK_STRUCT_DELAYED;
1011 worklist = &cwq->delayed_works; 1018 worklist = &cwq->delayed_works;
1012 } 1019 }
1013
1014 insert_work(cwq, work, worklist, work_flags); 1020 insert_work(cwq, work, worklist, work_flags);
1015
1016 spin_unlock_irqrestore(&gcwq->lock, flags); 1021 spin_unlock_irqrestore(&gcwq->lock, flags);
1017} 1022}
1018 1023
@@ -2526,10 +2531,69 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
2526 */ 2531 */
2527int schedule_work(struct work_struct *work) 2532int schedule_work(struct work_struct *work)
2528{ 2533{
2529 return queue_work(system_wq, work); 2534#ifdef CONFIG_LITMUS_NVIDIA
2535 if(is_nvidia_func(work->func))
2536 {
2537 u32 nvidiaDevice = get_work_nv_device_num(work);
2538
2539 //1) Ask Litmus which task owns GPU <nvidiaDevice>.
2540 unsigned long flags;
2541 struct task_struct* device_owner;
2542
2543 lock_nv_registry(nvidiaDevice, &flags);
2544
2545 device_owner = get_nv_device_owner(nvidiaDevice);
2546
2547 //2) If there is an owner, set work->owner to the owner's task struct.
2548 if(device_owner==NULL)
2549 {
2550 work->owner = NULL;
2551 //TRACE("%s: the owner task of NVIDIA Device %u is NULL\n",__FUNCTION__,nvidiaDevice);
2552 }
2553 else
2554 {
2555 if( is_realtime(device_owner))
2556 {
2557 TRACE("%s: Handling NVIDIA work for device\t%u\tat\t%llu\n",
2558 __FUNCTION__, nvidiaDevice,litmus_clock());
2559 TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
2560 __FUNCTION__,
2561 device_owner->pid,
2562 nvidiaDevice);
2563
2564 //3) Call litmus_schedule_work() and return (don't execute the rest
2565 // of schedule_work()).
2566 work->owner = device_owner;
2567 sched_trace_work_release(work->owner);
2568 if(likely(litmus_schedule_work(work, nvidiaDevice)))
2569 {
2570 unlock_nv_registry(nvidiaDevice, &flags);
2571 return 1;
2572 }
2573 else
2574 {
2575 work->owner = NULL; /* fall through to normal work scheduling */
2576 }
2577 }
2578 else
2579 {
2580 work->owner = NULL;
2581 }
2582 }
2583 unlock_nv_registry(nvidiaDevice, &flags);
2584 }
2585#endif
2586
2587 return(__schedule_work(work));
2530} 2588}
2531EXPORT_SYMBOL(schedule_work); 2589EXPORT_SYMBOL(schedule_work);
2532 2590
2591int __schedule_work(struct work_struct* work)
2592{
2593 return queue_work(system_wq, work);
2594}
2595EXPORT_SYMBOL(__schedule_work);
2596
2533/* 2597/*
2534 * schedule_work_on - put work task on a specific cpu 2598 * schedule_work_on - put work task on a specific cpu
2535 * @cpu: cpu to put the work task on 2599 * @cpu: cpu to put the work task on
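
Aside: schedule_work() above becomes a thin wrapper around the three numbered steps, with the original queue_work() body renamed to __schedule_work() as the fallback, so existing drivers keep calling schedule_work() unchanged. A user-space model of that wrapper/fallback split, with stand-in types, is sketched below.

/* sketch: the wrapper/fallback split introduced above, modeled in user space */
#include <stdio.h>
#include <stddef.h>

struct work { void *owner; };

/* stand-in for the "is there a real-time GPU owner?" decision */
static int litmus_accepts(struct work *w) { return w->owner != NULL; }

static int __schedule_work_model(struct work *w)   /* the original queue_work() body */
{
    (void)w;
    printf("queued on the ordinary system workqueue\n");
    return 1;
}

static int schedule_work_model(struct work *w)     /* the new wrapper */
{
    if (litmus_accepts(w)) {
        printf("queued on klitirqd for the GPU owner\n");
        return 1;
    }
    return __schedule_work_model(w);               /* unchanged fallback */
}

int main(void)
{
    struct work anon = { .owner = NULL }, owned = { .owner = &anon };
    schedule_work_model(&anon);    /* no RT owner: normal path */
    schedule_work_model(&owned);   /* owned by an RT task: klitirqd path */
    return 0;
}
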
diff --git a/litmus/Kconfig b/litmus/Kconfig
index ad8dc8308cf0..7e865d4dd703 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -62,6 +62,25 @@ config LITMUS_LOCKING
62 62
63endmenu 63endmenu
64 64
65menu "Performance Enhancements"
66
67config SCHED_CPU_AFFINITY
68 bool "Local Migration Affinity"
69 default y
70 help
71 Rescheduled tasks prefer CPUs near to their previously used CPU. This
72 may improve performance through possible preservation of cache affinity.
73
74 Warning: May make bugs ahrder to find since tasks may migrate less often.
75
76 NOTES:
77 * Pfair/PD^2 does not support this option.
78 * Only x86 currently supported.
79
80 Say Yes if unsure.
81
82endmenu
83
65menu "Tracing" 84menu "Tracing"
66 85
67config FEATHER_TRACE 86config FEATHER_TRACE
@@ -182,4 +201,74 @@ config SCHED_DEBUG_TRACE_CALLER
182 201
183endmenu 202endmenu
184 203
204menu "Interrupt Handling"
205
206config LITMUS_THREAD_ALL_SOFTIRQ
207 bool "Process all softirqs in ksoftirqd threads."
208 default n
209 help
210 (Experimental) Thread all softirqs to ksoftirqd
211 daemon threads, similar to PREEMPT_RT. I/O
212 throughput will drop with this enabled, but
213 latencies due to interrupts will be reduced.
214
215 WARNING: Timer responsiveness will likely be
216 decreased as timer callbacks are also threaded.
217 This is unlike PREEMPT_RT's hardirqs.
218
219 If unsure, say No.
220
221config LITMUS_SOFTIRQD
222 bool "Spawn klitirqd interrupt handling threads."
223 depends on LITMUS_LOCKING
224 default n
225 help
226 Create klitirqd interrupt handling threads. Work must be
227 specifically dispatched to these workers. (Softirqs for
228 Litmus tasks are not magically redirected to klitirqd.)
229
230 G-EDF ONLY for now!
231
232 If unsure, say No.
233
234config NR_LITMUS_SOFTIRQD
235 int "Number of klitirqd threads."
236 depends on LITMUS_SOFTIRQD
237 range 1 4096
238 default "1"
239 help
240 Should be <= the number of CPUs in your system.
241
242config LITMUS_NVIDIA
243 bool "Litmus handling of NVIDIA interrupts."
244 depends on LITMUS_SOFTIRQD
245 default n
246 help
247 Direct tasklets from NVIDIA devices to Litmus's klitirqd.
248
249 If unsure, say No.
250
251choice
252 prompt "CUDA/Driver Version Support"
253 default CUDA_4_0
254 depends on LITMUS_NVIDIA
255 help
256 Select the version of CUDA/driver to support.
257
258config CUDA_4_0
259 bool "CUDA 4.0"
260 depends on LITMUS_NVIDIA
261 help
262 Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40)
263
264config CUDA_3_2
265 bool "CUDA 3.2"
266 depends on LITMUS_NVIDIA
267 help
268 Support CUDA 3.2 (dev. driver version: x86_64-260.24)
269
270endchoice
271
272endmenu
273
185endmenu 274endmenu
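
Aside: the integer and boolean symbols declared above reach C code as CONFIG_* macros at build time. The fragment below shows one plausible way the NR_LITMUS_SOFTIRQD constant used by litmus/litmus_softirq.c (further below) could be derived from them; the header this would live in is an assumption, not part of this patch excerpt.

/* sketch: assumed glue between the Kconfig symbols above and the litmus_softirq code */
#ifdef CONFIG_LITMUS_SOFTIRQD
#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD  /* value of the int entry above */
#else
#define NR_LITMUS_SOFTIRQD 0                          /* no daemons when the feature is off */
#endif
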
diff --git a/litmus/Makefile b/litmus/Makefile
index ad9936e07b83..892e01c2e1b3 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -21,8 +21,12 @@ obj-y = sched_plugin.o litmus.o \
21 21
22obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o 22obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
23obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o 23obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
24obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
24 25
25obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o 26obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
26obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 27obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
27obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o 28obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
28obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o 29obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
30
31obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o
32obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o
diff --git a/litmus/affinity.c b/litmus/affinity.c
new file mode 100644
index 000000000000..3b430d18885b
--- /dev/null
+++ b/litmus/affinity.c
@@ -0,0 +1,49 @@
1#include <linux/cpu.h>
2
3#include <litmus/affinity.h>
4
5struct neighborhood neigh_info[NR_CPUS];
6
7/* called by _init_litmus() */
8void init_topology(void)
9{
10 int cpu;
11 int i;
12 int chk;
13 int depth = num_cache_leaves;
14
15 if(depth > NUM_CACHE_LEVELS)
16 depth = NUM_CACHE_LEVELS;
17
18 for_each_online_cpu(cpu)
19 {
20 for(i = 0; i < depth; ++i)
21 {
22 long unsigned int firstbits;
23
24 chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i);
25 if(chk) /* failed */
26 {
27 neigh_info[cpu].size[i] = 0;
28 }
29 else
30 {
31 /* size = num bits in mask */
32 neigh_info[cpu].size[i] = cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
33 }
34 firstbits = *neigh_info[cpu].neighbors[i]->bits;
35 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
36 cpu, neigh_info[cpu].size[i], i, firstbits);
37 }
38
39 /* set data for non-existent levels */
40 for(; i < NUM_CACHE_LEVELS; ++i)
41 {
42 neigh_info[cpu].size[i] = 0;
43
44 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
45 cpu, neigh_info[cpu].size[i], i, 0lu);
46 }
47 }
48}
49
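
Aside: init_topology() above only records, per CPU and cache level, which CPUs share that cache and how many there are. A consumer looking for a nearby CPU would scan levels from the closest outward. The sketch below models that walk in user space with plain bitmasks; the hard-coded topology (four CPUs, paired L2, shared L3) is purely hypothetical.

/* sketch: walking cache-neighborhood levels to find a close idle CPU */
#include <stdio.h>
#include <stdint.h>

#define NUM_CACHE_LEVELS 3
#define NR_CPUS 4

struct neighborhood_model {
    uint32_t neighbors[NUM_CACHE_LEVELS];  /* CPU bitmask per cache level */
    int size[NUM_CACHE_LEVELS];            /* bits set in each mask */
};

/* hypothetical topology: CPUs {0,1} and {2,3} pair up at L2, all share L3 */
static const struct neighborhood_model neigh[NR_CPUS] = {
    [0] = { { 0x1, 0x3, 0xF }, { 1, 2, 4 } },
    [1] = { { 0x2, 0x3, 0xF }, { 1, 2, 4 } },
    [2] = { { 0x4, 0xC, 0xF }, { 1, 2, 4 } },
    [3] = { { 0x8, 0xC, 0xF }, { 1, 2, 4 } },
};

/* pick the idle CPU sharing the lowest cache level with 'prev', or -1 */
static int nearest_idle_cpu(int prev, uint32_t idle_mask)
{
    int level, cpu;
    for (level = 0; level < NUM_CACHE_LEVELS; ++level) {
        uint32_t candidates = neigh[prev].neighbors[level] & idle_mask;
        for (cpu = 0; cpu < NR_CPUS; ++cpu)
            if (candidates & (1u << cpu))
                return cpu;
    }
    return -1;
}

int main(void)
{
    /* CPUs 1 and 2 idle; a task that last ran on CPU 0 prefers CPU 1 */
    printf("nearest idle to CPU 0: %d\n", nearest_idle_cpu(0, 0x6));
    return 0;
}
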
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
index 9b44dc2d8d1e..fbd67ab5f467 100644
--- a/litmus/edf_common.c
+++ b/litmus/edf_common.c
@@ -65,6 +65,12 @@ int edf_higher_prio(struct task_struct* first,
65 65
66 66
67 return !is_realtime(second_task) || 67 return !is_realtime(second_task) ||
68
69#ifdef CONFIG_LITMUS_SOFTIRQD
70 /* proxy threads always lose w/o inheritance. */
71 (first_task->rt_param.is_proxy_thread <
72 second_task->rt_param.is_proxy_thread) ||
73#endif
68 74
69 /* is the deadline of the first task earlier? 75 /* is the deadline of the first task earlier?
70 * Then it has higher priority. 76 * Then it has higher priority.
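
Aside: the clause added above ranks any non-proxy real-time task ahead of a klitirqd proxy thread before deadlines are compared. A stripped-down user-space model of the resulting comparison follows; it deliberately omits the !is_realtime() guard and the PID/inheritance tie-breaks of the real edf_higher_prio(), and its types are stand-ins.

/* sketch: EDF comparison with the proxy-thread clause added above */
#include <stdio.h>

struct rt_task_model {
    unsigned long long deadline;
    int is_proxy_thread;   /* 1 for a klitirqd thread running on its own behalf */
};

/* returns 1 if 'a' has higher priority than 'b' */
static int edf_higher_prio_model(const struct rt_task_model *a,
                                 const struct rt_task_model *b)
{
    return /* a non-proxy task outranks a proxy, mirroring the new clause */
           (a->is_proxy_thread < b->is_proxy_thread) ||
           /* otherwise, the earlier deadline wins */
           (a->deadline < b->deadline);
}

int main(void)
{
    struct rt_task_model job   = { .deadline = 99, .is_proxy_thread = 0 };
    struct rt_task_model proxy = { .deadline = 10, .is_proxy_thread = 1 };
    struct rt_task_model other = { .deadline = 50, .is_proxy_thread = 0 };

    /* a non-proxy job outranks a proxy thread even with a later deadline */
    printf("job > proxy?  %d\n", edf_higher_prio_model(&job, &proxy));
    /* between two ordinary jobs, the earlier deadline wins as usual */
    printf("other > job?  %d\n", edf_higher_prio_model(&other, &job));
    return 0;
}
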
diff --git a/litmus/fdso.c b/litmus/fdso.c
index aa7b384264e3..2b7f9ba85857 100644
--- a/litmus/fdso.c
+++ b/litmus/fdso.c
@@ -22,6 +22,7 @@ extern struct fdso_ops generic_lock_ops;
22 22
23static const struct fdso_ops* fdso_ops[] = { 23static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */ 24 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* KFMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */ 26 &generic_lock_ops, /* SRP_SEM */
26}; 27};
27 28
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 26938acacafc..29363c6ad565 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -17,6 +17,14 @@
17#include <litmus/litmus_proc.h> 17#include <litmus/litmus_proc.h>
18#include <litmus/sched_trace.h> 18#include <litmus/sched_trace.h>
19 19
20#ifdef CONFIG_SCHED_CPU_AFFINITY
21#include <litmus/affinity.h>
22#endif
23
24#ifdef CONFIG_LITMUS_NVIDIA
25#include <litmus/nvidia_info.h>
26#endif
27
20/* Number of RT tasks that exist in the system */ 28/* Number of RT tasks that exist in the system */
21atomic_t rt_task_count = ATOMIC_INIT(0); 29atomic_t rt_task_count = ATOMIC_INIT(0);
22static DEFINE_RAW_SPINLOCK(task_transition_lock); 30static DEFINE_RAW_SPINLOCK(task_transition_lock);
@@ -47,6 +55,28 @@ void bheap_node_free(struct bheap_node* hn)
47struct release_heap* release_heap_alloc(int gfp_flags); 55struct release_heap* release_heap_alloc(int gfp_flags);
48void release_heap_free(struct release_heap* rh); 56void release_heap_free(struct release_heap* rh);
49 57
58#ifdef CONFIG_LITMUS_NVIDIA
59/*
60 * sys_register_nv_device
61 * @nv_device_id: The Nvidia device id that the task want to register
62 * @reg_action: set to '1' to register the specified device. zero otherwise.
63 * Syscall for register task's designated nvidia device into NV_DEVICE_REG array
64 * Returns EFAULT if nv_device_id is out of range.
65 * 0 if success
66 */
67asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
68{
69 /* register the device to caller (aka 'current') */
70 return(reg_nv_device(nv_device_id, reg_action));
71}
72#else
73asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
74{
75 return(-EINVAL);
76}
77#endif
78
79
50/* 80/*
51 * sys_set_task_rt_param 81 * sys_set_task_rt_param
52 * @pid: Pid of the task which scheduling parameters must be changed 82 * @pid: Pid of the task which scheduling parameters must be changed
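
Aside: a user-space task would invoke the new syscall before launching GPU work so that reg_nv_device() records it as the device owner. A minimal sketch follows; the syscall number is a placeholder (the real value comes from the litmus unistd headers touched by this patch), and the meaning of reg_action values other than 1 is assumed rather than confirmed by this hunk.

/* sketch: registering the calling task as the owner of NVIDIA device 0 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_register_nv_device
#define __NR_register_nv_device (-1L)  /* placeholder; take the real number from litmus/unistd_{32,64}.h */
#endif

int main(void)
{
    long ret = syscall(__NR_register_nv_device, 0 /* nv_device_id */, 1 /* register */);
    if (ret != 0) {
        perror("register_nv_device");
        return 1;
    }

    /* ... submit GPU work; its tasklets/work items are now attributed to this task ... */

    /* presumably reg_action == 0 unregisters; this is an assumption of the sketch */
    syscall(__NR_register_nv_device, 0, 0);
    return 0;
}
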
@@ -115,7 +145,7 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
115 tp.cls != RT_CLASS_BEST_EFFORT) 145 tp.cls != RT_CLASS_BEST_EFFORT)
116 { 146 {
117 printk(KERN_INFO "litmus: real-time task %d rejected " 147 printk(KERN_INFO "litmus: real-time task %d rejected "
118 "because its class is invalid\n"); 148 "because its class is invalid\n", pid);
119 goto out_unlock; 149 goto out_unlock;
120 } 150 }
121 if (tp.budget_policy != NO_ENFORCEMENT && 151 if (tp.budget_policy != NO_ENFORCEMENT &&
@@ -131,6 +161,22 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
131 161
132 target->rt_param.task_params = tp; 162 target->rt_param.task_params = tp;
133 163
164#ifdef CONFIG_LITMUS_SOFTIRQD
165 /* proxy thread off by default */
166 target->rt_param.is_proxy_thread = 0;
167 target->rt_param.cur_klitirqd = NULL;
168 //init_MUTEX(&target->rt_param.klitirqd_sem);
169 mutex_init(&target->rt_param.klitirqd_sem);
170 //init_completion(&target->rt_param.klitirqd_sem);
171 //target->rt_param.klitirqd_sem_stat = NOT_HELD;
172 atomic_set(&target->rt_param.klitirqd_sem_stat, NOT_HELD);
173#endif
174
175#ifdef CONFIG_LITMUS_NVIDIA
176 atomic_set(&target->rt_param.nv_int_count, 0);
177#endif
178
179
134 retval = 0; 180 retval = 0;
135 out_unlock: 181 out_unlock:
136 read_unlock_irq(&tasklist_lock); 182 read_unlock_irq(&tasklist_lock);
@@ -265,6 +311,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job)
265 return retval; 311 return retval;
266} 312}
267 313
314
268/* sys_null_call() is only used for determining raw system call 315/* sys_null_call() is only used for determining raw system call
269 * overheads (kernel entry, kernel exit). It has no useful side effects. 316 * overheads (kernel entry, kernel exit). It has no useful side effects.
270 * If ts is non-NULL, then the current Feather-Trace time is recorded. 317 * If ts is non-NULL, then the current Feather-Trace time is recorded.
@@ -278,7 +325,7 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
278 now = get_cycles(); 325 now = get_cycles();
279 ret = put_user(now, ts); 326 ret = put_user(now, ts);
280 } 327 }
281 328
282 return ret; 329 return ret;
283} 330}
284 331
@@ -299,6 +346,20 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
299 * at this point in time. 346 * at this point in time.
300 */ 347 */
301 WARN_ON(p->rt_param.inh_task); 348 WARN_ON(p->rt_param.inh_task);
349
350#ifdef CONFIG_LITMUS_SOFTIRQD
351 /* We probably should not have any tasklets executing for
352 * us at this time.
353 */
354 WARN_ON(p->rt_param.cur_klitirqd);
355 WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD);
356
357 if(p->rt_param.cur_klitirqd)
358 flush_pending(p->rt_param.cur_klitirqd, p);
359
360 if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD)
361 up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem);
362#endif
302 363
303 /* Cleanup everything else. */ 364 /* Cleanup everything else. */
304 memset(&p->rt_param, 0, sizeof(p->rt_param)); 365 memset(&p->rt_param, 0, sizeof(p->rt_param));
@@ -399,7 +460,7 @@ static void synch_on_plugin_switch(void* info)
399 */ 460 */
400int switch_sched_plugin(struct sched_plugin* plugin) 461int switch_sched_plugin(struct sched_plugin* plugin)
401{ 462{
402 unsigned long flags; 463 //unsigned long flags;
403 int ret = 0; 464 int ret = 0;
404 465
405 BUG_ON(!plugin); 466 BUG_ON(!plugin);
@@ -413,8 +474,15 @@ int switch_sched_plugin(struct sched_plugin* plugin)
413 while (atomic_read(&cannot_use_plugin) < num_online_cpus()) 474 while (atomic_read(&cannot_use_plugin) < num_online_cpus())
414 cpu_relax(); 475 cpu_relax();
415 476
477#ifdef CONFIG_LITMUS_SOFTIRQD
478 if(!klitirqd_is_dead())
479 {
480 kill_klitirqd();
481 }
482#endif
483
416 /* stop task transitions */ 484 /* stop task transitions */
417 raw_spin_lock_irqsave(&task_transition_lock, flags); 485 //raw_spin_lock_irqsave(&task_transition_lock, flags);
418 486
419 /* don't switch if there are active real-time tasks */ 487 /* don't switch if there are active real-time tasks */
420 if (atomic_read(&rt_task_count) == 0) { 488 if (atomic_read(&rt_task_count) == 0) {
@@ -432,7 +500,7 @@ int switch_sched_plugin(struct sched_plugin* plugin)
432 } else 500 } else
433 ret = -EBUSY; 501 ret = -EBUSY;
434out: 502out:
435 raw_spin_unlock_irqrestore(&task_transition_lock, flags); 503 //raw_spin_unlock_irqrestore(&task_transition_lock, flags);
436 atomic_set(&cannot_use_plugin, 0); 504 atomic_set(&cannot_use_plugin, 0);
437 return ret; 505 return ret;
438} 506}
@@ -540,6 +608,10 @@ static int __init _init_litmus(void)
540 608
541 init_litmus_proc(); 609 init_litmus_proc();
542 610
611#ifdef CONFIG_SCHED_CPU_AFFINITY
612 init_topology();
613#endif
614
543 return 0; 615 return 0;
544} 616}
545 617
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
index 4bf725a36c9c..381513366c7a 100644
--- a/litmus/litmus_proc.c
+++ b/litmus/litmus_proc.c
@@ -20,11 +20,18 @@ static struct proc_dir_entry *litmus_dir = NULL,
20#ifdef CONFIG_RELEASE_MASTER 20#ifdef CONFIG_RELEASE_MASTER
21 *release_master_file = NULL, 21 *release_master_file = NULL,
22#endif 22#endif
23#ifdef CONFIG_LITMUS_SOFTIRQD
24 *klitirqd_file = NULL,
25#endif
23 *plugs_file = NULL; 26 *plugs_file = NULL;
24 27
25/* in litmus/sync.c */ 28/* in litmus/sync.c */
26int count_tasks_waiting_for_release(void); 29int count_tasks_waiting_for_release(void);
27 30
31extern int proc_read_klitirqd_stats(char *page, char **start,
32 off_t off, int count,
33 int *eof, void *data);
34
28static int proc_read_stats(char *page, char **start, 35static int proc_read_stats(char *page, char **start,
29 off_t off, int count, 36 off_t off, int count,
30 int *eof, void *data) 37 int *eof, void *data)
@@ -161,6 +168,12 @@ int __init init_litmus_proc(void)
161 release_master_file->write_proc = proc_write_release_master; 168 release_master_file->write_proc = proc_write_release_master;
162#endif 169#endif
163 170
171#ifdef CONFIG_LITMUS_SOFTIRQD
172 klitirqd_file =
173 create_proc_read_entry("klitirqd_stats", 0444, litmus_dir,
174 proc_read_klitirqd_stats, NULL);
175#endif
176
164 stat_file = create_proc_read_entry("stats", 0444, litmus_dir, 177 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
165 proc_read_stats, NULL); 178 proc_read_stats, NULL);
166 179
@@ -187,6 +200,10 @@ void exit_litmus_proc(void)
187 remove_proc_entry("stats", litmus_dir); 200 remove_proc_entry("stats", litmus_dir);
188 if (curr_file) 201 if (curr_file)
189 remove_proc_entry("active_plugin", litmus_dir); 202 remove_proc_entry("active_plugin", litmus_dir);
203#ifdef CONFIG_LITMUS_SOFTIRQD
204 if (klitirqd_file)
205 remove_proc_entry("klitirqd_stats", litmus_dir);
206#endif
190#ifdef CONFIG_RELEASE_MASTER 207#ifdef CONFIG_RELEASE_MASTER
191 if (release_master_file) 208 if (release_master_file)
192 remove_proc_entry("release_master", litmus_dir); 209 remove_proc_entry("release_master", litmus_dir);
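
Aside: once the entry above exists, the daemons can be inspected simply by reading the new proc file. The reader below assumes the usual /proc/litmus location used by the other LITMUS entries.

/* sketch: dumping the klitirqd statistics exported above */
#include <stdio.h>

int main(void)
{
    char buf[512];
    /* assumed path: litmus_dir is normally /proc/litmus */
    FILE *f = fopen("/proc/litmus/klitirqd_stats", "r");

    if (!f) {
        perror("fopen");
        return 1;
    }
    while (fgets(buf, sizeof(buf), f))
        fputs(buf, stdout);
    fclose(f);
    return 0;
}
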
diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c
new file mode 100644
index 000000000000..271e770dbaea
--- /dev/null
+++ b/litmus/litmus_softirq.c
@@ -0,0 +1,1579 @@
1#include <linux/interrupt.h>
2#include <linux/percpu.h>
3#include <linux/cpu.h>
4#include <linux/kthread.h>
5#include <linux/ftrace.h>
6#include <linux/smp.h>
7#include <linux/slab.h>
8#include <linux/mutex.h>
9
10#include <linux/sched.h>
11#include <linux/cpuset.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_trace.h>
15#include <litmus/jobs.h>
16#include <litmus/sched_plugin.h>
17#include <litmus/litmus_softirq.h>
18
19/* TODO: Remove unneeded mb() and other barriers. */
20
21
22/* counts number of daemons ready to handle litmus irqs. */
23static atomic_t num_ready_klitirqds = ATOMIC_INIT(0);
24
25enum pending_flags
26{
27 LIT_TASKLET_LOW = 0x1,
28 LIT_TASKLET_HI = LIT_TASKLET_LOW<<1,
29 LIT_WORK = LIT_TASKLET_HI<<1
30};
31
32/* only support tasklet processing for now. */
33struct tasklet_head
34{
35 struct tasklet_struct *head;
36 struct tasklet_struct **tail;
37};
38
39struct klitirqd_info
40{
41 struct task_struct* klitirqd;
42 struct task_struct* current_owner;
43 int terminating;
44
45
46 raw_spinlock_t lock;
47
48 u32 pending;
49 atomic_t num_hi_pending;
50 atomic_t num_low_pending;
51 atomic_t num_work_pending;
52
53 /* in order of priority */
54 struct tasklet_head pending_tasklets_hi;
55 struct tasklet_head pending_tasklets;
56 struct list_head worklist;
57};
58
59/* one list for each klitirqd */
60static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD];
61
62
63
64
65
66int proc_read_klitirqd_stats(char *page, char **start,
67 off_t off, int count,
68 int *eof, void *data)
69{
70 int len = snprintf(page, PAGE_SIZE,
71 "num ready klitirqds: %d\n\n",
72 atomic_read(&num_ready_klitirqds));
73
74 if(klitirqd_is_ready())
75 {
76 int i;
77 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
78 {
79 len +=
80 snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */
81 "klitirqd_th%d: %s/%d\n"
82 "\tcurrent_owner: %s/%d\n"
83 "\tpending: %x\n"
84 "\tnum hi: %d\n"
85 "\tnum low: %d\n"
86 "\tnum work: %d\n\n",
87 i,
88 klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid,
89 (klitirqds[i].current_owner != NULL) ?
90 klitirqds[i].current_owner->comm : "(null)",
91 (klitirqds[i].current_owner != NULL) ?
92 klitirqds[i].current_owner->pid : 0,
93 klitirqds[i].pending,
94 atomic_read(&klitirqds[i].num_hi_pending),
95 atomic_read(&klitirqds[i].num_low_pending),
96 atomic_read(&klitirqds[i].num_work_pending));
97 }
98 }
99
100 return(len);
101}
102
103
104
105
106
107#if 0
108static atomic_t dump_id = ATOMIC_INIT(0);
109
110static void __dump_state(struct klitirqd_info* which, const char* caller)
111{
112 struct tasklet_struct* list;
113
114 int id = atomic_inc_return(&dump_id);
115
116 //if(in_interrupt())
117 {
118 if(which->current_owner)
119 {
120 TRACE("(id: %d caller: %s)\n"
121 "klitirqd: %s/%d\n"
122 "current owner: %s/%d\n"
123 "pending: %x\n",
124 id, caller,
125 which->klitirqd->comm, which->klitirqd->pid,
126 which->current_owner->comm, which->current_owner->pid,
127 which->pending);
128 }
129 else
130 {
131 TRACE("(id: %d caller: %s)\n"
132 "klitirqd: %s/%d\n"
133 "current owner: %p\n"
134 "pending: %x\n",
135 id, caller,
136 which->klitirqd->comm, which->klitirqd->pid,
137 NULL,
138 which->pending);
139 }
140
141 list = which->pending_tasklets.head;
142 while(list)
143 {
144 struct tasklet_struct *t = list;
145 list = list->next; /* advance */
146 if(t->owner)
147 TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid);
148 else
149 TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL);
150 }
151 }
152}
153
154static void dump_state(struct klitirqd_info* which, const char* caller)
155{
156 unsigned long flags;
157
158 raw_spin_lock_irqsave(&which->lock, flags);
159 __dump_state(which, caller);
160 raw_spin_unlock_irqrestore(&which->lock, flags);
161}
162#endif
163
164
165/* forward declarations */
166static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
167 struct klitirqd_info *which,
168 int wakeup);
169static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
170 struct klitirqd_info *which,
171 int wakeup);
172static void ___litmus_schedule_work(struct work_struct *w,
173 struct klitirqd_info *which,
174 int wakeup);
175
176
177
178inline unsigned int klitirqd_id(struct task_struct* tsk)
179{
180 int i;
181 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
182 {
183 if(klitirqds[i].klitirqd == tsk)
184 {
185 return i;
186 }
187 }
188
189 BUG();
190
191 return 0;
192}
193
194
195inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which)
196{
197 return (which->pending & LIT_TASKLET_HI);
198}
199
200inline static u32 litirq_pending_low_irqoff(struct klitirqd_info* which)
201{
202 return (which->pending & LIT_TASKLET_LOW);
203}
204
205inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which)
206{
207 return (which->pending & LIT_WORK);
208}
209
210inline static u32 litirq_pending_irqoff(struct klitirqd_info* which)
211{
212 return(which->pending);
213}
214
215
216inline static u32 litirq_pending(struct klitirqd_info* which)
217{
218 unsigned long flags;
219 u32 pending;
220
221 raw_spin_lock_irqsave(&which->lock, flags);
222 pending = litirq_pending_irqoff(which);
223 raw_spin_unlock_irqrestore(&which->lock, flags);
224
225 return pending;
226};
227
228inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner)
229{
230 unsigned long flags;
231 u32 pending;
232
233 raw_spin_lock_irqsave(&which->lock, flags);
234 pending = litirq_pending_irqoff(which);
235 if(pending)
236 {
237 if(which->current_owner != owner)
238 {
239 pending = 0; // owner switch!
240 }
241 }
242 raw_spin_unlock_irqrestore(&which->lock, flags);
243
244 return pending;
245}
246
247
248inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which,
249 struct mutex** sem,
250 struct task_struct** t)
251{
252 unsigned long flags;
253 u32 pending;
254
255 /* init values */
256 *sem = NULL;
257 *t = NULL;
258
259 raw_spin_lock_irqsave(&which->lock, flags);
260
261 pending = litirq_pending_irqoff(which);
262 if(pending)
263 {
264 if(which->current_owner != NULL)
265 {
266 *t = which->current_owner;
267 *sem = &tsk_rt(which->current_owner)->klitirqd_sem;
268 }
269 else
270 {
271 BUG();
272 }
273 }
274 raw_spin_unlock_irqrestore(&which->lock, flags);
275
276 if(likely(*sem))
277 {
278 return pending;
279 }
280 else
281 {
282 return 0;
283 }
284}
285
286/* returns true if the next piece of work to do is from a different owner.
287 */
288static int tasklet_ownership_change(
289 struct klitirqd_info* which,
290 enum pending_flags taskletQ)
291{
292 /* this function doesn't have to look at work objects since they have
293 priority below tasklets. */
294
295 unsigned long flags;
296 int ret = 0;
297
298 raw_spin_lock_irqsave(&which->lock, flags);
299
300 switch(taskletQ)
301 {
302 case LIT_TASKLET_HI:
303 if(litirq_pending_hi_irqoff(which))
304 {
305 ret = (which->pending_tasklets_hi.head->owner !=
306 which->current_owner);
307 }
308 break;
309 case LIT_TASKLET_LOW:
310 if(litirq_pending_low_irqoff(which))
311 {
312 ret = (which->pending_tasklets.head->owner !=
313 which->current_owner);
314 }
315 break;
316 default:
317 break;
318 }
319
320 raw_spin_unlock_irqrestore(&which->lock, flags);
321
322 TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret);
323
324 return ret;
325}
326
327
328static void __reeval_prio(struct klitirqd_info* which)
329{
330 struct task_struct* next_owner = NULL;
331 struct task_struct* klitirqd = which->klitirqd;
332
333 /* Check in prio-order */
334 u32 pending = litirq_pending_irqoff(which);
335
336 //__dump_state(which, "__reeval_prio: before");
337
338 if(pending)
339 {
340 if(pending & LIT_TASKLET_HI)
341 {
342 next_owner = which->pending_tasklets_hi.head->owner;
343 }
344 else if(pending & LIT_TASKLET_LOW)
345 {
346 next_owner = which->pending_tasklets.head->owner;
347 }
348 else if(pending & LIT_WORK)
349 {
350 struct work_struct* work =
351 list_first_entry(&which->worklist, struct work_struct, entry);
352 next_owner = work->owner;
353 }
354 }
355
356 if(next_owner != which->current_owner)
357 {
358 struct task_struct* old_owner = which->current_owner;
359
360 /* bind the next owner. */
361 which->current_owner = next_owner;
362 mb();
363
364 if(next_owner != NULL)
365 {
366 if(!in_interrupt())
367 {
368 TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
369 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
370 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
371 next_owner->comm, next_owner->pid);
372 }
373 else
374 {
375 TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
376 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
377 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
378 next_owner->comm, next_owner->pid);
379 }
380
381 litmus->set_prio_inh_klitirqd(klitirqd, old_owner, next_owner);
382 }
383 else
384 {
385 if(likely(!in_interrupt()))
386 {
387 TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n",
388 __FUNCTION__, klitirqd->comm, klitirqd->pid);
389 }
390 else
391 {
392 // is this a bug?
393 TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n",
394 __FUNCTION__, klitirqd->comm, klitirqd->pid);
395 }
396
397 BUG_ON(pending != 0);
398 litmus->clear_prio_inh_klitirqd(klitirqd, old_owner);
399 }
400 }
401
402 //__dump_state(which, "__reeval_prio: after");
403}
404
405static void reeval_prio(struct klitirqd_info* which)
406{
407 unsigned long flags;
408
409 raw_spin_lock_irqsave(&which->lock, flags);
410 __reeval_prio(which);
411 raw_spin_unlock_irqrestore(&which->lock, flags);
412}
413
414
415static void wakeup_litirqd_locked(struct klitirqd_info* which)
416{
417 /* Interrupts are disabled: no need to stop preemption */
418 if (which && which->klitirqd)
419 {
420 __reeval_prio(which); /* configure the proper priority */
421
422 if(which->klitirqd->state != TASK_RUNNING)
423 {
424 TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__,
425 which->klitirqd->comm, which->klitirqd->pid);
426
427 wake_up_process(which->klitirqd);
428 }
429 }
430}
431
432
433static void do_lit_tasklet(struct klitirqd_info* which,
434 struct tasklet_head* pending_tasklets)
435{
436 unsigned long flags;
437 struct tasklet_struct *list;
438 atomic_t* count;
439
440 raw_spin_lock_irqsave(&which->lock, flags);
441
442 //__dump_state(which, "do_lit_tasklet: before steal");
443
444 /* copy out the tasklets for our private use. */
445 list = pending_tasklets->head;
446 pending_tasklets->head = NULL;
447 pending_tasklets->tail = &pending_tasklets->head;
448
449 /* remove pending flag */
450 which->pending &= (pending_tasklets == &which->pending_tasklets) ?
451 ~LIT_TASKLET_LOW :
452 ~LIT_TASKLET_HI;
453
454 count = (pending_tasklets == &which->pending_tasklets) ?
455 &which->num_low_pending:
456 &which->num_hi_pending;
457
458 //__dump_state(which, "do_lit_tasklet: after steal");
459
460 raw_spin_unlock_irqrestore(&which->lock, flags);
461
462
463 while(list)
464 {
465 struct tasklet_struct *t = list;
466
467 /* advance, lest we forget */
468 list = list->next;
469
470 /* execute tasklet if it has my priority and is free */
471 if ((t->owner == which->current_owner) && tasklet_trylock(t)) {
472 if (!atomic_read(&t->count)) {
473 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
474 {
475 BUG();
476 }
477 TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__);
478 t->func(t->data);
479 tasklet_unlock(t);
480
481 atomic_dec(count);
482
483 continue; /* process more tasklets */
484 }
485 tasklet_unlock(t);
486 }
487
488 TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__);
489
490 /* couldn't process tasklet. put it back at the end of the queue. */
491 if(pending_tasklets == &which->pending_tasklets)
492 ___litmus_tasklet_schedule(t, which, 0);
493 else
494 ___litmus_tasklet_hi_schedule(t, which, 0);
495 }
496}
497
498
499// returns 1 if priorities need to be changed to continue processing
500// pending tasklets.
501static int do_litirq(struct klitirqd_info* which)
502{
503 u32 pending;
504 int resched = 0;
505
506 if(in_interrupt())
507 {
508 TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__);
509 return(0);
510 }
511
512 if(which->klitirqd != current)
513 {
514 TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n",
515 __FUNCTION__, current->comm, current->pid,
516 which->klitirqd->comm, which->klitirqd->pid);
517 return(0);
518 }
519
520 if(!is_realtime(current))
521 {
522 TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n",
523 __FUNCTION__, current->policy);
524 return(0);
525 }
526
527
528 /* We only handle tasklets & work objects, no need for RCU triggers? */
529
530 pending = litirq_pending(which);
531 if(pending)
532 {
533 /* extract the work to do and do it! */
534 if(pending & LIT_TASKLET_HI)
535 {
536 TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__);
537 do_lit_tasklet(which, &which->pending_tasklets_hi);
538 resched = tasklet_ownership_change(which, LIT_TASKLET_HI);
539
540 if(resched)
541 {
542 TRACE_CUR("%s: HI tasklets of another owner remain. "
543 "Skipping any LOW tasklets.\n", __FUNCTION__);
544 }
545 }
546
547 if(!resched && (pending & LIT_TASKLET_LOW))
548 {
549 TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__);
550 do_lit_tasklet(which, &which->pending_tasklets);
551 resched = tasklet_ownership_change(which, LIT_TASKLET_LOW);
552
553 if(resched)
554 {
555 TRACE_CUR("%s: LOW tasklets of another owner remain. "
556 "Skipping any work objects.\n", __FUNCTION__);
557 }
558 }
559 }
560
561 return(resched);
562}
563
564
565static void do_work(struct klitirqd_info* which)
566{
567 unsigned long flags;
568 work_func_t f;
569 struct work_struct* work;
570
571 // only execute one work-queue item to yield to tasklets.
572 // ...is this a good idea, or should we just batch them?
573 raw_spin_lock_irqsave(&which->lock, flags);
574
575 if(!litirq_pending_work_irqoff(which))
576 {
577 raw_spin_unlock_irqrestore(&which->lock, flags);
578 goto no_work;
579 }
580
581 work = list_first_entry(&which->worklist, struct work_struct, entry);
582 list_del_init(&work->entry);
583
584 if(list_empty(&which->worklist))
585 {
586 which->pending &= ~LIT_WORK;
587 }
588
589 raw_spin_unlock_irqrestore(&which->lock, flags);
590
591
592
593 /* safe to read current_owner outside of lock since only this thread
594 may write to the pointer. */
595 if(work->owner == which->current_owner)
596 {
597 TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__);
598 // do the work!
599 work_clear_pending(work);
600 f = work->func;
601 f(work); /* can't touch 'work' after this point,
602 the user may have freed it. */
603
604 atomic_dec(&which->num_work_pending);
605 }
606 else
607 {
608 TRACE_CUR("%s: Could not invoke work object. Requeuing.\n",
609 __FUNCTION__);
610 ___litmus_schedule_work(work, which, 0);
611 }
612
613no_work:
614 return;
615}
616
617
618static int set_litmus_daemon_sched(void)
619{
620 /* set up a daemon job that will never complete.
621 it should only ever run on behalf of another
622 real-time task.
623
624 TODO: Transition to a new job whenever a
625 new tasklet is handled */
626
627 int ret = 0;
628
629 struct rt_task tp = {
630 .exec_cost = 0,
631 .period = 1000000000, /* dummy 1 second period */
632 .phase = 0,
633 .cpu = task_cpu(current),
634 .budget_policy = NO_ENFORCEMENT,
635 .cls = RT_CLASS_BEST_EFFORT
636 };
637
638 struct sched_param param = { .sched_priority = 0};
639
640
641 /* set task params, mark as proxy thread, and init other data */
642 tsk_rt(current)->task_params = tp;
643 tsk_rt(current)->is_proxy_thread = 1;
644 tsk_rt(current)->cur_klitirqd = NULL;
645 //init_MUTEX(&tsk_rt(current)->klitirqd_sem);
646 mutex_init(&tsk_rt(current)->klitirqd_sem);
647 //init_completion(&tsk_rt(current)->klitirqd_sem);
648 atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD);
649
650 /* inform the OS we're SCHED_LITMUS --
651 sched_setscheduler_nocheck() calls litmus_admit_task(). */
652 sched_setscheduler_nocheck(current, SCHED_LITMUS, &param);
653
654 return ret;
655}
656
657static void enter_execution_phase(struct klitirqd_info* which,
658 struct mutex* sem,
659 struct task_struct* t)
660{
661 TRACE_CUR("%s: Trying to enter execution phase. "
662 "Acquiring semaphore of %s/%d\n", __FUNCTION__,
663 t->comm, t->pid);
664 down_and_set_stat(current, HELD, sem);
665 TRACE_CUR("%s: Execution phase entered! "
666 "Acquired semaphore of %s/%d\n", __FUNCTION__,
667 t->comm, t->pid);
668}
669
670static void exit_execution_phase(struct klitirqd_info* which,
671 struct mutex* sem,
672 struct task_struct* t)
673{
674 TRACE_CUR("%s: Exiting execution phase. "
675 "Releasing semaphore of %s/%d\n", __FUNCTION__,
676 t->comm, t->pid);
677 if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD)
678 {
679 up_and_set_stat(current, NOT_HELD, sem);
680 TRACE_CUR("%s: Execution phase exited! "
681 "Released semaphore of %s/%d\n", __FUNCTION__,
682 t->comm, t->pid);
683 }
684 else
685 {
686 TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__);
687 }
688}
689
690/* main loop for klitsoftirqd */
691static int run_klitirqd(void* unused)
692{
693 struct klitirqd_info* which = &klitirqds[klitirqd_id(current)];
694 struct mutex* sem;
695 struct task_struct* owner;
696
697 int rt_status = set_litmus_daemon_sched();
698
699 if(rt_status != 0)
700 {
701 TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__);
702 goto rt_failed;
703 }
704
705 atomic_inc(&num_ready_klitirqds);
706
707 set_current_state(TASK_INTERRUPTIBLE);
708
709 while (!kthread_should_stop())
710 {
711 preempt_disable();
712 if (!litirq_pending(which))
713 {
714 /* sleep for work */
715 TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n",
716 __FUNCTION__);
717 preempt_enable_no_resched();
718 schedule();
719
720 if(kthread_should_stop()) /* bail out */
721 {
722 TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
723 continue;
724 }
725
726 preempt_disable();
727 }
728
729 __set_current_state(TASK_RUNNING);
730
731 while (litirq_pending_and_sem_and_owner(which, &sem, &owner))
732 {
733 int needs_resched = 0;
734
735 preempt_enable_no_resched();
736
737 BUG_ON(sem == NULL);
738
739 // wait to enter execution phase; wait for 'current_owner' to block.
740 enter_execution_phase(which, sem, owner);
741
742 if(kthread_should_stop())
743 {
744 TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
745 break;
746 }
747
748 preempt_disable();
749
750 /* Double check that there's still pending work and the owner hasn't
751 * changed. Pending items may have been flushed while we were sleeping.
752 */
753 if(litirq_pending_with_owner(which, owner))
754 {
755 TRACE_CUR("%s: Executing tasklets and/or work objects.\n",
756 __FUNCTION__);
757
758 needs_resched = do_litirq(which);
759
760 preempt_enable_no_resched();
761
762 // work objects are preemptible.
763 if(!needs_resched)
764 {
765 do_work(which);
766 }
767
768 // exit execution phase.
769 exit_execution_phase(which, sem, owner);
770
771 TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__);
772 reeval_prio(which); /* check if we need to change priority here */
773 }
774 else
775 {
776 TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n",
777 __FUNCTION__,
778 owner->comm, owner->pid);
779 preempt_enable_no_resched();
780
781 // exit execution phase.
782 exit_execution_phase(which, sem, owner);
783 }
784
785 cond_resched();
786 preempt_disable();
787 }
788 preempt_enable();
789 set_current_state(TASK_INTERRUPTIBLE);
790 }
791 __set_current_state(TASK_RUNNING);
792
793 atomic_dec(&num_ready_klitirqds);
794
795rt_failed:
796 litmus_exit_task(current);
797
798 return rt_status;
799}
800
801
802struct klitirqd_launch_data
803{
804 int* cpu_affinity;
805 struct work_struct work;
806};
807
808/* executed by a kworker from workqueues */
809static void launch_klitirqd(struct work_struct *work)
810{
811 int i;
812
813 struct klitirqd_launch_data* launch_data =
814 container_of(work, struct klitirqd_launch_data, work);
815
816 TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
817
818 /* create the daemon threads */
819 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
820 {
821 if(launch_data->cpu_affinity)
822 {
823 klitirqds[i].klitirqd =
824 kthread_create(
825 run_klitirqd,
826 /* treat the affinity as a pointer, we'll cast it back later */
827 (void*)(long long)launch_data->cpu_affinity[i],
828 "klitirqd_th%d/%d",
829 i,
830 launch_data->cpu_affinity[i]);
831
 832 /* litmus will put us in the right cluster. */
833 kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]);
834 }
835 else
836 {
837 klitirqds[i].klitirqd =
838 kthread_create(
839 run_klitirqd,
840 /* treat the affinity as a pointer, we'll cast it back later */
841 (void*)(long long)(-1),
842 "klitirqd_th%d",
843 i);
844 }
845 }
846
847 TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
848
849 /* unleash the daemons */
850 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
851 {
852 wake_up_process(klitirqds[i].klitirqd);
853 }
854
855 if(launch_data->cpu_affinity)
856 kfree(launch_data->cpu_affinity);
857 kfree(launch_data);
858}
859
860
861void spawn_klitirqd(int* affinity)
862{
863 int i;
864 struct klitirqd_launch_data* delayed_launch;
865
866 if(atomic_read(&num_ready_klitirqds) != 0)
867 {
 868 TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n", __FUNCTION__);
869 return;
870 }
871
872 /* init the tasklet & work queues */
873 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
874 {
875 klitirqds[i].terminating = 0;
876 klitirqds[i].pending = 0;
877
878 klitirqds[i].num_hi_pending.counter = 0;
879 klitirqds[i].num_low_pending.counter = 0;
880 klitirqds[i].num_work_pending.counter = 0;
881
882 klitirqds[i].pending_tasklets_hi.head = NULL;
883 klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head;
884
885 klitirqds[i].pending_tasklets.head = NULL;
886 klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head;
887
888 INIT_LIST_HEAD(&klitirqds[i].worklist);
889
890 raw_spin_lock_init(&klitirqds[i].lock);
891 }
892
893 /* wait to flush the initializations to memory since other threads
894 will access it. */
895 mb();
896
897 /* tell a work queue to launch the threads. we can't make scheduling
898 calls since we're in an atomic state. */
899 TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__);
900 delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC);
901 if(affinity)
902 {
903 delayed_launch->cpu_affinity =
904 kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC);
905
906 memcpy(delayed_launch->cpu_affinity, affinity,
907 sizeof(int)*NR_LITMUS_SOFTIRQD);
908 }
909 else
910 {
911 delayed_launch->cpu_affinity = NULL;
912 }
913 INIT_WORK(&delayed_launch->work, launch_klitirqd);
914 schedule_work(&delayed_launch->work);
915}
916
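
Aside: spawn_klitirqd() takes an optional array naming one CPU per daemon and defers thread creation to a workqueue because it may be called from an atomic context (e.g., during a plugin switch). The user-space model below only shows how such an affinity array would be built; the constant and the stub stand in for CONFIG_NR_LITMUS_SOFTIRQD and the real kernel call.

/* sketch: building the per-daemon CPU affinity array for spawn_klitirqd() */
#include <stdio.h>

#define NR_LITMUS_SOFTIRQD 4   /* assumed value of CONFIG_NR_LITMUS_SOFTIRQD */

static void spawn_klitirqd_model(const int *affinity)
{
    int i;
    for (i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
        printf("klitirqd_th%d -> CPU %d\n", i, affinity ? affinity[i] : -1);
}

int main(void)
{
    int affinity[NR_LITMUS_SOFTIRQD];
    int i;

    /* simple policy: daemon i on CPU i (assumes NR_LITMUS_SOFTIRQD <= #CPUs) */
    for (i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
        affinity[i] = i;

    spawn_klitirqd_model(affinity);   /* in the kernel: spawn_klitirqd(affinity) */
    return 0;
}
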
917
918void kill_klitirqd(void)
919{
920 if(!klitirqd_is_dead())
921 {
922 int i;
923
924 TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
925
926 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
927 {
928 if(klitirqds[i].terminating != 1)
929 {
930 klitirqds[i].terminating = 1;
931 mb(); /* just to be sure? */
932 flush_pending(klitirqds[i].klitirqd, NULL);
933
934 /* signal termination */
935 kthread_stop(klitirqds[i].klitirqd);
936 }
937 }
938 }
939}
940
941
942int klitirqd_is_ready(void)
943{
944 return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD);
945}
946
947int klitirqd_is_dead(void)
948{
949 return(atomic_read(&num_ready_klitirqds) == 0);
950}
951
952
953struct task_struct* get_klitirqd(unsigned int k_id)
954{
955 return(klitirqds[k_id].klitirqd);
956}
957
958
959void flush_pending(struct task_struct* klitirqd_thread,
960 struct task_struct* owner)
961{
962 unsigned int k_id = klitirqd_id(klitirqd_thread);
963 struct klitirqd_info *which = &klitirqds[k_id];
964
965 unsigned long flags;
966 struct tasklet_struct *list;
967
968 u32 work_flushed = 0;
969
970 raw_spin_lock_irqsave(&which->lock, flags);
971
972 //__dump_state(which, "flush_pending: before");
973
974 // flush hi tasklets.
975 if(litirq_pending_hi_irqoff(which))
976 {
977 which->pending &= ~LIT_TASKLET_HI;
978
979 list = which->pending_tasklets_hi.head;
980 which->pending_tasklets_hi.head = NULL;
981 which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head;
982
983 TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__);
984
985 while(list)
986 {
987 struct tasklet_struct *t = list;
988 list = list->next;
989
990 if(likely((t->owner == owner) || (owner == NULL)))
991 {
992 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
993 {
994 BUG();
995 }
996
997 work_flushed |= LIT_TASKLET_HI;
998
999 t->owner = NULL;
1000
 1001 // Re-mark the tasklet as scheduled before queueing it on Linux's list;
 the bit was cleared just above, so the else branch should be unreachable.
1002 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1003 {
1004 atomic_dec(&which->num_hi_pending);
1005 ___tasklet_hi_schedule(t);
1006 }
1007 else
1008 {
1009 TRACE("%s: dropped hi tasklet??\n", __FUNCTION__);
1010 BUG();
1011 }
1012 }
1013 else
1014 {
1015 TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__);
1016 // put back on queue.
1017 ___litmus_tasklet_hi_schedule(t, which, 0);
1018 }
1019 }
1020 }
1021
1022 // flush low tasklets.
1023 if(litirq_pending_low_irqoff(which))
1024 {
1025 which->pending &= ~LIT_TASKLET_LOW;
1026
1027 list = which->pending_tasklets.head;
1028 which->pending_tasklets.head = NULL;
1029 which->pending_tasklets.tail = &which->pending_tasklets.head;
1030
1031 TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__);
1032
1033 while(list)
1034 {
1035 struct tasklet_struct *t = list;
1036 list = list->next;
1037
1038 if(likely((t->owner == owner) || (owner == NULL)))
1039 {
1040 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
1041 {
1042 BUG();
1043 }
1044
1045 work_flushed |= LIT_TASKLET_LOW;
1046
1047 t->owner = NULL;
1048 sched_trace_tasklet_end(owner, 1ul);
1049
1050 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1051 {
1052 atomic_dec(&which->num_low_pending);
1053 ___tasklet_schedule(t);
1054 }
1055 else
1056 {
1057 TRACE("%s: dropped tasklet??\n", __FUNCTION__);
1058 BUG();
1059 }
1060 }
1061 else
1062 {
1063 TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__);
1064 // put back on queue
1065 ___litmus_tasklet_schedule(t, which, 0);
1066 }
1067 }
1068 }
1069
1070 // flush work objects
1071 if(litirq_pending_work_irqoff(which))
1072 {
1073 which->pending &= ~LIT_WORK;
1074
1075 TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__);
1076
1077 while(!list_empty(&which->worklist))
1078 {
1079 struct work_struct* work =
1080 list_first_entry(&which->worklist, struct work_struct, entry);
1081 list_del_init(&work->entry);
1082
1083 if(likely((work->owner == owner) || (owner == NULL)))
1084 {
1085 work_flushed |= LIT_WORK;
1086 atomic_dec(&which->num_work_pending);
1087
1088 work->owner = NULL;
1089 sched_trace_work_end(owner, current, 1ul);
1090 __schedule_work(work);
1091 }
1092 else
1093 {
1094 TRACE("%s: Could not flush a work object.\n", __FUNCTION__);
1095 // put back on queue
1096 ___litmus_schedule_work(work, which, 0);
1097 }
1098 }
1099 }
1100
1101 //__dump_state(which, "flush_pending: after (before reeval prio)");
1102
1103
1104 mb(); /* commit changes to pending flags */
1105
1106 /* reset the scheduling priority */
1107 if(work_flushed)
1108 {
1109 __reeval_prio(which);
1110
1111 /* Try to offload flushed tasklets to Linux's ksoftirqd. */
1112 if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI))
1113 {
1114 wakeup_softirqd();
1115 }
1116 }
1117 else
1118 {
1119 TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__);
1120 }
1121
1122 raw_spin_unlock_irqrestore(&which->lock, flags);
1123}
1124
1125
1126
1127
1128static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
1129 struct klitirqd_info *which,
1130 int wakeup)
1131{
1132 unsigned long flags;
1133 u32 old_pending;
1134
1135 t->next = NULL;
1136
1137 raw_spin_lock_irqsave(&which->lock, flags);
1138
1139 //__dump_state(which, "___litmus_tasklet_schedule: before queuing");
1140
1141 *(which->pending_tasklets.tail) = t;
1142 which->pending_tasklets.tail = &t->next;
1143
1144 old_pending = which->pending;
1145 which->pending |= LIT_TASKLET_LOW;
1146
1147 atomic_inc(&which->num_low_pending);
1148
1149 mb();
1150
1151 if(!old_pending && wakeup)
1152 {
1153 wakeup_litirqd_locked(which); /* wake up the klitirqd */
1154 }
1155
1156 //__dump_state(which, "___litmus_tasklet_schedule: after queuing");
1157
1158 raw_spin_unlock_irqrestore(&which->lock, flags);
1159}
1160
1161int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
1162{
1163 int ret = 0; /* assume failure */
1164 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
1165 {
1166 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1167 BUG();
1168 }
1169
1170 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1171 {
1172 TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
1173 BUG();
1174 }
1175
1176 if(likely(!klitirqds[k_id].terminating))
1177 {
 1178		/* Can't accept tasklets while we're processing a workqueue,
 1179		   because both are handled by the same thread. This case is
 1180		   very rare.
 1181
 1182		   TODO: Use a separate thread for work objects.
 1183		 */
1184 if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
1185 {
1186 ret = 1;
1187 ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1);
1188 }
1189 else
1190 {
1191 TRACE("%s: rejected tasklet because of pending work.\n",
1192 __FUNCTION__);
1193 }
1194 }
1195 return(ret);
1196}
1197
1198EXPORT_SYMBOL(__litmus_tasklet_schedule);
1199
1200
1201static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
1202 struct klitirqd_info *which,
1203 int wakeup)
1204{
1205 unsigned long flags;
1206 u32 old_pending;
1207
1208 t->next = NULL;
1209
1210 raw_spin_lock_irqsave(&which->lock, flags);
1211
1212 *(which->pending_tasklets_hi.tail) = t;
1213 which->pending_tasklets_hi.tail = &t->next;
1214
1215 old_pending = which->pending;
1216 which->pending |= LIT_TASKLET_HI;
1217
1218 atomic_inc(&which->num_hi_pending);
1219
1220 mb();
1221
1222 if(!old_pending && wakeup)
1223 {
1224 wakeup_litirqd_locked(which); /* wake up the klitirqd */
1225 }
1226
1227 raw_spin_unlock_irqrestore(&which->lock, flags);
1228}
1229
1230int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
1231{
1232 int ret = 0; /* assume failure */
1233 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
1234 {
1235 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1236 BUG();
1237 }
1238
1239 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1240 {
1241 TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
1242 BUG();
1243 }
1244
1245 if(unlikely(!klitirqd_is_ready()))
1246 {
 1247		TRACE("%s: klitirqd_th%u is not ready!\n", __FUNCTION__, k_id);
1248 BUG();
1249 }
1250
1251 if(likely(!klitirqds[k_id].terminating))
1252 {
1253 if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
1254 {
1255 ret = 1;
1256 ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1);
1257 }
1258 else
1259 {
1260 TRACE("%s: rejected tasklet because of pending work.\n",
1261 __FUNCTION__);
1262 }
1263 }
1264 return(ret);
1265}
1266
1267EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
1268
1269
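/* Like __litmus_tasklet_hi_schedule(), but inserts at the head of the
 * hi-priority list. The caller must already have interrupts disabled
 * (enforced by the BUG_ON below), so the klitirqd lock is taken without
 * the irqsave variant. */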
1270int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
1271{
1272 int ret = 0; /* assume failure */
1273 u32 old_pending;
1274
1275 BUG_ON(!irqs_disabled());
1276
1277 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
1278 {
1279 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1280 BUG();
1281 }
1282
1283 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1284 {
1285 TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
1286 BUG();
1287 }
1288
1289 if(unlikely(!klitirqd_is_ready()))
1290 {
 1291		TRACE("%s: klitirqd_th%u is not ready!\n", __FUNCTION__, k_id);
1292 BUG();
1293 }
1294
1295 if(likely(!klitirqds[k_id].terminating))
1296 {
1297 raw_spin_lock(&klitirqds[k_id].lock);
1298
1299 if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
1300 {
1301 ret = 1; // success!
1302
1303 t->next = klitirqds[k_id].pending_tasklets_hi.head;
1304 klitirqds[k_id].pending_tasklets_hi.head = t;
1305
1306 old_pending = klitirqds[k_id].pending;
1307 klitirqds[k_id].pending |= LIT_TASKLET_HI;
1308
1309 atomic_inc(&klitirqds[k_id].num_hi_pending);
1310
1311 mb();
1312
1313 if(!old_pending)
1314 wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */
1315 }
1316 else
1317 {
1318 TRACE("%s: rejected tasklet because of pending work.\n",
1319 __FUNCTION__);
1320 }
1321
1322 raw_spin_unlock(&klitirqds[k_id].lock);
1323 }
1324 return(ret);
1325}
1326
1327EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
1328
1329
1330
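/* Internal enqueue helper for work items. Note that the work_pending()
 * call below only tests the pending bit and ignores the result -- it
 * appears to be vestigial. The daemon is woken on a 0 -> non-zero
 * transition of the pending mask when 'wakeup' is set. */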
1331static void ___litmus_schedule_work(struct work_struct *w,
1332 struct klitirqd_info *which,
1333 int wakeup)
1334{
1335 unsigned long flags;
1336 u32 old_pending;
1337
1338 raw_spin_lock_irqsave(&which->lock, flags);
1339
1340 work_pending(w);
1341 list_add_tail(&w->entry, &which->worklist);
1342
1343 old_pending = which->pending;
1344 which->pending |= LIT_WORK;
1345
1346 atomic_inc(&which->num_work_pending);
1347
1348 mb();
1349
1350 if(!old_pending && wakeup)
1351 {
1352 wakeup_litirqd_locked(which); /* wakeup the klitirqd */
1353 }
1354
1355 raw_spin_unlock_irqrestore(&which->lock, flags);
1356}
1357
1358int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
1359{
1360 int ret = 1; /* assume success */
 1361	if(unlikely((w->owner == NULL) || !is_realtime(w->owner)))
1362 {
1363 TRACE("%s: No owner associated with this work object!\n", __FUNCTION__);
1364 BUG();
1365 }
1366
1367 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1368 {
 1369		TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
1370 BUG();
1371 }
1372
1373 if(unlikely(!klitirqd_is_ready()))
1374 {
 1375		TRACE("%s: klitirqd_th%u is not ready!\n", __FUNCTION__, k_id);
1376 BUG();
1377 }
1378
1379 if(likely(!klitirqds[k_id].terminating))
1380 ___litmus_schedule_work(w, &klitirqds[k_id], 1);
1381 else
1382 ret = 0;
1383 return(ret);
1384}
1385EXPORT_SYMBOL(__litmus_schedule_work);
1386
1387
1388static int set_klitirqd_sem_status(unsigned long stat)
1389{
1390 TRACE_CUR("SETTING STATUS FROM %d TO %d\n",
1391 atomic_read(&tsk_rt(current)->klitirqd_sem_stat),
1392 stat);
1393 atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat);
1394 //mb();
1395
1396 return(0);
1397}
1398
1399static int set_klitirqd_sem_status_if_not_held(unsigned long stat)
1400{
1401 if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD)
1402 {
1403 return(set_klitirqd_sem_status(stat));
1404 }
1405 return(-1);
1406}
1407
1408
1409void __down_and_reset_and_set_stat(struct task_struct* t,
1410 enum klitirqd_sem_status to_reset,
1411 enum klitirqd_sem_status to_set,
1412 struct mutex* sem)
1413{
1414#if 0
1415 struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
1416 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1417
1418 TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
1419 __FUNCTION__, task->comm, task->pid);
1420#endif
1421
1422 mutex_lock_sfx(sem,
1423 set_klitirqd_sem_status_if_not_held, to_reset,
1424 set_klitirqd_sem_status, to_set);
1425#if 0
1426 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
1427 __FUNCTION__, task->comm, task->pid);
1428#endif
1429}
1430
1431void down_and_set_stat(struct task_struct* t,
1432 enum klitirqd_sem_status to_set,
1433 struct mutex* sem)
1434{
1435#if 0
1436 struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
1437 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1438
1439 TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
1440 __FUNCTION__, task->comm, task->pid);
1441#endif
1442
1443 mutex_lock_sfx(sem,
1444 NULL, 0,
1445 set_klitirqd_sem_status, to_set);
1446
1447#if 0
1448 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
1449 __FUNCTION__, task->comm, task->pid);
1450#endif
1451}
1452
1453
1454void up_and_set_stat(struct task_struct* t,
1455 enum klitirqd_sem_status to_set,
1456 struct mutex* sem)
1457{
1458#if 0
1459 struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
1460 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1461
1462 TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n",
1463 __FUNCTION__,
1464 task->comm, task->pid);
1465#endif
1466
1467 mutex_unlock_sfx(sem, NULL, 0,
1468 set_klitirqd_sem_status, to_set);
1469
1470#if 0
1471 TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n",
1472 __FUNCTION__,
1473 task->comm, task->pid);
1474#endif
1475}
1476
1477
1478
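/* Presumably invoked from the scheduler when a real-time task blocks:
 * if the task currently HOLDs a klitirqd_sem, release it and record
 * NEED_TO_REACQUIRE so it is re-taken on wake-up. A proxy (klitirqd)
 * thread operates on the semaphore of its current owner instead. */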
1479void release_klitirqd_lock(struct task_struct* t)
1480{
1481 if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD))
1482 {
1483 struct mutex* sem;
1484 struct task_struct* owner = t;
1485
1486 if(t->state == TASK_RUNNING)
1487 {
1488 TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n");
1489 return;
1490 }
1491
1492 if(likely(!tsk_rt(t)->is_proxy_thread))
1493 {
1494 sem = &tsk_rt(t)->klitirqd_sem;
1495 }
1496 else
1497 {
1498 unsigned int k_id = klitirqd_id(t);
1499 owner = klitirqds[k_id].current_owner;
1500
1501 BUG_ON(t != klitirqds[k_id].klitirqd);
1502
1503 if(likely(owner))
1504 {
1505 sem = &tsk_rt(owner)->klitirqd_sem;
1506 }
1507 else
1508 {
1509 BUG();
1510
1511 // We had the rug pulled out from under us. Abort attempt
1512 // to reacquire the lock since our client no longer needs us.
1513 TRACE_CUR("HUH?! How did this happen?\n");
1514 atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
1515 return;
1516 }
1517 }
1518
1519 //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid);
1520 up_and_set_stat(t, NEED_TO_REACQUIRE, sem);
1521 //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid);
1522 }
1523 /*
1524 else if(is_realtime(t))
1525 {
1526 TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
1527 }
1528 */
1529}
1530
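/* Counterpart to release_klitirqd_lock(): if the semaphore status is
 * NEED_TO_REACQUIRE, block until it is re-acquired and the status is
 * back to HELD. Always returns 0 in the current implementation. */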
1531int reacquire_klitirqd_lock(struct task_struct* t)
1532{
1533 int ret = 0;
1534
1535 if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE))
1536 {
1537 struct mutex* sem;
1538 struct task_struct* owner = t;
1539
1540 if(likely(!tsk_rt(t)->is_proxy_thread))
1541 {
1542 sem = &tsk_rt(t)->klitirqd_sem;
1543 }
1544 else
1545 {
1546 unsigned int k_id = klitirqd_id(t);
1547 //struct task_struct* owner = klitirqds[k_id].current_owner;
1548 owner = klitirqds[k_id].current_owner;
1549
1550 BUG_ON(t != klitirqds[k_id].klitirqd);
1551
1552 if(likely(owner))
1553 {
1554 sem = &tsk_rt(owner)->klitirqd_sem;
1555 }
1556 else
1557 {
1558 // We had the rug pulled out from under us. Abort attempt
1559 // to reacquire the lock since our client no longer needs us.
1560 TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n");
1561 atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
1562 return(0);
1563 }
1564 }
1565
1566 //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid);
1567 __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem);
1568 //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid);
1569 }
1570 /*
1571 else if(is_realtime(t))
1572 {
1573 TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
1574 }
1575 */
1576
1577 return(ret);
1578}
1579
diff --git a/litmus/locking.c b/litmus/locking.c
index 2693f1aca859..cfce98e7480d 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -121,7 +121,6 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
121 return(t); 121 return(t);
122} 122}
123 123
124
125#else 124#else
126 125
127struct fdso_ops generic_lock_ops = {}; 126struct fdso_ops generic_lock_ops = {};
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
new file mode 100644
index 000000000000..78f035244d21
--- /dev/null
+++ b/litmus/nvidia_info.c
@@ -0,0 +1,526 @@
1#include <linux/module.h>
2#include <linux/semaphore.h>
3#include <linux/pci.h>
4
5#include <litmus/sched_trace.h>
6#include <litmus/nvidia_info.h>
7#include <litmus/litmus.h>
8
9typedef unsigned char NvV8; /* "void": enumerated or multiple fields */
10typedef unsigned short NvV16; /* "void": enumerated or multiple fields */
11typedef unsigned char NvU8; /* 0 to 255 */
12typedef unsigned short NvU16; /* 0 to 65535 */
13typedef signed char NvS8; /* -128 to 127 */
14typedef signed short NvS16; /* -32768 to 32767 */
15typedef float NvF32; /* IEEE Single Precision (S1E8M23) */
16typedef double NvF64; /* IEEE Double Precision (S1E11M52) */
17typedef unsigned int NvV32; /* "void": enumerated or multiple fields */
18typedef unsigned int NvU32; /* 0 to 4294967295 */
19typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */
20typedef union
21{
22 volatile NvV8 Reg008[1];
23 volatile NvV16 Reg016[1];
24 volatile NvV32 Reg032[1];
25} litmus_nv_hwreg_t, * litmus_nv_phwreg_t;
26
27typedef struct
28{
29 NvU64 address;
30 NvU64 size;
31 NvU32 offset;
32 NvU32 *map;
33 litmus_nv_phwreg_t map_u;
34} litmus_nv_aperture_t;
35
36typedef struct
37{
38 void *priv; /* private data */
39 void *os_state; /* os-specific device state */
40
41 int rmInitialized;
42 int flags;
43
44 /* PCI config info */
45 NvU32 domain;
46 NvU16 bus;
47 NvU16 slot;
48 NvU16 vendor_id;
49 NvU16 device_id;
50 NvU16 subsystem_id;
51 NvU32 gpu_id;
52 void *handle;
53
54 NvU32 pci_cfg_space[16];
55
56 /* physical characteristics */
57 litmus_nv_aperture_t bars[3];
58 litmus_nv_aperture_t *regs;
59 litmus_nv_aperture_t *fb, ud;
60 litmus_nv_aperture_t agp;
61
62 NvU32 interrupt_line;
63
64 NvU32 agp_config;
65 NvU32 agp_status;
66
67 NvU32 primary_vga;
68
69 NvU32 sim_env;
70
71 NvU32 rc_timer_enabled;
72
73 /* list of events allocated for this device */
74 void *event_list;
75
76 void *kern_mappings;
77
78} litmus_nv_state_t;
79
80typedef struct work_struct litmus_nv_task_t;
81
82typedef struct litmus_nv_work_s {
83 litmus_nv_task_t task;
84 void *data;
85} litmus_nv_work_t;
86
87typedef struct litmus_nv_linux_state_s {
88 litmus_nv_state_t nv_state;
89 atomic_t usage_count;
90
91 struct pci_dev *dev;
92 void *agp_bridge;
93 void *alloc_queue;
94
95 void *timer_sp;
96 void *isr_sp;
97 void *pci_cfgchk_sp;
98 void *isr_bh_sp;
99
100#ifdef CONFIG_CUDA_4_0
101 char registry_keys[512];
102#endif
103
 104 /* keep track of any pending bottom halves */
105 struct tasklet_struct tasklet;
106 litmus_nv_work_t work;
107
108 /* get a timer callback every second */
109 struct timer_list rc_timer;
110
111 /* lock for linux-specific data, not used by core rm */
112 struct semaphore ldata_lock;
113
114 /* lock for linux-specific alloc queue */
115 struct semaphore at_lock;
116
117#if 0
118#if defined(NV_USER_MAP)
119 /* list of user mappings */
120 struct nv_usermap_s *usermap_list;
121
122 /* lock for VMware-specific mapping list */
123 struct semaphore mt_lock;
124#endif /* defined(NV_USER_MAP) */
125#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
126 void *apm_nv_dev;
127#endif
128#endif
129
130 NvU32 device_num;
131 struct litmus_nv_linux_state_s *next;
132} litmus_nv_linux_state_t;
133
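/* Debugging aid: interpret a tasklet's ->data pointer as the driver's
 * nv_state (per the litmus_nv_* structure definitions above) and dump
 * the device fields plus the offsets used to locate device_num. */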
134void dump_nvidia_info(const struct tasklet_struct *t)
135{
136 litmus_nv_state_t* nvstate = NULL;
137 litmus_nv_linux_state_t* linuxstate = NULL;
138 struct pci_dev* pci = NULL;
139
140 nvstate = (litmus_nv_state_t*)(t->data);
141
142 if(nvstate)
143 {
144 TRACE("NV State:\n"
145 "\ttasklet ptr = %p\n"
146 "\tstate ptr = %p\n"
147 "\tprivate data ptr = %p\n"
148 "\tos state ptr = %p\n"
149 "\tdomain = %u\n"
150 "\tbus = %u\n"
151 "\tslot = %u\n"
 152 "\tvendor_id = %u\n"
153 "\tdevice_id = %u\n"
154 "\tsubsystem_id = %u\n"
155 "\tgpu_id = %u\n"
156 "\tinterrupt_line = %u\n",
157 t,
158 nvstate,
159 nvstate->priv,
160 nvstate->os_state,
161 nvstate->domain,
162 nvstate->bus,
163 nvstate->slot,
164 nvstate->vendor_id,
165 nvstate->device_id,
166 nvstate->subsystem_id,
167 nvstate->gpu_id,
168 nvstate->interrupt_line);
169
170 linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
171 }
172 else
173 {
174 TRACE("INVALID NVSTATE????\n");
175 }
176
177 if(linuxstate)
178 {
179 int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
180 int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
181 int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
182
183
184 TRACE("LINUX NV State:\n"
185 "\tlinux nv state ptr: %p\n"
186 "\taddress of tasklet: %p\n"
187 "\taddress of work: %p\n"
188 "\tusage_count: %d\n"
189 "\tdevice_num: %u\n"
190 "\ttasklet addr == this tasklet: %d\n"
191 "\tpci: %p\n",
192 linuxstate,
193 &(linuxstate->tasklet),
194 &(linuxstate->work),
195 atomic_read(&(linuxstate->usage_count)),
196 linuxstate->device_num,
197 (t == &(linuxstate->tasklet)),
198 linuxstate->dev);
199
200 pci = linuxstate->dev;
201
202 TRACE("Offsets:\n"
203 "\tOffset from LinuxState: %d, %x\n"
204 "\tOffset from NVState: %d, %x\n"
205 "\tOffset from parameter: %d, %x\n"
206 "\tdevice_num: %u\n",
207 ls_offset, ls_offset,
208 ns_offset_raw, ns_offset_raw,
209 ns_offset_desired, ns_offset_desired,
210 *((u32*)((void*)nvstate + ns_offset_desired)));
211 }
212 else
213 {
214 TRACE("INVALID LINUXNVSTATE?????\n");
215 }
216
217#if 0
218 if(pci)
219 {
220 TRACE("PCI DEV Info:\n"
221 "pci device ptr: %p\n"
222 "\tdevfn = %d\n"
223 "\tvendor = %d\n"
224 "\tdevice = %d\n"
225 "\tsubsystem_vendor = %d\n"
226 "\tsubsystem_device = %d\n"
227 "\tslot # = %d\n",
228 pci,
229 pci->devfn,
230 pci->vendor,
231 pci->device,
232 pci->subsystem_vendor,
233 pci->subsystem_device,
234 pci->slot->number);
235 }
236 else
237 {
238 TRACE("INVALID PCIDEV PTR?????\n");
239 }
240#endif
241}
242
243static struct module* nvidia_mod = NULL;
244int init_nvidia_info(void)
245{
246 mutex_lock(&module_mutex);
247 nvidia_mod = find_module("nvidia");
248 mutex_unlock(&module_mutex);
249 if(nvidia_mod != NULL)
250 {
251 TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
252 (void*)(nvidia_mod->module_core),
253 (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
254 init_nv_device_reg();
255 return(0);
256 }
257 else
258 {
259 TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
260 return(-1);
261 }
262}
263
264
265/* works with pointers to static data inside the module too. */
266int is_nvidia_func(void* func_addr)
267{
268 int ret = 0;
269 if(nvidia_mod)
270 {
271 ret = within_module_core((long unsigned int)func_addr, nvidia_mod);
272 /*
273 if(ret)
274 {
275 TRACE("%s : %p is in NVIDIA module: %d\n",
276 __FUNCTION__, func_addr, ret);
277 }*/
278 }
279
280 return(ret);
281}
282
283u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
284{
285 // life is too short to use hard-coded offsets. update this later.
286 litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
287 litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
288
289 BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
290
291 return(linuxstate->device_num);
292
293 //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
294
295#if 0
 296 // offset determined through observed behavior of the NV driver.
297 //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1
298 //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2
299
300 void* state = (void*)(t->data);
301 void* device_num_ptr = state + DEVICE_NUM_OFFSET;
302
303 //dump_nvidia_info(t);
304 return(*((u32*)device_num_ptr));
305#endif
306}
307
308u32 get_work_nv_device_num(const struct work_struct *t)
309{
 310 // offset determined through observed behavior of the NV driver.
311 const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
312 void* state = (void*)(t);
313 void** device_num_ptr = state + DEVICE_NUM_OFFSET;
314 return(*((u32*)(*device_num_ptr)));
315}
316
317
318
319typedef struct {
320 raw_spinlock_t lock;
321 struct task_struct *device_owner;
322}nv_device_registry_t;
323
324static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
325
326int init_nv_device_reg(void)
327{
328 int i;
329
330 //memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));
331
332 for(i = 0; i < NV_DEVICE_NUM; ++i)
333 {
334 raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
335 NV_DEVICE_REG[i].device_owner = NULL;
336 }
337
338 return(1);
339}
340
 341/* Use to get the nv_device_id for a given owner.
 342 (A return of -1 means the associated device id could not be found.) */
343/*
344int get_nv_device_id(struct task_struct* owner)
345{
346 int i;
347 if(!owner)
348 {
349 return(-1);
350 }
351 for(i = 0; i < NV_DEVICE_NUM; ++i)
352 {
353 if(NV_DEVICE_REG[i].device_owner == owner)
354 return(i);
355 }
356 return(-1);
357}
358*/
359
360
361
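/* Claim exclusive ownership of a GPU for the calling task. A lock-free
 * cmpxchg() on device_owner replaces the disabled spinlock-based path
 * in the #if 0 block below; on success the caller also takes its own
 * klitirqd_sem (status HELD). */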
362static int __reg_nv_device(int reg_device_id)
363{
364 struct task_struct* old =
365 cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner,
366 NULL,
367 current);
368
369 mb();
370
371 if(likely(old == NULL))
372 {
373 down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem);
374 TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id);
375 return(0);
376 }
377 else
378 {
379 TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
380 return(-EBUSY);
381 }
382
383#if 0
384 //unsigned long flags;
385 //raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags);
386 //lock_nv_registry(reg_device_id, &flags);
387
388 if(likely(NV_DEVICE_REG[reg_device_id].device_owner == NULL))
389 {
390 NV_DEVICE_REG[reg_device_id].device_owner = current;
391 mb(); // needed?
392
393 // release spin lock before chance of going to sleep.
394 //raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags);
395 //unlock_nv_registry(reg_device_id, &flags);
396
397 down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem);
398 TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id);
399 return(0);
400 }
401 else
402 {
403 //raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags);
404 //unlock_nv_registry(reg_device_id, &flags);
405
406 TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
407 return(-EBUSY);
408 }
409#endif
410}
411
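/* Release GPU ownership: only the registered owner may deregister.
 * Pending klitirqd work for this device is flushed back to Linux and
 * the caller's klitirqd_sem is released (status NOT_HELD). */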
412static int __clear_reg_nv_device(int de_reg_device_id)
413{
414 int ret;
415 unsigned long flags;
416 struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
417 struct task_struct* old;
418
419 lock_nv_registry(de_reg_device_id, &flags);
420
421 old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner,
422 current,
423 NULL);
424
425 mb();
426
427 if(likely(old == current))
428 {
429 flush_pending(klitirqd_th, current);
430 //unlock_nv_registry(de_reg_device_id, &flags);
431
432 up_and_set_stat(current, NOT_HELD, &tsk_rt(current)->klitirqd_sem);
433
434 unlock_nv_registry(de_reg_device_id, &flags);
435 ret = 0;
436
437 TRACE_CUR("%s: semaphore released.\n",__FUNCTION__);
438 }
439 else
440 {
441 unlock_nv_registry(de_reg_device_id, &flags);
442 ret = -EINVAL;
443
444 if(old)
445 TRACE_CUR("%s: device %d is not registered for this process's use! %s/%d is!\n",
446 __FUNCTION__, de_reg_device_id, old->comm, old->pid);
447 else
448 TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n",
449 __FUNCTION__, de_reg_device_id);
450 }
451
452 return(ret);
453}
454
455
456int reg_nv_device(int reg_device_id, int reg_action)
457{
458 int ret;
459
460 if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0))
461 {
462 if(reg_action)
463 ret = __reg_nv_device(reg_device_id);
464 else
465 ret = __clear_reg_nv_device(reg_device_id);
466 }
467 else
468 {
469 ret = -ENODEV;
470 }
471
472 return(ret);
473}
474
475/* use to get the owner of nv_device_id. */
476struct task_struct* get_nv_device_owner(u32 target_device_id)
477{
478 struct task_struct* owner;
479 BUG_ON(target_device_id >= NV_DEVICE_NUM);
480 owner = NV_DEVICE_REG[target_device_id].device_owner;
481 return(owner);
482}
483
484void lock_nv_registry(u32 target_device_id, unsigned long* flags)
485{
486 BUG_ON(target_device_id >= NV_DEVICE_NUM);
487
488 if(in_interrupt())
489 TRACE("Locking registry for %d.\n", target_device_id);
490 else
491 TRACE_CUR("Locking registry for %d.\n", target_device_id);
492
493 raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
494}
495
496void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
497{
498 BUG_ON(target_device_id >= NV_DEVICE_NUM);
499
500 if(in_interrupt())
501 TRACE("Unlocking registry for %d.\n", target_device_id);
502 else
503 TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
504
505 raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
506}
507
508
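/* Exported helper (presumably called from the GPU interrupt-tracing
 * path): bump the registered owner's nv_int_count, which the plugins
 * reset at job completion, under the device's registry lock. */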
509void increment_nv_int_count(u32 device)
510{
511 unsigned long flags;
512 struct task_struct* owner;
513
514 lock_nv_registry(device, &flags);
515
516 owner = NV_DEVICE_REG[device].device_owner;
517 if(owner)
518 {
519 atomic_inc(&tsk_rt(owner)->nv_int_count);
520 }
521
522 unlock_nv_registry(device, &flags);
523}
524EXPORT_SYMBOL(increment_nv_int_count);
525
526
diff --git a/litmus/preempt.c b/litmus/preempt.c
index ebe2e3461895..08b98c3b57bf 100644
--- a/litmus/preempt.c
+++ b/litmus/preempt.c
@@ -30,8 +30,11 @@ void sched_state_will_schedule(struct task_struct* tsk)
30 /* Litmus tasks should never be subject to a remote 30 /* Litmus tasks should never be subject to a remote
31 * set_tsk_need_resched(). */ 31 * set_tsk_need_resched(). */
32 BUG_ON(is_realtime(tsk)); 32 BUG_ON(is_realtime(tsk));
33
34/*
33 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", 35 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
34 __builtin_return_address(0)); 36 __builtin_return_address(0));
37*/
35} 38}
36 39
37/* Called by the IPI handler after another CPU called smp_send_resched(). */ 40/* Called by the IPI handler after another CPU called smp_send_resched(). */
@@ -43,13 +46,17 @@ void sched_state_ipi(void)
43 /* Cause scheduler to be invoked. 46 /* Cause scheduler to be invoked.
44 * This will cause a transition to WILL_SCHEDULE. */ 47 * This will cause a transition to WILL_SCHEDULE. */
45 set_tsk_need_resched(current); 48 set_tsk_need_resched(current);
49 /*
46 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", 50 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
47 current->comm, current->pid); 51 current->comm, current->pid);
52 */
48 } else { 53 } else {
49 /* ignore */ 54 /* ignore */
55 /*
50 TRACE_STATE("ignoring IPI in state %x (%s)\n", 56 TRACE_STATE("ignoring IPI in state %x (%s)\n",
51 get_sched_state(), 57 get_sched_state(),
52 sched_state_name(get_sched_state())); 58 sched_state_name(get_sched_state()));
59 */
53 } 60 }
54} 61}
55 62
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 73fe1c442a0d..9b0a8d3b624d 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -29,6 +29,7 @@
29#include <linux/percpu.h> 29#include <linux/percpu.h>
30#include <linux/sched.h> 30#include <linux/sched.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32#include <linux/uaccess.h>
32 33
33#include <linux/module.h> 34#include <linux/module.h>
34 35
@@ -45,7 +46,18 @@
45 46
46/* to configure the cluster size */ 47/* to configure the cluster size */
47#include <litmus/litmus_proc.h> 48#include <litmus/litmus_proc.h>
48#include <linux/uaccess.h> 49
50#ifdef CONFIG_SCHED_CPU_AFFINITY
51#include <litmus/affinity.h>
52#endif
53
54#ifdef CONFIG_LITMUS_SOFTIRQD
55#include <litmus/litmus_softirq.h>
56#endif
57
58#ifdef CONFIG_LITMUS_NVIDIA
59#include <litmus/nvidia_info.h>
60#endif
49 61
50/* Reference configuration variable. Determines which cache level is used to 62/* Reference configuration variable. Determines which cache level is used to
51 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that 63 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
@@ -95,7 +107,7 @@ typedef struct clusterdomain {
95 struct bheap_node *heap_node; 107 struct bheap_node *heap_node;
96 struct bheap cpu_heap; 108 struct bheap cpu_heap;
97 /* lock for this cluster */ 109 /* lock for this cluster */
98#define lock domain.ready_lock 110#define cedf_lock domain.ready_lock
99} cedf_domain_t; 111} cedf_domain_t;
100 112
101/* a cedf_domain per cluster; allocation is done at init/activation time */ 113/* a cedf_domain per cluster; allocation is done at init/activation time */
@@ -257,21 +269,50 @@ static noinline void requeue(struct task_struct* task)
257 } 269 }
258} 270}
259 271
272#ifdef CONFIG_SCHED_CPU_AFFINITY
273static cpu_entry_t* cedf_get_nearest_available_cpu(
274 cedf_domain_t *cluster, cpu_entry_t* start)
275{
276 cpu_entry_t* affinity;
277
278 get_nearest_available_cpu(affinity, start, cedf_cpu_entries, -1);
279
280 /* make sure CPU is in our cluster */
281 if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
282 return(affinity);
283 else
284 return(NULL);
285}
286#endif
287
288
260/* check for any necessary preemptions */ 289/* check for any necessary preemptions */
261static void check_for_preemptions(cedf_domain_t *cluster) 290static void check_for_preemptions(cedf_domain_t *cluster)
262{ 291{
263 struct task_struct *task; 292 struct task_struct *task;
264 cpu_entry_t* last; 293 cpu_entry_t *last;
265 294
266 for(last = lowest_prio_cpu(cluster); 295 for(last = lowest_prio_cpu(cluster);
267 edf_preemption_needed(&cluster->domain, last->linked); 296 edf_preemption_needed(&cluster->domain, last->linked);
268 last = lowest_prio_cpu(cluster)) { 297 last = lowest_prio_cpu(cluster)) {
269 /* preemption necessary */ 298 /* preemption necessary */
270 task = __take_ready(&cluster->domain); 299 task = __take_ready(&cluster->domain);
271 TRACE("check_for_preemptions: attempting to link task %d to %d\n", 300#ifdef CONFIG_SCHED_CPU_AFFINITY
272 task->pid, last->cpu); 301 {
302 cpu_entry_t* affinity =
303 cedf_get_nearest_available_cpu(cluster,
304 &per_cpu(cedf_cpu_entries, task_cpu(task)));
305 if(affinity)
306 last = affinity;
307 else if(last->linked)
308 requeue(last->linked);
309 }
310#else
273 if (last->linked) 311 if (last->linked)
274 requeue(last->linked); 312 requeue(last->linked);
313#endif
314 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
315 task->pid, last->cpu);
275 link_task_to_cpu(task, last); 316 link_task_to_cpu(task, last);
276 preempt(last); 317 preempt(last);
277 } 318 }
@@ -292,12 +333,12 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
292 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); 333 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
293 unsigned long flags; 334 unsigned long flags;
294 335
295 raw_spin_lock_irqsave(&cluster->lock, flags); 336 raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
296 337
297 __merge_ready(&cluster->domain, tasks); 338 __merge_ready(&cluster->domain, tasks);
298 check_for_preemptions(cluster); 339 check_for_preemptions(cluster);
299 340
300 raw_spin_unlock_irqrestore(&cluster->lock, flags); 341 raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
301} 342}
302 343
303/* caller holds cedf_lock */ 344/* caller holds cedf_lock */
@@ -307,6 +348,10 @@ static noinline void job_completion(struct task_struct *t, int forced)
307 348
308 sched_trace_task_completion(t, forced); 349 sched_trace_task_completion(t, forced);
309 350
351#ifdef CONFIG_LITMUS_NVIDIA
352 atomic_set(&tsk_rt(t)->nv_int_count, 0);
353#endif
354
310 TRACE_TASK(t, "job_completion().\n"); 355 TRACE_TASK(t, "job_completion().\n");
311 356
312 /* set flags */ 357 /* set flags */
@@ -378,7 +423,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
378 int out_of_time, sleep, preempt, np, exists, blocks; 423 int out_of_time, sleep, preempt, np, exists, blocks;
379 struct task_struct* next = NULL; 424 struct task_struct* next = NULL;
380 425
381 raw_spin_lock(&cluster->lock); 426 raw_spin_lock(&cluster->cedf_lock);
382 clear_will_schedule(); 427 clear_will_schedule();
383 428
384 /* sanity checking */ 429 /* sanity checking */
@@ -462,7 +507,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
462 next = prev; 507 next = prev;
463 508
464 sched_state_task_picked(); 509 sched_state_task_picked();
465 raw_spin_unlock(&cluster->lock); 510 raw_spin_unlock(&cluster->cedf_lock);
466 511
467#ifdef WANT_ALL_SCHED_EVENTS 512#ifdef WANT_ALL_SCHED_EVENTS
468 TRACE("cedf_lock released, next=0x%p\n", next); 513 TRACE("cedf_lock released, next=0x%p\n", next);
@@ -504,7 +549,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
504 /* the cluster doesn't change even if t is running */ 549 /* the cluster doesn't change even if t is running */
505 cluster = task_cpu_cluster(t); 550 cluster = task_cpu_cluster(t);
506 551
507 raw_spin_lock_irqsave(&cluster->domain.ready_lock, flags); 552 raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
508 553
509 /* setup job params */ 554 /* setup job params */
510 release_at(t, litmus_clock()); 555 release_at(t, litmus_clock());
@@ -521,20 +566,22 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
521 t->rt_param.linked_on = NO_CPU; 566 t->rt_param.linked_on = NO_CPU;
522 567
523 cedf_job_arrival(t); 568 cedf_job_arrival(t);
524 raw_spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags); 569 raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
525} 570}
526 571
527static void cedf_task_wake_up(struct task_struct *task) 572static void cedf_task_wake_up(struct task_struct *task)
528{ 573{
529 unsigned long flags; 574 unsigned long flags;
530 lt_t now; 575 //lt_t now;
531 cedf_domain_t *cluster; 576 cedf_domain_t *cluster;
532 577
533 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 578 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
534 579
535 cluster = task_cpu_cluster(task); 580 cluster = task_cpu_cluster(task);
536 581
537 raw_spin_lock_irqsave(&cluster->lock, flags); 582 raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
583
584#if 0 // sporadic task model
538 /* We need to take suspensions because of semaphores into 585 /* We need to take suspensions because of semaphores into
539 * account! If a job resumes after being suspended due to acquiring 586 * account! If a job resumes after being suspended due to acquiring
540 * a semaphore, it should never be treated as a new job release. 587 * a semaphore, it should never be treated as a new job release.
@@ -556,8 +603,17 @@ static void cedf_task_wake_up(struct task_struct *task)
556 } 603 }
557 } 604 }
558 } 605 }
559 cedf_job_arrival(task); 606#endif
560 raw_spin_unlock_irqrestore(&cluster->lock, flags); 607
608 //BUG_ON(tsk_rt(task)->linked_on != NO_CPU);
609 set_rt_flags(task, RT_F_RUNNING); // periodic model
610
611 if(tsk_rt(task)->linked_on == NO_CPU)
612 cedf_job_arrival(task);
613 else
 614		TRACE("%s: task already linked on wake-up?\n", __FUNCTION__);
615
616 raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
561} 617}
562 618
563static void cedf_task_block(struct task_struct *t) 619static void cedf_task_block(struct task_struct *t)
@@ -570,9 +626,9 @@ static void cedf_task_block(struct task_struct *t)
570 cluster = task_cpu_cluster(t); 626 cluster = task_cpu_cluster(t);
571 627
572 /* unlink if necessary */ 628 /* unlink if necessary */
573 raw_spin_lock_irqsave(&cluster->lock, flags); 629 raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
574 unlink(t); 630 unlink(t);
575 raw_spin_unlock_irqrestore(&cluster->lock, flags); 631 raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
576 632
577 BUG_ON(!is_realtime(t)); 633 BUG_ON(!is_realtime(t));
578} 634}
@@ -584,7 +640,7 @@ static void cedf_task_exit(struct task_struct * t)
584 cedf_domain_t *cluster = task_cpu_cluster(t); 640 cedf_domain_t *cluster = task_cpu_cluster(t);
585 641
586 /* unlink if necessary */ 642 /* unlink if necessary */
587 raw_spin_lock_irqsave(&cluster->lock, flags); 643 raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
588 unlink(t); 644 unlink(t);
589 if (tsk_rt(t)->scheduled_on != NO_CPU) { 645 if (tsk_rt(t)->scheduled_on != NO_CPU) {
590 cpu_entry_t *cpu; 646 cpu_entry_t *cpu;
@@ -592,7 +648,7 @@ static void cedf_task_exit(struct task_struct * t)
592 cpu->scheduled = NULL; 648 cpu->scheduled = NULL;
593 tsk_rt(t)->scheduled_on = NO_CPU; 649 tsk_rt(t)->scheduled_on = NO_CPU;
594 } 650 }
595 raw_spin_unlock_irqrestore(&cluster->lock, flags); 651 raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
596 652
597 BUG_ON(!is_realtime(t)); 653 BUG_ON(!is_realtime(t));
598 TRACE_TASK(t, "RIP\n"); 654 TRACE_TASK(t, "RIP\n");
@@ -603,6 +659,721 @@ static long cedf_admit_task(struct task_struct* tsk)
603 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; 659 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
604} 660}
605 661
662
663
664
665
666
667
668
669
670
671
672
673
674#ifdef CONFIG_LITMUS_LOCKING
675
676#include <litmus/fdso.h>
677
678
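/* Caller holds cedf_lock. Record prio_inh as t's inherited priority and
 * re-order whichever structure currently ranks t: the per-cluster CPU
 * heap if t is linked to a CPU, or the ready/release heaps otherwise
 * (followed by a preemption check when the ready-queue minimum may have
 * changed). */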
679static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
680{
681 int linked_on;
682 int check_preempt = 0;
683
684 cedf_domain_t* cluster = task_cpu_cluster(t);
685
686 if(prio_inh != NULL)
687 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
688 else
689 TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
690
691 sched_trace_eff_prio_change(t, prio_inh);
692
693 tsk_rt(t)->inh_task = prio_inh;
694
695 linked_on = tsk_rt(t)->linked_on;
696
697 /* If it is scheduled, then we need to reorder the CPU heap. */
698 if (linked_on != NO_CPU) {
699 TRACE_TASK(t, "%s: linked on %d\n",
700 __FUNCTION__, linked_on);
701 /* Holder is scheduled; need to re-order CPUs.
702 * We can't use heap_decrease() here since
703 * the cpu_heap is ordered in reverse direction, so
704 * it is actually an increase. */
705 bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
706 per_cpu(cedf_cpu_entries, linked_on).hn);
707 bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
708 per_cpu(cedf_cpu_entries, linked_on).hn);
709 } else {
710 /* holder may be queued: first stop queue changes */
711 raw_spin_lock(&cluster->domain.release_lock);
712 if (is_queued(t)) {
713 TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
714
715 /* We need to update the position of holder in some
 716			 * heap. Note that this could be a release heap if
 717			 * budget enforcement is used and this job overran. */
718 check_preempt = !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
719
720 } else {
721 /* Nothing to do: if it is not queued and not linked
722 * then it is either sleeping or currently being moved
723 * by other code (e.g., a timer interrupt handler) that
724 * will use the correct priority when enqueuing the
725 * task. */
726 TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
727 }
728 raw_spin_unlock(&cluster->domain.release_lock);
729
730 /* If holder was enqueued in a release heap, then the following
731 * preemption check is pointless, but we can't easily detect
732 * that case. If you want to fix this, then consider that
733 * simply adding a state flag requires O(n) time to update when
734 * releasing n tasks, which conflicts with the goal to have
735 * O(log n) merges. */
736 if (check_preempt) {
737 /* heap_decrease() hit the top level of the heap: make
738 * sure preemption checks get the right task, not the
739 * potentially stale cache. */
740 bheap_uncache_min(edf_ready_order, &cluster->domain.ready_queue);
741 check_for_preemptions(cluster);
742 }
743 }
744}
745
746/* called with IRQs off */
747static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
748{
749 cedf_domain_t* cluster = task_cpu_cluster(t);
750
751 raw_spin_lock(&cluster->cedf_lock);
752
753 __set_priority_inheritance(t, prio_inh);
754
755#ifdef CONFIG_LITMUS_SOFTIRQD
756 if(tsk_rt(t)->cur_klitirqd != NULL)
757 {
758 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
759 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
760
761 __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
762 }
763#endif
764
765 raw_spin_unlock(&cluster->cedf_lock);
766}
767
768
769/* called with IRQs off */
770static void __clear_priority_inheritance(struct task_struct* t)
771{
772 TRACE_TASK(t, "priority restored\n");
773
774 if(tsk_rt(t)->scheduled_on != NO_CPU)
775 {
776 sched_trace_eff_prio_change(t, NULL);
777
778 tsk_rt(t)->inh_task = NULL;
779
780 /* Check if rescheduling is necessary. We can't use heap_decrease()
781 * since the priority was effectively lowered. */
782 unlink(t);
783 cedf_job_arrival(t);
784 }
785 else
786 {
787 __set_priority_inheritance(t, NULL);
788 }
789
790#ifdef CONFIG_LITMUS_SOFTIRQD
791 if(tsk_rt(t)->cur_klitirqd != NULL)
792 {
793 TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
794 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
795
796 if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
797 {
798 sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
799
800 tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
801
802 /* Check if rescheduling is necessary. We can't use heap_decrease()
803 * since the priority was effectively lowered. */
804 unlink(tsk_rt(t)->cur_klitirqd);
805 cedf_job_arrival(tsk_rt(t)->cur_klitirqd);
806 }
807 else
808 {
809 __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
810 }
811 }
812#endif
813}
814
815/* called with IRQs off */
816static void clear_priority_inheritance(struct task_struct* t)
817{
818 cedf_domain_t* cluster = task_cpu_cluster(t);
819
820 raw_spin_lock(&cluster->cedf_lock);
821 __clear_priority_inheritance(t);
822 raw_spin_unlock(&cluster->cedf_lock);
823}
824
825
826
827#ifdef CONFIG_LITMUS_SOFTIRQD
828/* called with IRQs off */
829static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
830 struct task_struct* old_owner,
831 struct task_struct* new_owner)
832{
833 cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
834
835 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
836
837 raw_spin_lock(&cluster->cedf_lock);
838
839 if(old_owner != new_owner)
840 {
841 if(old_owner)
842 {
843 // unreachable?
844 tsk_rt(old_owner)->cur_klitirqd = NULL;
845 }
846
847 TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
848 new_owner->comm, new_owner->pid);
849
850 tsk_rt(new_owner)->cur_klitirqd = klitirqd;
851 }
852
853 __set_priority_inheritance(klitirqd,
854 (tsk_rt(new_owner)->inh_task == NULL) ?
855 new_owner :
856 tsk_rt(new_owner)->inh_task);
857
858 raw_spin_unlock(&cluster->cedf_lock);
859}
860
861/* called with IRQs off */
862static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
863 struct task_struct* old_owner)
864{
865 cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
866
867 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
868
869 raw_spin_lock(&cluster->cedf_lock);
870
871 TRACE_TASK(klitirqd, "priority restored\n");
872
873 if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
874 {
875 tsk_rt(klitirqd)->inh_task = NULL;
876
877 /* Check if rescheduling is necessary. We can't use heap_decrease()
878 * since the priority was effectively lowered. */
879 unlink(klitirqd);
880 cedf_job_arrival(klitirqd);
881 }
882 else
883 {
884 __set_priority_inheritance(klitirqd, NULL);
885 }
886
887 tsk_rt(old_owner)->cur_klitirqd = NULL;
888
889 raw_spin_unlock(&cluster->cedf_lock);
890}
891#endif // CONFIG_LITMUS_SOFTIRQD
892
893
894/* ******************** KFMLP support ********************** */
895
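/* KFMLP: a k-exclusion variant of the FMLP. The semaphore manages k
 * replicas of a resource, each with its own FIFO wait queue, owner, and
 * hp_waiter for priority inheritance. Lock requests join the currently
 * shortest queue; an idle replica may "steal" the highest-priority
 * waiter of a longer queue on unlock. */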
896/* struct for semaphore with priority inheritance */
897struct kfmlp_queue
898{
899 wait_queue_head_t wait;
900 struct task_struct* owner;
901 struct task_struct* hp_waiter;
902 int count; /* number of waiters + holder */
903};
904
905struct kfmlp_semaphore
906{
907 struct litmus_lock litmus_lock;
908
909 spinlock_t lock;
910
911 int num_resources; /* aka k */
912 struct kfmlp_queue *queues; /* array */
913 struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
914};
915
916static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
917{
918 return container_of(lock, struct kfmlp_semaphore, litmus_lock);
919}
920
921static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
922 struct kfmlp_queue* queue)
923{
924 return (queue - &sem->queues[0]);
925}
926
927static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
928 struct task_struct* holder)
929{
930 int i;
931 for(i = 0; i < sem->num_resources; ++i)
932 if(sem->queues[i].owner == holder)
933 return(&sem->queues[i]);
934 return(NULL);
935}
936
937/* caller is responsible for locking */
938static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
939 struct task_struct *skip)
940{
941 struct list_head *pos;
942 struct task_struct *queued, *found = NULL;
943
944 list_for_each(pos, &kqueue->wait.task_list) {
945 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
946 task_list)->private;
947
948 /* Compare task prios, find high prio task. */
949 if (queued != skip && edf_higher_prio(queued, found))
950 found = queued;
951 }
952 return found;
953}
954
955static inline struct kfmlp_queue* kfmlp_find_shortest(
956 struct kfmlp_semaphore* sem,
957 struct kfmlp_queue* search_start)
958{
959 // we start our search at search_start instead of at the beginning of the
960 // queue list to load-balance across all resources.
961 struct kfmlp_queue* step = search_start;
962 struct kfmlp_queue* shortest = sem->shortest_queue;
963
964 do
965 {
966 step = (step+1 != &sem->queues[sem->num_resources]) ?
967 step+1 : &sem->queues[0];
968 if(step->count < shortest->count)
969 {
970 shortest = step;
971 if(step->count == 0)
972 break; /* can't get any shorter */
973 }
974 }while(step != search_start);
975
976 return(shortest);
977}
978
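/* Steal path: scan all queues for the highest-priority waiter in any
 * queue with more than one task, undo any priority that waiter donated
 * to its queue's owner, remove it from the wait queue, and return it so
 * the caller can grant it the now-idle replica. Returns NULL if there is
 * nothing to steal. */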
979static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
980{
981 /* must hold sem->lock */
982
983 struct kfmlp_queue *my_queue = NULL;
984 struct task_struct *max_hp = NULL;
985
986
987 struct list_head *pos;
988 struct task_struct *queued;
989 int i;
990
991 for(i = 0; i < sem->num_resources; ++i)
992 {
993 if( (sem->queues[i].count > 1) &&
994 ((my_queue == NULL) ||
995 (edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
996 {
997 my_queue = &sem->queues[i];
998 }
999 }
1000
1001 if(my_queue)
1002 {
1003 cedf_domain_t* cluster;
1004
1005 max_hp = my_queue->hp_waiter;
1006 BUG_ON(!max_hp);
1007
1008 TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
1009 kfmlp_get_idx(sem, my_queue),
1010 max_hp->comm, max_hp->pid,
1011 kfmlp_get_idx(sem, my_queue));
1012
1013 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
1014
1015 /*
1016 if(my_queue->hp_waiter)
1017 TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
1018 kfmlp_get_idx(sem, my_queue),
1019 my_queue->hp_waiter->comm,
1020 my_queue->hp_waiter->pid);
1021 else
1022 TRACE_CUR("queue %d: new hp_waiter is %p\n",
1023 kfmlp_get_idx(sem, my_queue), NULL);
1024 */
1025
1026 cluster = task_cpu_cluster(max_hp);
1027
1028 raw_spin_lock(&cluster->cedf_lock);
1029
1030 /*
1031 if(my_queue->owner)
1032 TRACE_CUR("queue %d: owner is %s/%d\n",
1033 kfmlp_get_idx(sem, my_queue),
1034 my_queue->owner->comm,
1035 my_queue->owner->pid);
1036 else
1037 TRACE_CUR("queue %d: owner is %p\n",
1038 kfmlp_get_idx(sem, my_queue),
1039 NULL);
1040 */
1041
1042 if(tsk_rt(my_queue->owner)->inh_task == max_hp)
1043 {
1044 __clear_priority_inheritance(my_queue->owner);
1045 if(my_queue->hp_waiter != NULL)
1046 {
1047 __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
1048 }
1049 }
1050 raw_spin_unlock(&cluster->cedf_lock);
1051
1052 list_for_each(pos, &my_queue->wait.task_list)
1053 {
1054 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
1055 task_list)->private;
1056 /* Compare task prios, find high prio task. */
1057 if (queued == max_hp)
1058 {
1059 /*
1060 TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
1061 kfmlp_get_idx(sem, my_queue));
1062 */
1063 __remove_wait_queue(&my_queue->wait,
1064 list_entry(pos, wait_queue_t, task_list));
1065 break;
1066 }
1067 }
1068 --(my_queue->count);
1069 }
1070
1071 return(max_hp);
1072}
1073
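/* Acquire one replica: join the shortest queue; if that replica is held,
 * enqueue FIFO (becoming hp_waiter and donating priority to the owner if
 * we out-prioritize them) and suspend. Returns the index of the queue
 * whose replica was acquired. */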
1074int cedf_kfmlp_lock(struct litmus_lock* l)
1075{
1076 struct task_struct* t = current;
1077 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1078 struct kfmlp_queue* my_queue;
1079 wait_queue_t wait;
1080 unsigned long flags;
1081
1082 if (!is_realtime(t))
1083 return -EPERM;
1084
1085 spin_lock_irqsave(&sem->lock, flags);
1086
1087 my_queue = sem->shortest_queue;
1088
1089 if (my_queue->owner) {
1090 /* resource is not free => must suspend and wait */
1091 TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
1092 kfmlp_get_idx(sem, my_queue));
1093
1094 init_waitqueue_entry(&wait, t);
1095
1096 /* FIXME: interruptible would be nice some day */
1097 set_task_state(t, TASK_UNINTERRUPTIBLE);
1098
1099 __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
1100
1101 /* check if we need to activate priority inheritance */
1102 if (edf_higher_prio(t, my_queue->hp_waiter))
1103 {
1104 my_queue->hp_waiter = t;
1105 if (edf_higher_prio(t, my_queue->owner))
1106 {
1107 set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
1108 }
1109 }
1110
1111 ++(my_queue->count);
1112 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
1113
1114 /* release lock before sleeping */
1115 spin_unlock_irqrestore(&sem->lock, flags);
1116
1117 /* We depend on the FIFO order. Thus, we don't need to recheck
1118 * when we wake up; we are guaranteed to have the lock since
1119 * there is only one wake up per release (or steal).
1120 */
1121 schedule();
1122
1123
1124 if(my_queue->owner == t)
1125 {
1126 TRACE_CUR("queue %d: acquired through waiting\n",
1127 kfmlp_get_idx(sem, my_queue));
1128 }
1129 else
1130 {
1131 /* this case may happen if our wait entry was stolen
1132 between queues. record where we went.*/
1133 my_queue = kfmlp_get_queue(sem, t);
1134 BUG_ON(!my_queue);
1135 TRACE_CUR("queue %d: acquired through stealing\n",
1136 kfmlp_get_idx(sem, my_queue));
1137 }
1138 }
1139 else
1140 {
1141 TRACE_CUR("queue %d: acquired immediately\n",
1142 kfmlp_get_idx(sem, my_queue));
1143
1144 my_queue->owner = t;
1145
1146 ++(my_queue->count);
1147 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
1148
1149 spin_unlock_irqrestore(&sem->lock, flags);
1150 }
1151
1152 return kfmlp_get_idx(sem, my_queue);
1153}
1154
1155int cedf_kfmlp_unlock(struct litmus_lock* l)
1156{
1157 struct task_struct *t = current, *next;
1158 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1159 struct kfmlp_queue *my_queue;
1160 unsigned long flags;
1161 int err = 0;
1162
1163 spin_lock_irqsave(&sem->lock, flags);
1164
1165 my_queue = kfmlp_get_queue(sem, t);
1166
1167 if (!my_queue) {
1168 err = -EINVAL;
1169 goto out;
1170 }
1171
1172 /* check if there are jobs waiting for this resource */
1173 next = __waitqueue_remove_first(&my_queue->wait);
1174 if (next) {
1175 /*
1176 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
1177 kfmlp_get_idx(sem, my_queue),
1178 next->comm, next->pid);
1179 */
 1180		/* next becomes the resource holder */
1181 my_queue->owner = next;
1182
1183 --(my_queue->count);
1184 if(my_queue->count < sem->shortest_queue->count)
1185 {
1186 sem->shortest_queue = my_queue;
1187 }
1188
1189 TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
1190 kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
1191
1192 /* determine new hp_waiter if necessary */
1193 if (next == my_queue->hp_waiter) {
1194 TRACE_TASK(next, "was highest-prio waiter\n");
1195 /* next has the highest priority --- it doesn't need to
1196 * inherit. However, we need to make sure that the
1197 * next-highest priority in the queue is reflected in
1198 * hp_waiter. */
1199 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
1200 if (my_queue->hp_waiter)
1201 TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
1202 else
1203 TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
1204 } else {
1205 /* Well, if next is not the highest-priority waiter,
1206 * then it ought to inherit the highest-priority
1207 * waiter's priority. */
1208 set_priority_inheritance(next, my_queue->hp_waiter);
1209 }
1210
1211 /* wake up next */
1212 wake_up_process(next);
1213 }
1214 else
1215 {
1216 TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
1217
1218 next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
1219
1220 /*
1221 if(next)
1222 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
1223 kfmlp_get_idx(sem, my_queue),
1224 next->comm, next->pid);
1225 */
1226
1227 my_queue->owner = next;
1228
1229 if(next)
1230 {
1231 TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
1232 kfmlp_get_idx(sem, my_queue),
1233 next->comm, next->pid);
1234
1235 /* wake up next */
1236 wake_up_process(next);
1237 }
1238 else
1239 {
1240 TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
1241
1242 --(my_queue->count);
1243 if(my_queue->count < sem->shortest_queue->count)
1244 {
1245 sem->shortest_queue = my_queue;
1246 }
1247 }
1248 }
1249
1250 /* we lose the benefit of priority inheritance (if any) */
1251 if (tsk_rt(t)->inh_task)
1252 clear_priority_inheritance(t);
1253
1254out:
1255 spin_unlock_irqrestore(&sem->lock, flags);
1256
1257 return err;
1258}
1259
1260int cedf_kfmlp_close(struct litmus_lock* l)
1261{
1262 struct task_struct *t = current;
1263 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1264 struct kfmlp_queue *my_queue;
1265 unsigned long flags;
1266
1267 int owner;
1268
1269 spin_lock_irqsave(&sem->lock, flags);
1270
1271 my_queue = kfmlp_get_queue(sem, t);
1272 owner = (my_queue) ? (my_queue->owner == t) : 0;
1273
1274 spin_unlock_irqrestore(&sem->lock, flags);
1275
1276 if (owner)
1277 cedf_kfmlp_unlock(l);
1278
1279 return 0;
1280}
1281
1282void cedf_kfmlp_free(struct litmus_lock* l)
1283{
1284 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1285 kfree(sem->queues);
1286 kfree(sem);
1287}
1288
1289static struct litmus_lock_ops cedf_kfmlp_lock_ops = {
1290 .close = cedf_kfmlp_close,
1291 .lock = cedf_kfmlp_lock,
1292 .unlock = cedf_kfmlp_unlock,
1293 .deallocate = cedf_kfmlp_free,
1294};
1295
1296static struct litmus_lock* cedf_new_kfmlp(void* __user arg, int* ret_code)
1297{
1298 struct kfmlp_semaphore* sem;
1299 int num_resources = 0;
1300 int i;
1301
1302 if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
1303 {
1304 *ret_code = -EINVAL;
1305 return(NULL);
1306 }
1307 if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
1308 {
1309 *ret_code = -EINVAL;
1310 return(NULL);
1311 }
1312 if(num_resources < 1)
1313 {
1314 *ret_code = -EINVAL;
1315 return(NULL);
1316 }
1317
1318 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1319 if(!sem)
1320 {
1321 *ret_code = -ENOMEM;
1322 return NULL;
1323 }
1324
1325 sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
1326 if(!sem->queues)
1327 {
1328 kfree(sem);
1329 *ret_code = -ENOMEM;
1330 return NULL;
1331 }
1332
1333 sem->litmus_lock.ops = &cedf_kfmlp_lock_ops;
1334 spin_lock_init(&sem->lock);
1335 sem->num_resources = num_resources;
1336
1337 for(i = 0; i < num_resources; ++i)
1338 {
1339 sem->queues[i].owner = NULL;
1340 sem->queues[i].hp_waiter = NULL;
1341 init_waitqueue_head(&sem->queues[i].wait);
1342 sem->queues[i].count = 0;
1343 }
1344
1345 sem->shortest_queue = &sem->queues[0];
1346
1347 *ret_code = 0;
1348 return &sem->litmus_lock;
1349}
1350
1351
1352/* **** lock constructor **** */
1353
1354static long cedf_allocate_lock(struct litmus_lock **lock, int type,
1355 void* __user arg)
1356{
1357 int err = -ENXIO;
1358
 1359	/* C-EDF currently only supports the KFMLP for resources shared
 1360	   WITHIN a given cluster. DO NOT USE IT CROSS-CLUSTER! */
1361 switch (type) {
1362 case KFMLP_SEM:
1363 *lock = cedf_new_kfmlp(arg, &err);
1364 break;
1365 };
1366
1367 return err;
1368}
1369
1370#endif // CONFIG_LITMUS_LOCKING
1371
1372
1373
1374
1375
1376
606/* total number of cluster */ 1377/* total number of cluster */
607static int num_clusters; 1378static int num_clusters;
608/* we do not support cluster of different sizes */ 1379/* we do not support cluster of different sizes */
@@ -746,6 +1517,40 @@ static long cedf_activate_plugin(void)
746 break; 1517 break;
747 } 1518 }
748 } 1519 }
1520
1521#ifdef CONFIG_LITMUS_SOFTIRQD
1522 {
1523 /* distribute the daemons evenly across the clusters. */
1524 int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
1525 int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
1526 int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
1527
1528 int daemon = 0;
1529 for(i = 0; i < num_clusters; ++i)
1530 {
1531 int num_on_this_cluster = num_daemons_per_cluster;
1532 if(left_over)
1533 {
1534 ++num_on_this_cluster;
1535 --left_over;
1536 }
1537
1538 for(j = 0; j < num_on_this_cluster; ++j)
1539 {
1540 // first CPU of this cluster
1541 affinity[daemon++] = i*cluster_size;
1542 }
1543 }
1544
1545 spawn_klitirqd(affinity);
1546
1547 kfree(affinity);
1548 }
1549#endif
1550
1551#ifdef CONFIG_LITMUS_NVIDIA
1552 init_nvidia_info();
1553#endif
749 1554
750 free_cpumask_var(mask); 1555 free_cpumask_var(mask);
751 clusters_allocated = 1; 1556 clusters_allocated = 1;
@@ -765,6 +1570,15 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
765 .task_block = cedf_task_block, 1570 .task_block = cedf_task_block,
766 .admit_task = cedf_admit_task, 1571 .admit_task = cedf_admit_task,
767 .activate_plugin = cedf_activate_plugin, 1572 .activate_plugin = cedf_activate_plugin,
1573#ifdef CONFIG_LITMUS_LOCKING
1574 .allocate_lock = cedf_allocate_lock,
1575 .set_prio_inh = set_priority_inheritance,
1576 .clear_prio_inh = clear_priority_inheritance,
1577#endif
1578#ifdef CONFIG_LITMUS_SOFTIRQD
1579 .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
1580 .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
1581#endif
768}; 1582};
769 1583
770static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; 1584static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index 3092797480f8..d04e0703c154 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -12,6 +12,8 @@
12#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/uaccess.h>
16
15 17
16#include <litmus/litmus.h> 18#include <litmus/litmus.h>
17#include <litmus/jobs.h> 19#include <litmus/jobs.h>
@@ -25,6 +27,19 @@
25 27
26#include <linux/module.h> 28#include <linux/module.h>
27 29
30#ifdef CONFIG_SCHED_CPU_AFFINITY
31#include <litmus/affinity.h>
32#endif
33
34#ifdef CONFIG_LITMUS_SOFTIRQD
35#include <litmus/litmus_softirq.h>
36#endif
37
38#ifdef CONFIG_LITMUS_NVIDIA
39#include <litmus/nvidia_info.h>
40#endif
41
42
28/* Overview of GSN-EDF operations. 43/* Overview of GSN-EDF operations.
29 * 44 *
30 * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This 45 * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
@@ -253,21 +268,52 @@ static noinline void requeue(struct task_struct* task)
253 } 268 }
254} 269}
255 270
271#ifdef CONFIG_SCHED_CPU_AFFINITY
272static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t* start)
273{
274 cpu_entry_t* affinity;
275
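	/* 'affinity' is filled in by the get_nearest_available_cpu() macro (it is
	 * passed in uninitialized and read afterward); the last argument names the
	 * release-master CPU, or -1 when no release master is configured. */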
276 get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
277#ifdef CONFIG_RELEASE_MASTER
278 gsnedf.release_master
279#else
280 -1
281#endif
282 );
283
284 return(affinity);
285}
286#endif
287
256/* check for any necessary preemptions */ 288/* check for any necessary preemptions */
257static void check_for_preemptions(void) 289static void check_for_preemptions(void)
258{ 290{
259 struct task_struct *task; 291 struct task_struct *task;
260 cpu_entry_t* last; 292 cpu_entry_t *last;
261 293
262 for(last = lowest_prio_cpu(); 294 for(last = lowest_prio_cpu();
263 edf_preemption_needed(&gsnedf, last->linked); 295 edf_preemption_needed(&gsnedf, last->linked);
264 last = lowest_prio_cpu()) { 296 last = lowest_prio_cpu()) {
265 /* preemption necessary */ 297 /* preemption necessary */
266 task = __take_ready(&gsnedf); 298 task = __take_ready(&gsnedf);
267 TRACE("check_for_preemptions: attempting to link task %d to %d\n", 299
268 task->pid, last->cpu); 300#ifdef CONFIG_SCHED_CPU_AFFINITY
301 {
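		/* Prefer an available CPU close to the CPU the task last ran on; only
		 * if none is found do we fall back to preempting the lowest-priority
		 * CPU and requeueing its currently linked task. */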
302 cpu_entry_t* affinity = gsnedf_get_nearest_available_cpu(
303 &per_cpu(gsnedf_cpu_entries, task_cpu(task)));
304 if(affinity)
305 last = affinity;
306 else if(last->linked)
307 requeue(last->linked);
308 }
309#else
269 if (last->linked) 310 if (last->linked)
270 requeue(last->linked); 311 requeue(last->linked);
312#endif
313
314 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
315 task->pid, last->cpu);
316
271 link_task_to_cpu(task, last); 317 link_task_to_cpu(task, last);
272 preempt(last); 318 preempt(last);
273 } 319 }
@@ -277,7 +323,7 @@ static void check_for_preemptions(void)
277static noinline void gsnedf_job_arrival(struct task_struct* task) 323static noinline void gsnedf_job_arrival(struct task_struct* task)
278{ 324{
279 BUG_ON(!task); 325 BUG_ON(!task);
280 326
281 requeue(task); 327 requeue(task);
282 check_for_preemptions(); 328 check_for_preemptions();
283} 329}
@@ -298,9 +344,13 @@ static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
298static noinline void job_completion(struct task_struct *t, int forced) 344static noinline void job_completion(struct task_struct *t, int forced)
299{ 345{
300 BUG_ON(!t); 346 BUG_ON(!t);
301 347
302 sched_trace_task_completion(t, forced); 348 sched_trace_task_completion(t, forced);
303 349
350#ifdef CONFIG_LITMUS_NVIDIA
351 atomic_set(&tsk_rt(t)->nv_int_count, 0);
352#endif
353
304 TRACE_TASK(t, "job_completion().\n"); 354 TRACE_TASK(t, "job_completion().\n");
305 355
306 /* set flags */ 356 /* set flags */
@@ -401,17 +451,19 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
401 TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); 451 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
402#endif 452#endif
403 453
454 /*
404 if (exists) 455 if (exists)
405 TRACE_TASK(prev, 456 TRACE_TASK(prev,
406 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " 457 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
407 "state:%d sig:%d\n", 458 "state:%d sig:%d\n",
408 blocks, out_of_time, np, sleep, preempt, 459 blocks, out_of_time, np, sleep, preempt,
409 prev->state, signal_pending(prev)); 460 prev->state, signal_pending(prev));
461 */
462
410 if (entry->linked && preempt) 463 if (entry->linked && preempt)
411 TRACE_TASK(prev, "will be preempted by %s/%d\n", 464 TRACE_TASK(prev, "will be preempted by %s/%d\n",
412 entry->linked->comm, entry->linked->pid); 465 entry->linked->comm, entry->linked->pid);
413 466
414
415 /* If a task blocks we have no choice but to reschedule. 467 /* If a task blocks we have no choice but to reschedule.
416 */ 468 */
417 if (blocks) 469 if (blocks)
@@ -456,12 +508,15 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
456 entry->scheduled->rt_param.scheduled_on = NO_CPU; 508 entry->scheduled->rt_param.scheduled_on = NO_CPU;
457 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); 509 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
458 } 510 }
459 } else 511 }
512 else
513 {
460 /* Only override Linux scheduler if we have a real-time task 514 /* Only override Linux scheduler if we have a real-time task
461 * scheduled that needs to continue. 515 * scheduled that needs to continue.
462 */ 516 */
463 if (exists) 517 if (exists)
464 next = prev; 518 next = prev;
519 }
465 520
466 sched_state_task_picked(); 521 sched_state_task_picked();
467 522
@@ -486,8 +541,9 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
486static void gsnedf_finish_switch(struct task_struct *prev) 541static void gsnedf_finish_switch(struct task_struct *prev)
487{ 542{
488 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); 543 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
489 544
490 entry->scheduled = is_realtime(current) ? current : NULL; 545 entry->scheduled = is_realtime(current) ? current : NULL;
546
491#ifdef WANT_ALL_SCHED_EVENTS 547#ifdef WANT_ALL_SCHED_EVENTS
492 TRACE_TASK(prev, "switched away from\n"); 548 TRACE_TASK(prev, "switched away from\n");
493#endif 549#endif
@@ -536,11 +592,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
536static void gsnedf_task_wake_up(struct task_struct *task) 592static void gsnedf_task_wake_up(struct task_struct *task)
537{ 593{
538 unsigned long flags; 594 unsigned long flags;
539 lt_t now; 595 lt_t now;
540 596
541 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 597 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
542 598
543 raw_spin_lock_irqsave(&gsnedf_lock, flags); 599 raw_spin_lock_irqsave(&gsnedf_lock, flags);
600
601
602#if 0 // sporadic task model
544 /* We need to take suspensions because of semaphores into 603 /* We need to take suspensions because of semaphores into
545 * account! If a job resumes after being suspended due to acquiring 604 * account! If a job resumes after being suspended due to acquiring
546 * a semaphore, it should never be treated as a new job release. 605 * a semaphore, it should never be treated as a new job release.
@@ -562,19 +621,26 @@ static void gsnedf_task_wake_up(struct task_struct *task)
562 } 621 }
563 } 622 }
564 } 623 }
624#else // periodic task model
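	/* Under the periodic task model a wake-up simply resumes the current job;
	 * no release-time check or new job release is performed here. */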
625 set_rt_flags(task, RT_F_RUNNING);
626#endif
627
565 gsnedf_job_arrival(task); 628 gsnedf_job_arrival(task);
566 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 629 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
567} 630}
568 631
569static void gsnedf_task_block(struct task_struct *t) 632static void gsnedf_task_block(struct task_struct *t)
570{ 633{
634 // TODO: is this called on preemption??
571 unsigned long flags; 635 unsigned long flags;
572 636
573 TRACE_TASK(t, "block at %llu\n", litmus_clock()); 637 TRACE_TASK(t, "block at %llu\n", litmus_clock());
574 638
575 /* unlink if necessary */ 639 /* unlink if necessary */
576 raw_spin_lock_irqsave(&gsnedf_lock, flags); 640 raw_spin_lock_irqsave(&gsnedf_lock, flags);
641
577 unlink(t); 642 unlink(t);
643
578 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 644 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
579 645
580 BUG_ON(!is_realtime(t)); 646 BUG_ON(!is_realtime(t));
@@ -608,51 +674,53 @@ static long gsnedf_admit_task(struct task_struct* tsk)
608 674
609#include <litmus/fdso.h> 675#include <litmus/fdso.h>
610 676
611/* called with IRQs off */ 677
612static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) 678static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
613{ 679{
614 int linked_on; 680 int linked_on;
615 int check_preempt = 0; 681 int check_preempt = 0;
616 682
617 raw_spin_lock(&gsnedf_lock); 683 if(prio_inh != NULL)
618 684 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
619 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); 685 else
686 TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
687
688 sched_trace_eff_prio_change(t, prio_inh);
689
620 tsk_rt(t)->inh_task = prio_inh; 690 tsk_rt(t)->inh_task = prio_inh;
621 691
622 linked_on = tsk_rt(t)->linked_on; 692 linked_on = tsk_rt(t)->linked_on;
623 693
624 /* If it is scheduled, then we need to reorder the CPU heap. */ 694 /* If it is scheduled, then we need to reorder the CPU heap. */
625 if (linked_on != NO_CPU) { 695 if (linked_on != NO_CPU) {
626 TRACE_TASK(t, "%s: linked on %d\n", 696 TRACE_TASK(t, "%s: linked on %d\n",
627 __FUNCTION__, linked_on); 697 __FUNCTION__, linked_on);
628 /* Holder is scheduled; need to re-order CPUs. 698 /* Holder is scheduled; need to re-order CPUs.
629 * We can't use heap_decrease() here since 699 * We can't use heap_decrease() here since
630 * the cpu_heap is ordered in reverse direction, so 700 * the cpu_heap is ordered in reverse direction, so
631 * it is actually an increase. */ 701 * it is actually an increase. */
632 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, 702 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
633 gsnedf_cpus[linked_on]->hn); 703 gsnedf_cpus[linked_on]->hn);
634 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, 704 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
635 gsnedf_cpus[linked_on]->hn); 705 gsnedf_cpus[linked_on]->hn);
636 } else { 706 } else {
637 /* holder may be queued: first stop queue changes */ 707 /* holder may be queued: first stop queue changes */
638 raw_spin_lock(&gsnedf.release_lock); 708 raw_spin_lock(&gsnedf.release_lock);
639 if (is_queued(t)) { 709 if (is_queued(t)) {
640 TRACE_TASK(t, "%s: is queued\n", 710 TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
641 __FUNCTION__); 711
642 /* We need to update the position of holder in some 712 /* We need to update the position of holder in some
 643	 * heap. Note that this could be a release heap if 713	 * heap. Note that this could be a release heap if
644 * budget enforcement is used and this job overran. */ 714 * budget enforcement is used and this job overran. */
645 check_preempt = 715 check_preempt = !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
646 !bheap_decrease(edf_ready_order, 716
647 tsk_rt(t)->heap_node);
648 } else { 717 } else {
649 /* Nothing to do: if it is not queued and not linked 718 /* Nothing to do: if it is not queued and not linked
650 * then it is either sleeping or currently being moved 719 * then it is either sleeping or currently being moved
651 * by other code (e.g., a timer interrupt handler) that 720 * by other code (e.g., a timer interrupt handler) that
652 * will use the correct priority when enqueuing the 721 * will use the correct priority when enqueuing the
653 * task. */ 722 * task. */
654 TRACE_TASK(t, "%s: is NOT queued => Done.\n", 723 TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
655 __FUNCTION__);
656 } 724 }
657 raw_spin_unlock(&gsnedf.release_lock); 725 raw_spin_unlock(&gsnedf.release_lock);
658 726
@@ -666,34 +734,148 @@ static void set_priority_inheritance(struct task_struct* t, struct task_struct*
666 /* heap_decrease() hit the top level of the heap: make 734 /* heap_decrease() hit the top level of the heap: make
667 * sure preemption checks get the right task, not the 735 * sure preemption checks get the right task, not the
668 * potentially stale cache. */ 736 * potentially stale cache. */
669 bheap_uncache_min(edf_ready_order, 737 bheap_uncache_min(edf_ready_order, &gsnedf.ready_queue);
670 &gsnedf.ready_queue);
671 check_for_preemptions(); 738 check_for_preemptions();
672 } 739 }
673 } 740 }
741}
674 742
743/* called with IRQs off */
744static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
745{
746 raw_spin_lock(&gsnedf_lock);
747
748 __set_priority_inheritance(t, prio_inh);
749
750#ifdef CONFIG_LITMUS_SOFTIRQD
751 if(tsk_rt(t)->cur_klitirqd != NULL)
752 {
753 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
754 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
755
756 __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
757 }
758#endif
759
675 raw_spin_unlock(&gsnedf_lock); 760 raw_spin_unlock(&gsnedf_lock);
676} 761}
677 762
763
764/* called with IRQs off */
765static void __clear_priority_inheritance(struct task_struct* t)
766{
767 TRACE_TASK(t, "priority restored\n");
768
769 if(tsk_rt(t)->scheduled_on != NO_CPU)
770 {
771 sched_trace_eff_prio_change(t, NULL);
772
773 tsk_rt(t)->inh_task = NULL;
774
775 /* Check if rescheduling is necessary. We can't use heap_decrease()
776 * since the priority was effectively lowered. */
777 unlink(t);
778 gsnedf_job_arrival(t);
779 }
780 else
781 {
782 __set_priority_inheritance(t, NULL);
783 }
784
785#ifdef CONFIG_LITMUS_SOFTIRQD
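	/* A klitirqd proxy currently serving t falls back to inheriting t's own,
	 * now-restored priority. */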
786 if(tsk_rt(t)->cur_klitirqd != NULL)
787 {
788 TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
789 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
790
791 if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
792 {
793 sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
794
795 tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
796
797 /* Check if rescheduling is necessary. We can't use heap_decrease()
798 * since the priority was effectively lowered. */
799 unlink(tsk_rt(t)->cur_klitirqd);
800 gsnedf_job_arrival(tsk_rt(t)->cur_klitirqd);
801 }
802 else
803 {
804 __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
805 }
806 }
807#endif
808}
809
678/* called with IRQs off */ 810/* called with IRQs off */
679static void clear_priority_inheritance(struct task_struct* t) 811static void clear_priority_inheritance(struct task_struct* t)
680{ 812{
681 raw_spin_lock(&gsnedf_lock); 813 raw_spin_lock(&gsnedf_lock);
814 __clear_priority_inheritance(t);
815 raw_spin_unlock(&gsnedf_lock);
816}
682 817
683 /* A job only stops inheriting a priority when it releases a 818#ifdef CONFIG_LITMUS_SOFTIRQD
684 * resource. Thus we can make the following assumption.*/ 819/* called with IRQs off */
685 BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU); 820static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
686 821 struct task_struct* old_owner,
687 TRACE_TASK(t, "priority restored\n"); 822 struct task_struct* new_owner)
688 tsk_rt(t)->inh_task = NULL; 823{
824 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
825
826 raw_spin_lock(&gsnedf_lock);
827
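	/* Hand the proxy thread over to new_owner and let it inherit new_owner's
	 * effective priority (new_owner itself, or whatever new_owner inherits). */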
828 if(old_owner != new_owner)
829 {
830 if(old_owner)
831 {
832 // unreachable?
833 tsk_rt(old_owner)->cur_klitirqd = NULL;
834 }
835
836 TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
837 new_owner->comm, new_owner->pid);
689 838
690 /* Check if rescheduling is necessary. We can't use heap_decrease() 839 tsk_rt(new_owner)->cur_klitirqd = klitirqd;
691 * since the priority was effectively lowered. */ 840 }
692 unlink(t); 841
693 gsnedf_job_arrival(t); 842 __set_priority_inheritance(klitirqd,
843 (tsk_rt(new_owner)->inh_task == NULL) ?
844 new_owner :
845 tsk_rt(new_owner)->inh_task);
846
847 raw_spin_unlock(&gsnedf_lock);
848}
694 849
850/* called with IRQs off */
851static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
852 struct task_struct* old_owner)
853{
854 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
855
856 raw_spin_lock(&gsnedf_lock);
857
858 TRACE_TASK(klitirqd, "priority restored\n");
859
860 if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
861 {
862 tsk_rt(klitirqd)->inh_task = NULL;
863
864 /* Check if rescheduling is necessary. We can't use heap_decrease()
865 * since the priority was effectively lowered. */
866 unlink(klitirqd);
867 gsnedf_job_arrival(klitirqd);
868 }
869 else
870 {
871 __set_priority_inheritance(klitirqd, NULL);
872 }
873
874 tsk_rt(old_owner)->cur_klitirqd = NULL;
875
695 raw_spin_unlock(&gsnedf_lock); 876 raw_spin_unlock(&gsnedf_lock);
696} 877}
878#endif
697 879
698 880
699/* ******************** FMLP support ********************** */ 881/* ******************** FMLP support ********************** */
@@ -892,11 +1074,477 @@ static struct litmus_lock* gsnedf_new_fmlp(void)
892 return &sem->litmus_lock; 1074 return &sem->litmus_lock;
893} 1075}
894 1076
1077
1078
1079
1080
1081
1082
1083/* ******************** KFMLP support ********************** */
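/* KFMLP is a k-exclusion variant of the FMLP: the semaphore manages
 * 'num_resources' replicas, each with its own FIFO wait queue. A requester
 * enqueues on the currently shortest queue, and a replica that frees up with
 * no local waiters may steal the highest-priority waiter from another queue. */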
1084
1085/* struct for semaphore with priority inheritance */
1086struct kfmlp_queue
1087{
1088 wait_queue_head_t wait;
1089 struct task_struct* owner;
1090 struct task_struct* hp_waiter;
1091 int count; /* number of waiters + holder */
1092};
1093
1094struct kfmlp_semaphore
1095{
1096 struct litmus_lock litmus_lock;
1097
1098 spinlock_t lock;
1099
1100 int num_resources; /* aka k */
1101
1102 struct kfmlp_queue *queues; /* array */
1103 struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
1104};
1105
1106static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
1107{
1108 return container_of(lock, struct kfmlp_semaphore, litmus_lock);
1109}
1110
1111static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
1112 struct kfmlp_queue* queue)
1113{
1114 return (queue - &sem->queues[0]);
1115}
1116
1117static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
1118 struct task_struct* holder)
1119{
1120 int i;
1121 for(i = 0; i < sem->num_resources; ++i)
1122 if(sem->queues[i].owner == holder)
1123 return(&sem->queues[i]);
1124 return(NULL);
1125}
1126
1127/* caller is responsible for locking */
1128static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
1129 struct task_struct *skip)
1130{
1131 struct list_head *pos;
1132 struct task_struct *queued, *found = NULL;
1133
1134 list_for_each(pos, &kqueue->wait.task_list) {
1135 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
1136 task_list)->private;
1137
1138 /* Compare task prios, find high prio task. */
1139 if (queued != skip && edf_higher_prio(queued, found))
1140 found = queued;
1141 }
1142 return found;
1143}
1144
1145static inline struct kfmlp_queue* kfmlp_find_shortest(
1146 struct kfmlp_semaphore* sem,
1147 struct kfmlp_queue* search_start)
1148{
1149 // we start our search at search_start instead of at the beginning of the
1150 // queue list to load-balance across all resources.
1151 struct kfmlp_queue* step = search_start;
1152 struct kfmlp_queue* shortest = sem->shortest_queue;
1153
1154 do
1155 {
1156 step = (step+1 != &sem->queues[sem->num_resources]) ?
1157 step+1 : &sem->queues[0];
1158 if(step->count < shortest->count)
1159 {
1160 shortest = step;
1161 if(step->count == 0)
1162 break; /* can't get any shorter */
1163 }
1164 }while(step != search_start);
1165
1166 return(shortest);
1167}
1168
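/* Steal the highest-priority waiter among all queues that still have waiters
 * (count > 1): remove it from its wait queue, recompute that queue's hp_waiter,
 * fix up the queue owner's inheritance, and return the stolen task (NULL if
 * there is nothing to steal). */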
1169static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
1170{
1171 /* must hold sem->lock */
1172
1173 struct kfmlp_queue *my_queue = NULL;
1174 struct task_struct *max_hp = NULL;
1175
1176
1177 struct list_head *pos;
1178 struct task_struct *queued;
1179 int i;
1180
1181 for(i = 0; i < sem->num_resources; ++i)
1182 {
1183 if( (sem->queues[i].count > 1) &&
1184 ((my_queue == NULL) ||
1185 (edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
1186 {
1187 my_queue = &sem->queues[i];
1188 }
1189 }
1190
1191 if(my_queue)
1192 {
1193 max_hp = my_queue->hp_waiter;
1194
1195 BUG_ON(!max_hp);
1196
1197 TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
1198 kfmlp_get_idx(sem, my_queue),
1199 max_hp->comm, max_hp->pid,
1200 kfmlp_get_idx(sem, my_queue));
1201
1202 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
1203
1204 /*
1205 if(my_queue->hp_waiter)
1206 TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
1207 kfmlp_get_idx(sem, my_queue),
1208 my_queue->hp_waiter->comm,
1209 my_queue->hp_waiter->pid);
1210 else
1211 TRACE_CUR("queue %d: new hp_waiter is %p\n",
1212 kfmlp_get_idx(sem, my_queue), NULL);
1213 */
1214
1215 raw_spin_lock(&gsnedf_lock);
1216
1217 /*
1218 if(my_queue->owner)
1219 TRACE_CUR("queue %d: owner is %s/%d\n",
1220 kfmlp_get_idx(sem, my_queue),
1221 my_queue->owner->comm,
1222 my_queue->owner->pid);
1223 else
1224 TRACE_CUR("queue %d: owner is %p\n",
1225 kfmlp_get_idx(sem, my_queue),
1226 NULL);
1227 */
1228
1229 if(tsk_rt(my_queue->owner)->inh_task == max_hp)
1230 {
1231 __clear_priority_inheritance(my_queue->owner);
1232 if(my_queue->hp_waiter != NULL)
1233 {
1234 __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
1235 }
1236 }
1237 raw_spin_unlock(&gsnedf_lock);
1238
1239 list_for_each(pos, &my_queue->wait.task_list)
1240 {
1241 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
1242 task_list)->private;
1243 /* Compare task prios, find high prio task. */
1244 if (queued == max_hp)
1245 {
1246 /*
1247 TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
1248 kfmlp_get_idx(sem, my_queue));
1249 */
1250 __remove_wait_queue(&my_queue->wait,
1251 list_entry(pos, wait_queue_t, task_list));
1252 break;
1253 }
1254 }
1255 --(my_queue->count);
1256 }
1257
1258 return(max_hp);
1259}
1260
1261int gsnedf_kfmlp_lock(struct litmus_lock* l)
1262{
1263 struct task_struct* t = current;
1264 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1265 struct kfmlp_queue* my_queue;
1266 wait_queue_t wait;
1267 unsigned long flags;
1268
1269 if (!is_realtime(t))
1270 return -EPERM;
1271
1272 spin_lock_irqsave(&sem->lock, flags);
1273
1274 my_queue = sem->shortest_queue;
1275
1276 if (my_queue->owner) {
1277 /* resource is not free => must suspend and wait */
1278 TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
1279 kfmlp_get_idx(sem, my_queue));
1280
1281 init_waitqueue_entry(&wait, t);
1282
1283 /* FIXME: interruptible would be nice some day */
1284 set_task_state(t, TASK_UNINTERRUPTIBLE);
1285
1286 __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
1287
1288 /* check if we need to activate priority inheritance */
1289 if (edf_higher_prio(t, my_queue->hp_waiter))
1290 {
1291 my_queue->hp_waiter = t;
1292 if (edf_higher_prio(t, my_queue->owner))
1293 {
1294 set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
1295 }
1296 }
1297
1298 ++(my_queue->count);
1299 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
1300
1301 /* release lock before sleeping */
1302 spin_unlock_irqrestore(&sem->lock, flags);
1303
1304 /* We depend on the FIFO order. Thus, we don't need to recheck
1305 * when we wake up; we are guaranteed to have the lock since
1306 * there is only one wake up per release (or steal).
1307 */
1308 schedule();
1309
1310
1311 if(my_queue->owner == t)
1312 {
1313 TRACE_CUR("queue %d: acquired through waiting\n",
1314 kfmlp_get_idx(sem, my_queue));
1315 }
1316 else
1317 {
1318 /* this case may happen if our wait entry was stolen
1319 between queues. record where we went. */
1320 my_queue = kfmlp_get_queue(sem, t);
1321
1322 BUG_ON(!my_queue);
1323 TRACE_CUR("queue %d: acquired through stealing\n",
1324 kfmlp_get_idx(sem, my_queue));
1325 }
1326 }
1327 else
1328 {
1329 TRACE_CUR("queue %d: acquired immediately\n",
1330 kfmlp_get_idx(sem, my_queue));
1331
1332 my_queue->owner = t;
1333
1334 ++(my_queue->count);
1335 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
1336
1337 spin_unlock_irqrestore(&sem->lock, flags);
1338 }
1339
1340 return kfmlp_get_idx(sem, my_queue);
1341}
1342
1343int gsnedf_kfmlp_unlock(struct litmus_lock* l)
1344{
1345 struct task_struct *t = current, *next;
1346 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1347 struct kfmlp_queue *my_queue;
1348 unsigned long flags;
1349 int err = 0;
1350
1351 spin_lock_irqsave(&sem->lock, flags);
1352
1353 my_queue = kfmlp_get_queue(sem, t);
1354
1355 if (!my_queue) {
1356 err = -EINVAL;
1357 goto out;
1358 }
1359
1360 /* check if there are jobs waiting for this resource */
1361 next = __waitqueue_remove_first(&my_queue->wait);
1362 if (next) {
1363 /*
1364 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
1365 kfmlp_get_idx(sem, my_queue),
1366 next->comm, next->pid);
1367 */
 1368		/* next becomes the resource holder */
1369 my_queue->owner = next;
1370
1371 --(my_queue->count);
1372 if(my_queue->count < sem->shortest_queue->count)
1373 {
1374 sem->shortest_queue = my_queue;
1375 }
1376
1377 TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
1378 kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
1379
1380 /* determine new hp_waiter if necessary */
1381 if (next == my_queue->hp_waiter) {
1382 TRACE_TASK(next, "was highest-prio waiter\n");
1383 /* next has the highest priority --- it doesn't need to
1384 * inherit. However, we need to make sure that the
1385 * next-highest priority in the queue is reflected in
1386 * hp_waiter. */
1387 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
1388 if (my_queue->hp_waiter)
1389 TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
1390 else
1391 TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
1392 } else {
1393 /* Well, if next is not the highest-priority waiter,
1394 * then it ought to inherit the highest-priority
1395 * waiter's priority. */
1396 set_priority_inheritance(next, my_queue->hp_waiter);
1397 }
1398
1399 /* wake up next */
1400 wake_up_process(next);
1401 }
1402 else
1403 {
1404 TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
1405
1406 next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
1407
1408 /*
1409 if(next)
1410 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
1411 kfmlp_get_idx(sem, my_queue),
1412 next->comm, next->pid);
1413 */
1414
1415 my_queue->owner = next;
1416
1417 if(next)
1418 {
1419 TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
1420 kfmlp_get_idx(sem, my_queue),
1421 next->comm, next->pid);
1422
1423 /* wake up next */
1424 wake_up_process(next);
1425 }
1426 else
1427 {
1428 TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
1429
1430 --(my_queue->count);
1431 if(my_queue->count < sem->shortest_queue->count)
1432 {
1433 sem->shortest_queue = my_queue;
1434 }
1435 }
1436 }
1437
1438 /* we lose the benefit of priority inheritance (if any) */
1439 if (tsk_rt(t)->inh_task)
1440 clear_priority_inheritance(t);
1441
1442out:
1443 spin_unlock_irqrestore(&sem->lock, flags);
1444
1445 return err;
1446}
1447
1448int gsnedf_kfmlp_close(struct litmus_lock* l)
1449{
1450 struct task_struct *t = current;
1451 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1452 struct kfmlp_queue *my_queue;
1453 unsigned long flags;
1454
1455 int owner;
1456
1457 spin_lock_irqsave(&sem->lock, flags);
1458
1459 my_queue = kfmlp_get_queue(sem, t);
1460 owner = (my_queue) ? (my_queue->owner == t) : 0;
1461
1462 spin_unlock_irqrestore(&sem->lock, flags);
1463
1464 if (owner)
1465 gsnedf_kfmlp_unlock(l);
1466
1467 return 0;
1468}
1469
1470void gsnedf_kfmlp_free(struct litmus_lock* l)
1471{
1472 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1473 kfree(sem->queues);
1474 kfree(sem);
1475}
1476
1477static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = {
1478 .close = gsnedf_kfmlp_close,
1479 .lock = gsnedf_kfmlp_lock,
1480 .unlock = gsnedf_kfmlp_unlock,
1481 .deallocate = gsnedf_kfmlp_free,
1482};
1483
1484static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg, int* ret_code)
1485{
1486 struct kfmlp_semaphore* sem;
1487 int num_resources = 0;
1488 int i;
1489
1490 if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
1491 {
1492 *ret_code = -EINVAL;
1493 return(NULL);
1494 }
1495 if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
1496 {
1497 *ret_code = -EINVAL;
1498 return(NULL);
1499 }
1500 if(num_resources < 1)
1501 {
1502 *ret_code = -EINVAL;
1503 return(NULL);
1504 }
1505
1506 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1507 if(!sem)
1508 {
1509 *ret_code = -ENOMEM;
1510 return NULL;
1511 }
1512
1513 sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
1514 if(!sem->queues)
1515 {
1516 kfree(sem);
1517 *ret_code = -ENOMEM;
1518 return NULL;
1519 }
1520
1521 sem->litmus_lock.ops = &gsnedf_kfmlp_lock_ops;
1522 spin_lock_init(&sem->lock);
1523 sem->num_resources = num_resources;
1524
1525 for(i = 0; i < num_resources; ++i)
1526 {
1527 sem->queues[i].owner = NULL;
1528 sem->queues[i].hp_waiter = NULL;
1529 init_waitqueue_head(&sem->queues[i].wait);
1530 sem->queues[i].count = 0;
1531 }
1532
1533 sem->shortest_queue = &sem->queues[0];
1534
1535 *ret_code = 0;
1536 return &sem->litmus_lock;
1537}
1538
1539
1540
1541
1542
895/* **** lock constructor **** */ 1543/* **** lock constructor **** */
896 1544
897 1545
898static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, 1546static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
899 void* __user unused) 1547 void* __user arg)
900{ 1548{
901 int err = -ENXIO; 1549 int err = -ENXIO;
902 1550
@@ -911,7 +1559,10 @@ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
911 else 1559 else
912 err = -ENOMEM; 1560 err = -ENOMEM;
913 break; 1561 break;
914 1562
1563 case KFMLP_SEM:
1564 *lock = gsnedf_new_kfmlp(arg, &err);
1565 break;
915 }; 1566 };
916 1567
917 return err; 1568 return err;
@@ -919,7 +1570,6 @@ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
919 1570
920#endif 1571#endif
921 1572
922
923static long gsnedf_activate_plugin(void) 1573static long gsnedf_activate_plugin(void)
924{ 1574{
925 int cpu; 1575 int cpu;
@@ -946,6 +1596,15 @@ static long gsnedf_activate_plugin(void)
946 } 1596 }
947#endif 1597#endif
948 } 1598 }
1599
1600#ifdef CONFIG_LITMUS_SOFTIRQD
1601 spawn_klitirqd(NULL);
1602#endif
1603
1604#ifdef CONFIG_LITMUS_NVIDIA
1605 init_nvidia_info();
1606#endif
1607
949 return 0; 1608 return 0;
950} 1609}
951 1610
@@ -963,8 +1622,15 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
963 .admit_task = gsnedf_admit_task, 1622 .admit_task = gsnedf_admit_task,
964 .activate_plugin = gsnedf_activate_plugin, 1623 .activate_plugin = gsnedf_activate_plugin,
965#ifdef CONFIG_LITMUS_LOCKING 1624#ifdef CONFIG_LITMUS_LOCKING
966 .allocate_lock = gsnedf_allocate_lock, 1625 .allocate_lock = gsnedf_allocate_lock,
1626 .set_prio_inh = set_priority_inheritance,
1627 .clear_prio_inh = clear_priority_inheritance,
1628#endif
1629#ifdef CONFIG_LITMUS_SOFTIRQD
1630 .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
1631 .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
967#endif 1632#endif
1633
968}; 1634};
969 1635
970 1636
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
index e6952896dc4b..1bca2e1a33cd 100644
--- a/litmus/sched_litmus.c
+++ b/litmus/sched_litmus.c
@@ -103,7 +103,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev)
103 } 103 }
104#ifdef __ARCH_WANT_UNLOCKED_CTXSW 104#ifdef __ARCH_WANT_UNLOCKED_CTXSW
105 if (next->oncpu) 105 if (next->oncpu)
106 {
106 TRACE_TASK(next, "waiting for !oncpu"); 107 TRACE_TASK(next, "waiting for !oncpu");
108 }
107 while (next->oncpu) { 109 while (next->oncpu) {
108 cpu_relax(); 110 cpu_relax();
109 mb(); 111 mb();
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index d54886df1f57..8802670a4b0b 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -129,6 +129,27 @@ static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
129 return -ENXIO; 129 return -ENXIO;
130} 130}
131 131
132static void litmus_dummy_set_prio_inh(struct task_struct* a, struct task_struct* b)
133{
134}
135
136static void litmus_dummy_clear_prio_inh(struct task_struct* t)
137{
138}
139
140#endif
141
142#ifdef CONFIG_LITMUS_SOFTIRQD
 143static void litmus_dummy_set_prio_inh_klitirqd(struct task_struct* klitirqd,
144 struct task_struct* old_owner,
145 struct task_struct* new_owner)
146{
147}
148
149static void litmus_dummy_clear_prio_inh_klitirqd(struct task_struct* klitirqd,
150 struct task_struct* old_owner)
151{
152}
132#endif 153#endif
133 154
134 155
@@ -149,6 +170,12 @@ struct sched_plugin linux_sched_plugin = {
149 .deactivate_plugin = litmus_dummy_deactivate_plugin, 170 .deactivate_plugin = litmus_dummy_deactivate_plugin,
150#ifdef CONFIG_LITMUS_LOCKING 171#ifdef CONFIG_LITMUS_LOCKING
151 .allocate_lock = litmus_dummy_allocate_lock, 172 .allocate_lock = litmus_dummy_allocate_lock,
173 .set_prio_inh = litmus_dummy_set_prio_inh,
174 .clear_prio_inh = litmus_dummy_clear_prio_inh,
175#endif
176#ifdef CONFIG_LITMUS_SOFTIRQD
 177	.set_prio_inh_klitirqd = litmus_dummy_set_prio_inh_klitirqd,
178 .clear_prio_inh_klitirqd = litmus_dummy_clear_prio_inh_klitirqd,
152#endif 179#endif
153 .admit_task = litmus_dummy_admit_task 180 .admit_task = litmus_dummy_admit_task
154}; 181};
@@ -187,6 +214,8 @@ int register_sched_plugin(struct sched_plugin* plugin)
187 CHECK(deactivate_plugin); 214 CHECK(deactivate_plugin);
188#ifdef CONFIG_LITMUS_LOCKING 215#ifdef CONFIG_LITMUS_LOCKING
189 CHECK(allocate_lock); 216 CHECK(allocate_lock);
217 CHECK(set_prio_inh);
218 CHECK(clear_prio_inh);
190#endif 219#endif
191 CHECK(admit_task); 220 CHECK(admit_task);
192 221
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index 5ef8d09ab41f..7aeb99b668d3 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -7,6 +7,7 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/percpu.h> 9#include <linux/percpu.h>
10#include <linux/hardirq.h>
10 11
11#include <litmus/ftdev.h> 12#include <litmus/ftdev.h>
12#include <litmus/litmus.h> 13#include <litmus/litmus.h>
@@ -16,13 +17,13 @@
16#include <litmus/ftdev.h> 17#include <litmus/ftdev.h>
17 18
18 19
19#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) 20#define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11))
20 21
21#define now() litmus_clock() 22#define now() litmus_clock()
22 23
23struct local_buffer { 24struct local_buffer {
24 struct st_event_record record[NO_EVENTS]; 25 struct st_event_record record[NUM_EVENTS];
25 char flag[NO_EVENTS]; 26 char flag[NUM_EVENTS];
26 struct ft_buffer ftbuf; 27 struct ft_buffer ftbuf;
27}; 28};
28 29
@@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void)
41 int i, ok = 0, err; 42 int i, ok = 0, err;
42 printk("Allocated %u sched_trace_xxx() events per CPU " 43 printk("Allocated %u sched_trace_xxx() events per CPU "
43 "(buffer size: %d bytes)\n", 44 "(buffer size: %d bytes)\n",
44 NO_EVENTS, (int) sizeof(struct local_buffer)); 45 NUM_EVENTS, (int) sizeof(struct local_buffer));
45 46
46 err = ftdev_init(&st_dev, THIS_MODULE, 47 err = ftdev_init(&st_dev, THIS_MODULE,
47 num_online_cpus(), "sched_trace"); 48 num_online_cpus(), "sched_trace");
@@ -50,7 +51,7 @@ static int __init init_sched_task_trace(void)
50 51
51 for (i = 0; i < st_dev.minor_cnt; i++) { 52 for (i = 0; i < st_dev.minor_cnt; i++) {
52 buf = &per_cpu(st_event_buffer, i); 53 buf = &per_cpu(st_event_buffer, i);
53 ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, 54 ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS,
54 sizeof(struct st_event_record), 55 sizeof(struct st_event_record),
55 buf->flag, 56 buf->flag,
56 buf->record); 57 buf->record);
@@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id,
154{ 155{
155 struct task_struct *t = (struct task_struct*) _task; 156 struct task_struct *t = (struct task_struct*) _task;
156 struct st_event_record* rec; 157 struct st_event_record* rec;
157 if (is_realtime(t)) { 158 //if (is_realtime(t)) /* comment out to trace EVERYTHING */
159 {
158 rec = get_record(ST_SWITCH_TO, t); 160 rec = get_record(ST_SWITCH_TO, t);
159 if (rec) { 161 if (rec) {
160 rec->data.switch_to.when = now(); 162 rec->data.switch_to.when = now();
@@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id,
169{ 171{
170 struct task_struct *t = (struct task_struct*) _task; 172 struct task_struct *t = (struct task_struct*) _task;
171 struct st_event_record* rec; 173 struct st_event_record* rec;
172 if (is_realtime(t)) { 174 //if (is_realtime(t)) /* comment out to trace EVERYTHING */
175 {
173 rec = get_record(ST_SWITCH_AWAY, t); 176 rec = get_record(ST_SWITCH_AWAY, t);
174 if (rec) { 177 if (rec) {
175 rec->data.switch_away.when = now(); 178 rec->data.switch_away.when = now();
@@ -188,6 +191,7 @@ feather_callback void do_sched_trace_task_completion(unsigned long id,
188 if (rec) { 191 if (rec) {
189 rec->data.completion.when = now(); 192 rec->data.completion.when = now();
190 rec->data.completion.forced = forced; 193 rec->data.completion.forced = forced;
194 rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count);
191 put_record(rec); 195 put_record(rec);
192 } 196 }
193} 197}
@@ -239,3 +243,201 @@ feather_callback void do_sched_trace_action(unsigned long id,
239 put_record(rec); 243 put_record(rec);
240 } 244 }
241} 245}
246
247
248feather_callback void do_sched_trace_tasklet_release(unsigned long id,
249 unsigned long _owner)
250{
251 struct task_struct *t = (struct task_struct*) _owner;
252 struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t);
253
254 if (rec) {
255 rec->data.tasklet_release.when = now();
256 put_record(rec);
257 }
258}
259
260
261feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
262 unsigned long _owner)
263{
264 struct task_struct *t = (struct task_struct*) _owner;
265 struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t);
266
267 if (rec) {
268 rec->data.tasklet_begin.when = now();
269
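		/* Record the PID of the thread executing the tasklet; 0 when it runs
		 * directly in interrupt context. */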
270 if(!in_interrupt())
271 rec->data.tasklet_begin.exe_pid = current->pid;
272 else
273 rec->data.tasklet_begin.exe_pid = 0;
274
275 put_record(rec);
276 }
277}
278EXPORT_SYMBOL(do_sched_trace_tasklet_begin);
279
280
281feather_callback void do_sched_trace_tasklet_end(unsigned long id,
282 unsigned long _owner,
283 unsigned long _flushed)
284{
285 struct task_struct *t = (struct task_struct*) _owner;
286 struct st_event_record *rec = get_record(ST_TASKLET_END, t);
287
288 if (rec) {
289 rec->data.tasklet_end.when = now();
290 rec->data.tasklet_end.flushed = _flushed;
291
292 if(!in_interrupt())
293 rec->data.tasklet_end.exe_pid = current->pid;
294 else
295 rec->data.tasklet_end.exe_pid = 0;
296
297 put_record(rec);
298 }
299}
300EXPORT_SYMBOL(do_sched_trace_tasklet_end);
301
302
303feather_callback void do_sched_trace_work_release(unsigned long id,
304 unsigned long _owner)
305{
306 struct task_struct *t = (struct task_struct*) _owner;
307 struct st_event_record *rec = get_record(ST_WORK_RELEASE, t);
308
309 if (rec) {
310 rec->data.work_release.when = now();
311 put_record(rec);
312 }
313}
314
315
316feather_callback void do_sched_trace_work_begin(unsigned long id,
317 unsigned long _owner,
318 unsigned long _exe)
319{
320 struct task_struct *t = (struct task_struct*) _owner;
321 struct st_event_record *rec = get_record(ST_WORK_BEGIN, t);
322
323 if (rec) {
324 struct task_struct *exe = (struct task_struct*) _exe;
325 rec->data.work_begin.exe_pid = exe->pid;
326 rec->data.work_begin.when = now();
327 put_record(rec);
328 }
329}
330EXPORT_SYMBOL(do_sched_trace_work_begin);
331
332
333feather_callback void do_sched_trace_work_end(unsigned long id,
334 unsigned long _owner,
335 unsigned long _exe,
336 unsigned long _flushed)
337{
338 struct task_struct *t = (struct task_struct*) _owner;
339 struct st_event_record *rec = get_record(ST_WORK_END, t);
340
341 if (rec) {
342 struct task_struct *exe = (struct task_struct*) _exe;
343 rec->data.work_end.exe_pid = exe->pid;
344 rec->data.work_end.flushed = _flushed;
345 rec->data.work_end.when = now();
346 put_record(rec);
347 }
348}
349EXPORT_SYMBOL(do_sched_trace_work_end);
350
351
352feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
353 unsigned long _task,
354 unsigned long _inh)
355{
356 struct task_struct *t = (struct task_struct*) _task;
357 struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t);
358
359 if (rec) {
360 struct task_struct *inh = (struct task_struct*) _inh;
361 rec->data.effective_priority_change.when = now();
362 rec->data.effective_priority_change.inh_pid = (inh != NULL) ?
363 inh->pid :
364 0xffff;
365
366 put_record(rec);
367 }
368}
369
370
371/* pray for no nesting of nv interrupts on same CPU... */
372struct tracing_interrupt_map
373{
374 int active;
375 int count;
376 unsigned long data[128]; // assume nesting less than 128...
377};
378DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing);
379
380feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
381 unsigned long _device)
382{
383 struct st_event_record *rec;
384
385 {
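		/* active == 0xcafebabe marks interrupt tracing as live on this CPU;
		 * 'count' is the nesting depth and data[] holds the device id for each
		 * nesting level (consumed by do_sched_trace_nv_interrupt_end()). */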
386 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
387 if(int_map->active == 0xcafebabe)
388 {
389 int_map->count++;
390 }
391 else
392 {
393 int_map->active = 0xcafebabe;
394 int_map->count = 1;
395 }
396 int_map->data[int_map->count-1] = _device;
397 }
398
399 rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL);
400 if(rec) {
401 u32 device = _device;
402 rec->data.nv_interrupt_begin.when = now();
403 rec->data.nv_interrupt_begin.device = device;
404 put_record(rec);
405 }
406}
407EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin);
408
409/*
410int is_interrupt_tracing_active(void)
411{
412 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
413 if(int_map->active == 0xcafebabe)
414 return 1;
415 return 0;
416}
417*/
418
419feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long unused)
420{
421 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
422 if(int_map->active == 0xcafebabe)
423 {
424 struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL);
425
426 int_map->count--;
427 if(int_map->count == 0)
428 int_map->active = 0;
429
430 if(rec) {
431 rec->data.nv_interrupt_end.when = now();
432 rec->data.nv_interrupt_end.device = int_map->data[int_map->count];
433 put_record(rec);
434 }
435 }
436}
437EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);
438
439
440
441
442
443
diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c
new file mode 100644
index 000000000000..d7d7d8bae298
--- /dev/null
+++ b/litmus/sched_trace_external.c
@@ -0,0 +1,45 @@
1#include <linux/module.h>
2
3#include <litmus/sched_trace.h>
4#include <litmus/litmus.h>
5
6void __sched_trace_tasklet_begin_external(struct task_struct* t)
7{
8 sched_trace_tasklet_begin(t);
9}
10EXPORT_SYMBOL(__sched_trace_tasklet_begin_external);
11
12void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
13{
14 sched_trace_tasklet_end(t, flushed);
15}
16EXPORT_SYMBOL(__sched_trace_tasklet_end_external);
17
18
19
20void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
21{
22 sched_trace_work_begin(t, e);
23}
24EXPORT_SYMBOL(__sched_trace_work_begin_external);
25
26void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
27{
28 sched_trace_work_end(t, e, f);
29}
30EXPORT_SYMBOL(__sched_trace_work_end_external);
31
32
33
34void __sched_trace_nv_interrupt_begin_external(u32 device)
35{
36 unsigned long _device = device;
37 sched_trace_nv_interrupt_begin(_device);
38}
39EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external);
40
41void __sched_trace_nv_interrupt_end_external(void)
42{
43 sched_trace_nv_interrupt_end();
44}
45EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external);