author     Glenn Elliott <gelliott@cs.unc.edu>   2012-12-11 22:01:01 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>   2012-12-12 14:14:41 -0500
commit     c8483ef0959672310bf4ebb72e1a308b00543f74 (patch)
tree       9cb306009b01c5226178f69172738026431d37f2 /litmus
parent     fbd9574e298157b54c38f82f536e5cea8f766dff (diff)
Make klmirqd work like aux tasks. Checkpoint.
This code is untested!
Diffstat (limited to 'litmus')
-rw-r--r--   litmus/Kconfig             20
-rw-r--r--   litmus/aux_tasks.c         40
-rw-r--r--   litmus/edf_common.c        60
-rw-r--r--   litmus/ikglp_lock.c        12
-rw-r--r--   litmus/jobs.c               2
-rw-r--r--   litmus/kfmlp_lock.c        11
-rw-r--r--   litmus/litmus.c            48
-rw-r--r--   litmus/litmus_softirq.c  1460
-rw-r--r--   litmus/locking.c           56
-rw-r--r--   litmus/nvidia_info.c      743
-rw-r--r--   litmus/sched_cedf.c       165
-rw-r--r--   litmus/sched_gsn_edf.c    136
-rw-r--r--   litmus/sched_plugin.c      20
13 files changed, 1301 insertions, 1472 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig
index f2434b87239b..9aeae659ae32 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -382,7 +382,7 @@ menu "Interrupt Handling"
382choice 382choice
383 prompt "Scheduling of interrupt bottom-halves in Litmus." 383 prompt "Scheduling of interrupt bottom-halves in Litmus."
384 default LITMUS_SOFTIRQD_NONE 384 default LITMUS_SOFTIRQD_NONE
385 depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ 385 depends on LITMUS_LOCKING
386 help 386 help
387 Schedule tasklets with known priorities in Litmus. 387 Schedule tasklets with known priorities in Litmus.
388 388
@@ -398,7 +398,7 @@ config LITMUS_SOFTIRQD
398 specifically dispatched to these workers. (Softirqs for 398 specifically dispatched to these workers. (Softirqs for
399 Litmus tasks are not magically redirected to klmirqd.) 399 Litmus tasks are not magically redirected to klmirqd.)
400 400
401 G-EDF/RM, C-EDF/RM ONLY for now! 401 G-EDF, C-EDF ONLY for now!
402 402
403 403
404config LITMUS_PAI_SOFTIRQD 404config LITMUS_PAI_SOFTIRQD
@@ -409,19 +409,11 @@ config LITMUS_PAI_SOFTIRQD
409 at the cost of non-preemptive durations of bottom half 409 at the cost of non-preemptive durations of bottom half
410 processing. 410 processing.
411 411
412 G-EDF/RM, C-EDF/RM ONLY for now! 412 G-EDF, C-EDF ONLY for now!
413 413
414endchoice 414endchoice
415 415
416 416
417config NR_LITMUS_SOFTIRQD
418 int "Number of klmirqd."
419 depends on LITMUS_SOFTIRQD
420 range 1 4096
421 default "1"
422 help
423 Should be <= to the number of CPUs in your system.
424
425config LITMUS_NVIDIA 417config LITMUS_NVIDIA
426 bool "Litmus handling of NVIDIA interrupts." 418 bool "Litmus handling of NVIDIA interrupts."
427 default n 419 default n
@@ -445,7 +437,7 @@ config LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT
445config NV_DEVICE_NUM 437config NV_DEVICE_NUM
446 int "Number of NVIDIA GPUs." 438 int "Number of NVIDIA GPUs."
447 depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD 439 depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
448 range 1 4096 440 range 1 16
449 default "1" 441 default "1"
450 help 442 help
451 Should be (<= to the number of CPUs) and 443 Should be (<= to the number of CPUs) and
@@ -453,11 +445,11 @@ config NV_DEVICE_NUM
453 445
454config NV_MAX_SIMULT_USERS 446config NV_MAX_SIMULT_USERS
455 int "Maximum number of threads sharing a GPU simultanously" 447 int "Maximum number of threads sharing a GPU simultanously"
456 depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD 448 depends on LITMUS_NVIDIA
457 range 1 3 449 range 1 3
458 default "2" 450 default "2"
459 help 451 help
460 Should be equal to the #copy_engines + #execution_engines 452 Should be at least equal to the #copy_engines + #execution_engines
461 of the GPUs in your system. 453 of the GPUs in your system.
462 454
463 Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?) 455 Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?)
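
With NR_LITMUS_SOFTIRQD removed from Kconfig, the number of klmirqd threads is no longer fixed at configure time; threads are instead launched on demand through the launch_klmirqd_thread() API added in litmus_softirq.c below (which caller actually invokes it is outside this excerpt). A minimal sketch of how a plugin could spawn one interrupt thread per online CPU under that API; the callback layout (.func/.arg) is inferred from run_klmirqd(), and the my_* names are illustrative rather than part of the patch:

	/* Hedged sketch: one klmirqd thread per online CPU via the new API.
	 * Assumes klmirqd_callback_t exposes .func/.arg as used by run_klmirqd(). */
	static klmirqd_callback_t my_cb[NR_CPUS];

	static int my_klmirqd_setup(void *arg)
	{
		/* per-thread, plugin-specific initialization would go here */
		return 0;
	}

	static void my_spawn_klmirqd_threads(void)
	{
		int cpu;

		init_klmirqd();  /* initialize the global registration state */

		for_each_online_cpu(cpu) {
			my_cb[cpu].func = my_klmirqd_setup;
			my_cb[cpu].arg  = NULL;
			/* pass -1 instead of cpu to launch without CPU affinity */
			launch_klmirqd_thread(cpu, &my_cb[cpu]);
		}
	}
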
diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c
index 20f477f6e3bc..ef26bba3be77 100644
--- a/litmus/aux_tasks.c
+++ b/litmus/aux_tasks.c
@@ -54,7 +54,7 @@ int exit_aux_task(struct task_struct *t)
54 TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, t->group_leader->comm, t->group_leader->pid); 54 TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, t->group_leader->comm, t->group_leader->pid);
55 55
56 tsk_rt(t)->is_aux_task = 0; 56 tsk_rt(t)->is_aux_task = 0;
57 57
58#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE 58#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
59 list_del(&tsk_rt(t)->aux_task_node); 59 list_del(&tsk_rt(t)->aux_task_node);
60 if (tsk_rt(t)->inh_task) { 60 if (tsk_rt(t)->inh_task) {
@@ -218,36 +218,36 @@ int make_aux_task_if_required(struct task_struct *t)
218{ 218{
219 struct task_struct *leader; 219 struct task_struct *leader;
220 int retval = 0; 220 int retval = 0;
221 221
222 read_lock_irq(&tasklist_lock); 222 read_lock_irq(&tasklist_lock);
223 223
224 leader = t->group_leader; 224 leader = t->group_leader;
225 225
226 if(!tsk_aux(leader)->initialized || !tsk_aux(leader)->aux_future) { 226 if(!tsk_aux(leader)->initialized || !tsk_aux(leader)->aux_future) {
227 goto out; 227 goto out;
228 } 228 }
229 229
230 TRACE_CUR("Making %s/%d in %s/%d an aux thread.\n", t->comm, t->pid, leader->comm, leader->pid); 230 TRACE_CUR("Making %s/%d in %s/%d an aux thread.\n", t->comm, t->pid, leader->comm, leader->pid);
231 231
232 INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); 232 INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node);
233 INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); 233 INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node);
234 234
235 retval = admit_aux_task(t); 235 retval = admit_aux_task(t);
236 if (retval == 0) { 236 if (retval == 0) {
237 tsk_rt(t)->is_aux_task = 1; 237 tsk_rt(t)->is_aux_task = 1;
238 238
239#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE 239#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
240 list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); 240 list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks);
241 241
242 if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { 242 if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) {
243 struct task_struct *hp = 243 struct task_struct *hp =
244 container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), 244 container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node),
245 struct task_struct, rt_param); 245 struct task_struct, rt_param);
246 246
247 TRACE_CUR("hp in group: %s/%d\n", hp->comm, hp->pid); 247 TRACE_CUR("hp in group: %s/%d\n", hp->comm, hp->pid);
248 248
249 retval = litmus->__increase_prio(t, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); 249 retval = litmus->__increase_prio(t, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp);
250 250
251 if (retval != 0) { 251 if (retval != 0) {
252 /* don't know how to recover from bugs with prio inheritance. better just crash. */ 252 /* don't know how to recover from bugs with prio inheritance. better just crash. */
253 read_unlock_irq(&tasklist_lock); 253 read_unlock_irq(&tasklist_lock);
@@ -256,7 +256,7 @@ int make_aux_task_if_required(struct task_struct *t)
256 } 256 }
257#endif 257#endif
258 } 258 }
259 259
260out: 260out:
261 read_unlock_irq(&tasklist_lock); 261 read_unlock_irq(&tasklist_lock);
262 262
@@ -385,7 +385,7 @@ static long __do_enable_aux_tasks(int flags)
385 if (flags & AUX_FUTURE) { 385 if (flags & AUX_FUTURE) {
386 tsk_aux(leader)->aux_future = 1; 386 tsk_aux(leader)->aux_future = 1;
387 } 387 }
388 388
389 t = leader; 389 t = leader;
390 do { 390 do {
391 if (!tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->is_aux_task) { 391 if (!tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->is_aux_task) {
@@ -398,22 +398,22 @@ static long __do_enable_aux_tasks(int flags)
398 TRACE_CUR("Checking task in %s/%d: %s/%d = (p = %llu):\n", 398 TRACE_CUR("Checking task in %s/%d: %s/%d = (p = %llu):\n",
399 leader->comm, leader->pid, t->comm, t->pid, 399 leader->comm, leader->pid, t->comm, t->pid,
400 tsk_rt(t)->task_params.period); 400 tsk_rt(t)->task_params.period);
401 401
402 /* inspect period to see if it is an rt task */ 402 /* inspect period to see if it is an rt task */
403 if (tsk_rt(t)->task_params.period == 0) { 403 if (tsk_rt(t)->task_params.period == 0) {
404 if (flags && AUX_CURRENT) { 404 if (flags && AUX_CURRENT) {
405 if (!tsk_rt(t)->is_aux_task) { 405 if (!tsk_rt(t)->is_aux_task) {
406 int admit_ret; 406 int admit_ret;
407 407
408 TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); 408 TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid);
409 409
410 admit_ret = admit_aux_task(t); 410 admit_ret = admit_aux_task(t);
411 411
412 if (admit_ret == 0) { 412 if (admit_ret == 0) {
413 /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */ 413 /* hasn't been aux_tasks_increase_priorityted into rt. make it a aux. */
414 tsk_rt(t)->is_aux_task = 1; 414 tsk_rt(t)->is_aux_task = 1;
415 aux_tasks_added = 1; 415 aux_tasks_added = 1;
416 416
417#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE 417#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
418 list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); 418 list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks);
419#endif 419#endif
@@ -464,7 +464,7 @@ static long __do_disable_aux_tasks(int flags)
464 if (flags & AUX_FUTURE) { 464 if (flags & AUX_FUTURE) {
465 tsk_aux(leader)->aux_future = 0; 465 tsk_aux(leader)->aux_future = 0;
466 } 466 }
467 467
468 if (flags & AUX_CURRENT) { 468 if (flags & AUX_CURRENT) {
469 t = leader; 469 t = leader;
470 do { 470 do {
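
One pre-existing oddity is visible in the __do_enable_aux_tasks() context above: the test at line 404 reads "if (flags && AUX_CURRENT)" (logical AND), which is true for any nonzero flags, while the sibling checks use bitwise tests (flags & AUX_FUTURE, flags & AUX_CURRENT). If the bitwise form was intended, the checks would look like this sketch (an assumption about intent, not something this patch changes):

	/* Sketch only: flag tests as presumably intended; AUX_CURRENT and
	 * AUX_FUTURE are the existing bit flags used elsewhere in this file. */
	static inline int wants_current_aux(int flags) { return flags & AUX_CURRENT; }
	static inline int wants_future_aux(int flags)  { return flags & AUX_FUTURE; }
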
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
index c279bf12a7f5..27b728a55669 100644
--- a/litmus/edf_common.c
+++ b/litmus/edf_common.c
@@ -73,6 +73,22 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second)
73 return first && !second; 73 return first && !second;
74 } 74 }
75 75
76 /* There is some goofy stuff in this code here. There are three subclasses
77 * within the SCHED_LITMUS scheduling class:
78 * 1) Auxiliary tasks: COTS helper threads from the application level that
79 * are forced to be real-time.
80 * 2) klmirqd interrupt threads: Litmus threaded interrupt handlers.
81 * 3) Normal Litmus tasks.
82 *
83 * At their base priorities, #3 > #2 > #1. However, #1 and #2 threads might
84 * inherit a priority from a task of #3.
85 *
86 * The code proceeds in the following manner:
87 * 1) Make aux and klmirqd threads with base-priorities have low priorities.
88 * 2) Determine effective priorities.
89 * 3) Perform priority comparison. Favor #3 over #1 and #2 in case of tie.
90 */
91
76 92
77#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED) 93#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED)
78 /* run aux tasks at max priority */ 94 /* run aux tasks at max priority */
@@ -109,7 +125,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second)
109 return temp; 125 return temp;
110 } 126 }
111 } 127 }
112 128
113 if (first->rt_param.is_aux_task && second->rt_param.is_aux_task && 129 if (first->rt_param.is_aux_task && second->rt_param.is_aux_task &&
114 first->rt_param.inh_task == second->rt_param.inh_task) { // inh_task is !NULL for both tasks since neither was a lo_aux task 130 first->rt_param.inh_task == second->rt_param.inh_task) { // inh_task is !NULL for both tasks since neither was a lo_aux task
115 // Both aux tasks inherit from the same task, so tie-break 131 // Both aux tasks inherit from the same task, so tie-break
@@ -120,6 +136,36 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second)
120 } 136 }
121#endif 137#endif
122 138
139#ifdef CONFIG_LITMUS_SOFTIRQD
140 {
141 int first_lo_klmirqd = first->rt_param.is_interrupt_thread && !first->rt_param.inh_task;
142 int second_lo_klmirqd = second->rt_param.is_interrupt_thread && !second->rt_param.inh_task;
143
144 /* prioritize aux tasks without inheritance below real-time tasks */
145 if (first_lo_klmirqd || second_lo_klmirqd) {
146 // one of these is an klmirqd thread without inheritance.
147 if(first_lo_klmirqd && second_lo_klmirqd) {
148 TRACE_CUR("klmirqd tie break!\n"); // tie-break by BASE priority of the aux tasks
149 goto klmirqd_tie_break;
150 }
151 else {
152 // make the klmirqd thread (second) lowest priority real-time task
153 int temp = (first_lo_klmirqd) ? !is_realtime(second) : !is_realtime(first);
154 TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp);
155 return temp;
156 }
157 }
158
159 if (first->rt_param.is_interrupt_thread && second->rt_param.is_interrupt_thread &&
160 first->rt_param.inh_task == second->rt_param.inh_task) { // inh_task is !NULL for both tasks since neither was a lo_klmirqd task
161 // Both klmirqd tasks inherit from the same task, so tie-break
162 // by base priority of the klmirqd tasks.
163 TRACE_CUR("klmirqd tie break!\n");
164 goto klmirqd_tie_break;
165 }
166 }
167#endif
168
123 169
124#ifdef CONFIG_LITMUS_LOCKING 170#ifdef CONFIG_LITMUS_LOCKING
125 /* Check for EFFECTIVE priorities. Change task 171 /* Check for EFFECTIVE priorities. Change task
@@ -161,7 +207,8 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second)
161#endif 207#endif
162 208
163aux_tie_break: 209aux_tie_break:
164 210klmirqd_tie_break:
211
165 if (!is_realtime(second_task)) { 212 if (!is_realtime(second_task)) {
166 return 1; 213 return 1;
167 } 214 }
@@ -230,15 +277,13 @@ aux_tie_break:
230 } 277 }
231 else if (first_task->pid == second_task->pid) { 278 else if (first_task->pid == second_task->pid) {
232#ifdef CONFIG_LITMUS_SOFTIRQD 279#ifdef CONFIG_LITMUS_SOFTIRQD
233 if (first_task->rt_param.is_proxy_thread < 280 if (first_task->rt_param.is_interrupt_thread < second_task->rt_param.is_interrupt_thread) {
234 second_task->rt_param.is_proxy_thread) {
235 return 1; 281 return 1;
236 } 282 }
237 else if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) { 283 else if (first_task->rt_param.is_interrupt_thread == second_task->rt_param.is_interrupt_thread) {
238#endif 284#endif
239 285
240#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) 286#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE)
241 /* is this dead code? */
242 if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { 287 if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) {
243 return 1; 288 return 1;
244 } 289 }
@@ -246,8 +291,7 @@ aux_tie_break:
246#endif 291#endif
247 292
248 /* Something could be wrong if you get this far. */ 293 /* Something could be wrong if you get this far. */
249 if (unlikely(first->rt_param.inh_task == 294 if (unlikely(first->rt_param.inh_task == second->rt_param.inh_task)) {
250 second->rt_param.inh_task)) {
251 /* Both tasks have the same inherited priority. 295 /* Both tasks have the same inherited priority.
252 * Likely in a bug-condition. 296 * Likely in a bug-condition.
253 */ 297 */
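
The comment block added at the top of edf_higher_prio() describes the intended ordering among the three SCHED_LITMUS subclasses. A condensed restatement of that intent as a sketch (not the actual comparator, which additionally handles priority boosting, locking, and PID tie-breaks):

	/* "Low" = an aux or klmirqd thread currently running at its base priority
	 * (no inherited priority from a normal Litmus task). */
	static int runs_at_low_prio(struct task_struct *t)
	{
		return (tsk_rt(t)->is_aux_task || tsk_rt(t)->is_interrupt_thread) &&
		       !tsk_rt(t)->inh_task;
	}

	/* 1 iff 'a' should be preferred over 'b' by subclass rules alone;
	 * 0 means fall through to the normal effective-priority comparison
	 * or to a base-priority tie-break. */
	static int subclass_prefers(struct task_struct *a, struct task_struct *b)
	{
		int a_lo = runs_at_low_prio(a), b_lo = runs_at_low_prio(b);

		if (a_lo && b_lo)
			return 0;                /* both low: tie-break on base priorities */
		if (a_lo)
			return !is_realtime(b);  /* a low thread only beats non-real-time tasks */
		if (b_lo)
			return is_realtime(a);   /* any real-time task beats a low thread */
		return 0;                        /* neither is low: compare effective priorities */
	}
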
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index b29828344dd1..a4ae74331782 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1960,11 +1960,11 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
1960 return(NULL); 1960 return(NULL);
1961 } 1961 }
1962 1962
1963 if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { 1963// if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
1964 TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", 1964// TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
1965 NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); 1965// NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
1966// return(NULL); 1966//// return(NULL);
1967 } 1967// }
1968 1968
1969 ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); 1969 ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
1970 if(!ikglp_aff) { 1970 if(!ikglp_aff) {
@@ -2124,7 +2124,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
2124 if(aff->q_info[i].q->count < max_fifo_len) { 2124 if(aff->q_info[i].q->count < max_fifo_len) {
2125 int want = 0; 2125 int want = 0;
2126 2126
2127 lt_t migration = 2127 lt_t migration =
2128 get_gpu_estimate(t, 2128 get_gpu_estimate(t,
2129 gpu_migration_distance(tsk_rt(t)->last_gpu, 2129 gpu_migration_distance(tsk_rt(t)->last_gpu,
2130 replica_to_gpu(aff, i))); 2130 replica_to_gpu(aff, i)));
diff --git a/litmus/jobs.c b/litmus/jobs.c
index 9fe4eb1fa168..8593a8d2f107 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -30,7 +30,7 @@ void prepare_for_next_period(struct task_struct *t)
30 * release and deadline. Lateness may be negative. 30 * release and deadline. Lateness may be negative.
31 */ 31 */
32 t->rt_param.job_params.lateness = 32 t->rt_param.job_params.lateness =
33 (long long)litmus_clock() - 33 (long long)litmus_clock() -
34 (long long)t->rt_param.job_params.deadline; 34 (long long)t->rt_param.job_params.deadline;
35 35
36 setup_release(t, get_release(t) + get_rt_period(t)); 36 setup_release(t, get_release(t) + get_rt_period(t));
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index ab472330095d..785a095275e6 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -587,11 +587,11 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
587 return(NULL); 587 return(NULL);
588 } 588 }
589 589
590 if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { 590// if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
591 TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", 591// TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
592 NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); 592// NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
593// return(NULL); 593//// return(NULL);
594 } 594// }
595 595
596 kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); 596 kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
597 if(!kfmlp_aff) { 597 if(!kfmlp_aff) {
@@ -829,6 +829,7 @@ void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* f
829 829
830 reg_nv_device(gpu, 1, t); // register 830 reg_nv_device(gpu, 1, t); // register
831 831
832
832 tsk_rt(t)->suspend_gpu_tracker_on_block = 0; 833 tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
833 reset_gpu_tracker(t); 834 reset_gpu_tracker(t);
834 start_gpu_tracker(t); 835 start_gpu_tracker(t);
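
Note that both ikglp_aff_obs_new() and kfmlp_aff_obs_new() now have the nr_simult_users sanity check commented out, so nothing rejects a request that exceeds NV_MAX_SIMULT_USERS. If the check is ever restored, a shared helper would avoid duplicating it in both observers; a sketch (the helper name is illustrative, not part of the patch):

	/* Hypothetical helper: validate the requested number of simultaneous
	 * GPU users against the configured limit. */
	static inline int nv_simult_users_ok(int requested)
	{
		if (requested > NV_MAX_SIMULT_USERS) {
			TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
			          NV_MAX_SIMULT_USERS, requested);
			return 0;
		}
		return 1;
	}
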
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 3b8017397e80..fa244ba53e22 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -60,28 +60,6 @@ void bheap_node_free(struct bheap_node* hn)
60struct release_heap* release_heap_alloc(int gfp_flags); 60struct release_heap* release_heap_alloc(int gfp_flags);
61void release_heap_free(struct release_heap* rh); 61void release_heap_free(struct release_heap* rh);
62 62
63#ifdef CONFIG_LITMUS_NVIDIA
64/*
65 * sys_register_nv_device
66 * @nv_device_id: The Nvidia device id that the task want to register
67 * @reg_action: set to '1' to register the specified device. zero otherwise.
68 * Syscall for register task's designated nvidia device into NV_DEVICE_REG array
69 * Returns EFAULT if nv_device_id is out of range.
70 * 0 if success
71 */
72asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
73{
74 /* register the device to caller (aka 'current') */
75 return(reg_nv_device(nv_device_id, reg_action, current));
76}
77#else
78asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
79{
80 return(-EINVAL);
81}
82#endif
83
84
85/* 63/*
86 * sys_set_task_rt_param 64 * sys_set_task_rt_param
87 * @pid: Pid of the task which scheduling parameters must be changed 65 * @pid: Pid of the task which scheduling parameters must be changed
@@ -393,22 +371,11 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
393// WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks)); 371// WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks));
394#endif 372#endif
395 373
396#ifdef CONFIG_LITMUS_SOFTIRQD
397 /* We probably should not have any tasklets executing for
398 * us at this time.
399 */
400 WARN_ON(p->rt_param.cur_klmirqd);
401 WARN_ON(atomic_read(&p->rt_param.klmirqd_sem_stat) == HELD);
402
403 if(p->rt_param.cur_klmirqd)
404 flush_pending(p->rt_param.cur_klmirqd, p);
405
406 if(atomic_read(&p->rt_param.klmirqd_sem_stat) == HELD)
407 up_and_set_stat(p, NOT_HELD, &p->rt_param.klmirqd_sem);
408#endif
409 374
410#ifdef CONFIG_LITMUS_NVIDIA 375#ifdef CONFIG_LITMUS_NVIDIA
411 WARN_ON(p->rt_param.held_gpus != 0); 376 WARN_ON(p->rt_param.held_gpus != 0);
377
378 INIT_BINHEAP_NODE(&p->rt_param.gpu_owner_node);
412#endif 379#endif
413 380
414 /* Cleanup everything else. */ 381 /* Cleanup everything else. */
@@ -477,11 +444,9 @@ long __litmus_admit_task(struct task_struct* tsk)
477 //INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, prio_order); // done by scheduler 444 //INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, prio_order); // done by scheduler
478#endif 445#endif
479#ifdef CONFIG_LITMUS_SOFTIRQD 446#ifdef CONFIG_LITMUS_SOFTIRQD
480 /* proxy thread off by default */ 447 /* not an interrupt thread by default */
481 tsk_rt(tsk)is_proxy_thread = 0; 448 tsk_rt(tsk)->is_interrupt_thread = 0;
482 tsk_rt(tsk)cur_klmirqd = NULL; 449 tsk_rt(tsk)->klmirqd_info = NULL;
483 mutex_init(&tsk_rt(tsk)->klmirqd_sem);
484 atomic_set(&tsk_rt(tsk)->klmirqd_sem_stat, NOT_HELD);
485#endif 450#endif
486 451
487 retval = litmus->admit_task(tsk); 452 retval = litmus->admit_task(tsk);
@@ -580,8 +545,7 @@ int switch_sched_plugin(struct sched_plugin* plugin)
580 cpu_relax(); 545 cpu_relax();
581 546
582#ifdef CONFIG_LITMUS_SOFTIRQD 547#ifdef CONFIG_LITMUS_SOFTIRQD
583 if(!klmirqd_is_dead()) 548 if (!klmirqd_is_dead()) {
584 {
585 kill_klmirqd(); 549 kill_klmirqd();
586 } 550 }
587#endif 551#endif
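
The per-task klmirqd state shrinks accordingly: the old proxy-thread fields (is_proxy_thread, cur_klmirqd, klmirqd_sem, klmirqd_sem_stat) are replaced by just is_interrupt_thread and a klmirqd_info pointer, and the semaphore-based ownership handshake disappears from reinit_litmus_state(). A sketch of the reduced state as it is set up at admission time, mirroring __litmus_admit_task() above (the helper names are illustrative):

	#ifdef CONFIG_LITMUS_SOFTIRQD
	/* Sketch only: the two per-task fields that remain after this patch. */
	static inline void reset_klmirqd_task_state(struct task_struct *tsk)
	{
		tsk_rt(tsk)->is_interrupt_thread = 0;   /* set to 1 by set_litmus_daemon_sched() */
		tsk_rt(tsk)->klmirqd_info = NULL;       /* filled in by register_klmirqd() */
	}

	static inline int is_klmirqd_thread(struct task_struct *tsk)
	{
		return tsk_rt(tsk)->is_interrupt_thread && tsk_rt(tsk)->klmirqd_info != NULL;
	}
	#endif
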
diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c
index 73a3053e662b..44e2d38ad982 100644
--- a/litmus/litmus_softirq.c
+++ b/litmus/litmus_softirq.c
@@ -18,10 +18,6 @@
18 18
19/* TODO: Remove unneeded mb() and other barriers. */ 19/* TODO: Remove unneeded mb() and other barriers. */
20 20
21
22/* counts number of daemons ready to handle litmus irqs. */
23static atomic_t num_ready_klmirqds = ATOMIC_INIT(0);
24
25enum pending_flags 21enum pending_flags
26{ 22{
27 LIT_TASKLET_LOW = 0x1, 23 LIT_TASKLET_LOW = 0x1,
@@ -29,35 +25,313 @@ enum pending_flags
29 LIT_WORK = LIT_TASKLET_HI<<1 25 LIT_WORK = LIT_TASKLET_HI<<1
30}; 26};
31 27
32/* only support tasklet processing for now. */ 28struct klmirqd_registration
33struct tasklet_head
34{ 29{
35 struct tasklet_struct *head; 30 raw_spinlock_t lock;
36 struct tasklet_struct **tail; 31 u32 nr_threads;
32 unsigned int initialized:1;
33 unsigned int shuttingdown:1;
34 struct list_head threads;
37}; 35};
38 36
39struct klmirqd_info 37static atomic_t klmirqd_id_gen = ATOMIC_INIT(0);
38
39static struct klmirqd_registration klmirqd_state;
40
41
42
43void init_klmirqd(void)
44{
45 raw_spin_lock_init(&klmirqd_state.lock);
46
47 klmirqd_state.nr_threads = 0;
48 klmirqd_state.initialized = 1;
49 klmirqd_state.shuttingdown = 0;
50 INIT_LIST_HEAD(&klmirqd_state.threads);
51}
52
53static int __klmirqd_is_ready(void)
54{
55 return (klmirqd_state.initialized == 1 && klmirqd_state.shuttingdown == 0);
56}
57
58int klmirqd_is_ready(void)
59{
60 unsigned long flags;
61 int ret;
62
63 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
64 ret = __klmirqd_is_ready();
65 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
66
67 return ret;
68}
69
70int klmirqd_is_dead(void)
71{
72 return(!klmirqd_is_ready());
73}
74
75
76void kill_klmirqd(void)
77{
78 if(!klmirqd_is_dead())
79 {
80 unsigned long flags;
81 struct list_head *pos;
82
83 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
84
85 TRACE("%s: Killing all klmirqd threads! (%d of them)\n", __FUNCTION__, klmirqd_state.nr_threads);
86
87 klmirqd_state.shuttingdown = 1;
88
89 list_for_each(pos, &klmirqd_state.threads) {
90 struct klmirqd_info* info = list_entry(pos, struct klmirqd_info, klmirqd_reg);
91
92 if(info->terminating != 1)
93 {
94 info->terminating = 1;
95 mb(); /* just to be sure? */
96 flush_pending(info->klmirqd);
97
98 /* signal termination */
99 kthread_stop(info->klmirqd);
100 }
101 }
102
103 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
104 }
105}
106
107
108
109void kill_klmirqd_thread(struct task_struct* klmirqd_thread)
40{ 110{
41 struct task_struct* klmirqd; 111 unsigned long flags;
42 struct task_struct* current_owner; 112 struct klmirqd_info* info;
43 int terminating;
44 113
114 if (!tsk_rt(klmirqd_thread)->is_interrupt_thread) {
115 TRACE("%s/%d is not a klmirqd thread\n", klmirqd_thread->comm, klmirqd_thread->pid);
116 return;
117 }
118
119 TRACE("%s: Killing klmirqd thread %s/%d\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
45 120
46 raw_spinlock_t lock; 121 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
47 122
48 u32 pending; 123 info = tsk_rt(klmirqd_thread)->klmirqd_info;
49 atomic_t num_hi_pending; 124
50 atomic_t num_low_pending; 125 if(info->terminating != 1) {
51 atomic_t num_work_pending; 126 info->terminating = 1;
127 mb();
128
129 flush_pending(klmirqd_thread);
130 kthread_stop(klmirqd_thread);
131 }
132
133 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
134}
52 135
53 /* in order of priority */ 136
54 struct tasklet_head pending_tasklets_hi; 137
55 struct tasklet_head pending_tasklets; 138struct klmirqd_launch_data
56 struct list_head worklist; 139{
140 int cpu_affinity;
141 klmirqd_callback_t* cb;
142 struct work_struct work;
57}; 143};
58 144
59/* one list for each klmirqd */ 145static int run_klmirqd(void* callback);
60static struct klmirqd_info klmirqds[NR_LITMUS_SOFTIRQD]; 146
147
148/* executed by a kworker from workqueues */
149static void __launch_klmirqd_thread(struct work_struct *work)
150{
151 int id;
152 struct task_struct* thread = NULL;
153 struct klmirqd_launch_data* launch_data =
154 container_of(work, struct klmirqd_launch_data, work);
155
156 TRACE("%s: Creating klmirqd thread\n", __FUNCTION__);
157
158 id = atomic_inc_return(&klmirqd_id_gen);
159
160 if (launch_data->cpu_affinity != -1) {
161 thread = kthread_create(
162 run_klmirqd,
163 /* treat the affinity as a pointer, we'll cast it back later */
164 (void*)launch_data->cb,
165 "klmirqd_th%d/%d",
166 id,
167 launch_data->cpu_affinity);
168
169 /* litmus will put is in the right cluster. */
170 kthread_bind(thread, launch_data->cpu_affinity);
171
172 TRACE("%s: Launching klmirqd_th%d/%d\n", __FUNCTION__, id, launch_data->cpu_affinity);
173 }
174 else {
175 thread = kthread_create(
176 run_klmirqd,
177 /* treat the affinity as a pointer, we'll cast it back later */
178 (void*)launch_data->cb,
179 "klmirqd_th%d",
180 id);
181
182 TRACE("%s: Launching klmirqd_th%d\n", __FUNCTION__, id);
183 }
184
185 if (thread) {
186 wake_up_process(thread);
187 }
188 else {
189 TRACE("Could not create klmirqd/%d thread!\n", id);
190 }
191
192 kfree(launch_data);
193}
194
195
196int launch_klmirqd_thread(int cpu, klmirqd_callback_t* cb)
197{
198 struct klmirqd_launch_data* delayed_launch;
199
200 if (!klmirqd_is_ready()) {
201 TRACE("klmirqd is not ready. Check that it was initialized!\n");
202 return -1;
203 }
204
205 /* tell a work queue to launch the threads. we can't make scheduling
206 calls since we're in an atomic state. */
207 delayed_launch = kmalloc(sizeof(struct klmirqd_launch_data), GFP_ATOMIC);
208 delayed_launch->cpu_affinity = cpu;
209 delayed_launch->cb = cb;
210 INIT_WORK(&delayed_launch->work, __launch_klmirqd_thread);
211 schedule_work(&delayed_launch->work);
212
213 return 0;
214}
215
216
217
218
219#define KLMIRQD_SLICE_NR_JIFFIES 1
220#define KLMIRQD_SLICE_NS ((NSEC_PER_SEC / HZ) * KLMIRQD_SLICE_NR_JIFFIES)
221
222static int set_litmus_daemon_sched(struct task_struct* tsk)
223{
224 int ret = 0;
225
226 struct rt_task tp = {
227 .period = KLMIRQD_SLICE_NS, /* dummy 1 second period */
228 .relative_deadline = KLMIRQD_SLICE_NS,
229 .exec_cost = KLMIRQD_SLICE_NS,
230 .phase = 0,
231 .cpu = task_cpu(current),
232 .budget_policy = NO_ENFORCEMENT,
233 .budget_signal_policy = NO_SIGNALS,
234 .cls = RT_CLASS_BEST_EFFORT
235 };
236
237 struct sched_param param = { .sched_priority = 0};
238
239 TRACE_CUR("Setting %s/%d as daemon thread.\n", tsk->comm, tsk->pid);
240
241 /* set task params */
242 tsk_rt(tsk)->task_params = tp;
243 tsk_rt(tsk)->is_interrupt_thread = 1;
244
245 /* inform the OS we're SCHED_LITMUS --
246 sched_setscheduler_nocheck() calls litmus_admit_task(). */
247 sched_setscheduler_nocheck(tsk, SCHED_LITMUS, &param);
248
249 return ret;
250}
251
252static int register_klmirqd(struct task_struct* tsk)
253{
254 int retval = 0;
255 unsigned long flags;
256 struct klmirqd_info *info = NULL;
257
258 if (!tsk_rt(tsk)->is_interrupt_thread) {
259 TRACE("Only proxy threads already running in Litmus may become klmirqd threads!\n");
260 WARN_ON(1);
261 retval = -1;
262 goto out;
263 }
264
265 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
266
267 if (!__klmirqd_is_ready()) {
268 TRACE("klmirqd is not ready! Did you forget to initialize it?\n");
269 WARN_ON(1);
270 retval = -1;
271 goto out_unlock;
272 }
273
274 /* allocate and initialize klmirqd data for the thread */
275 info = kmalloc(sizeof(struct klmirqd_info), GFP_KERNEL);
276 if (!info) {
277 TRACE("Failed to allocate klmirqd_info struct!\n");
278 retval = -1; /* todo: pick better code */
279 goto out_unlock;
280 }
281 memset(info, 0, sizeof(struct klmirqd_info));
282 info->klmirqd = tsk;
283 info->pending_tasklets_hi.tail = &info->pending_tasklets_hi.head;
284 info->pending_tasklets.tail = &info->pending_tasklets.head;
285 INIT_LIST_HEAD(&info->worklist);
286 INIT_LIST_HEAD(&info->klmirqd_reg);
287 raw_spin_lock_init(&info->lock);
288
289
290 /* now register with klmirqd */
291 list_add_tail(&info->klmirqd_reg, &klmirqd_state.threads);
292 ++klmirqd_state.nr_threads;
293
294 /* update the task struct to point to klmirqd info */
295 tsk_rt(tsk)->klmirqd_info = info;
296
297out_unlock:
298 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
299
300out:
301 return retval;
302}
303
304static int unregister_klmirqd(struct task_struct* tsk)
305{
306 int retval = 0;
307 unsigned long flags;
308 struct klmirqd_info *info = tsk_rt(tsk)->klmirqd_info;
309
310 if (!tsk_rt(tsk)->is_interrupt_thread || !info) {
311 TRACE("%s/%d is not a klmirqd thread!\n", tsk->comm, tsk->pid);
312 WARN_ON(1);
313 retval = -1;
314 goto out;
315 }
316
317 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
318
319 /* remove the entry in the klmirqd thread list */
320 list_del(&info->klmirqd_reg);
321 --klmirqd_state.nr_threads;
322
323 /* remove link to klmirqd info from thread */
324 tsk_rt(tsk)->klmirqd_info = NULL;
325
326 /* clean up memory */
327 kfree(info);
328
329 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
330
331out:
332 return retval;
333}
334
61 335
62 336
63 337
@@ -67,35 +341,50 @@ int proc_read_klmirqd_stats(char *page, char **start,
67 off_t off, int count, 341 off_t off, int count,
68 int *eof, void *data) 342 int *eof, void *data)
69{ 343{
70 int len = snprintf(page, PAGE_SIZE, 344 unsigned long flags;
71 "num ready klmirqds: %d\n\n", 345 int len;
72 atomic_read(&num_ready_klmirqds)); 346
73 347 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
74 if(klmirqd_is_ready()) 348
75 { 349 if (klmirqd_state.initialized) {
76 int i; 350 if (!klmirqd_state.shuttingdown) {
77 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) 351 struct list_head *pos;
78 { 352
79 len += 353 len = snprintf(page, PAGE_SIZE,
80 snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */ 354 "num ready klmirqds: %d\n\n",
81 "klmirqd_th%d: %s/%d\n" 355 klmirqd_state.nr_threads);
82 "\tcurrent_owner: %s/%d\n" 356
83 "\tpending: %x\n" 357 list_for_each(pos, &klmirqd_state.threads) {
84 "\tnum hi: %d\n" 358 struct klmirqd_info* info = list_entry(pos, struct klmirqd_info, klmirqd_reg);
85 "\tnum low: %d\n" 359
86 "\tnum work: %d\n\n", 360 len +=
87 i, 361 snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */
88 klmirqds[i].klmirqd->comm, klmirqds[i].klmirqd->pid, 362 "klmirqd_thread: %s/%d\n"
89 (klmirqds[i].current_owner != NULL) ? 363 "\tcurrent_owner: %s/%d\n"
90 klmirqds[i].current_owner->comm : "(null)", 364 "\tpending: %x\n"
91 (klmirqds[i].current_owner != NULL) ? 365 "\tnum hi: %d\n"
92 klmirqds[i].current_owner->pid : 0, 366 "\tnum low: %d\n"
93 klmirqds[i].pending, 367 "\tnum work: %d\n\n",
94 atomic_read(&klmirqds[i].num_hi_pending), 368 info->klmirqd->comm, info->klmirqd->pid,
95 atomic_read(&klmirqds[i].num_low_pending), 369 (info->current_owner != NULL) ?
96 atomic_read(&klmirqds[i].num_work_pending)); 370 info->current_owner->comm : "(null)",
371 (info->current_owner != NULL) ?
372 info->current_owner->pid : 0,
373 info->pending,
374 atomic_read(&info->num_hi_pending),
375 atomic_read(&info->num_low_pending),
376 atomic_read(&info->num_work_pending));
377 }
378 }
379 else {
380 len = snprintf(page, PAGE_SIZE, "klmirqd is shutting down\n");
97 } 381 }
98 } 382 }
383 else {
384 len = snprintf(page, PAGE_SIZE, "klmirqd is not initialized!\n");
385 }
386
387 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
99 388
100 return(len); 389 return(len);
101} 390}
@@ -162,6 +451,15 @@ static void dump_state(struct klmirqd_info* which, const char* caller)
162#endif 451#endif
163 452
164 453
454
455
456
457
458
459
460
461
462
165/* forward declarations */ 463/* forward declarations */
166static void ___litmus_tasklet_schedule(struct tasklet_struct *t, 464static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
167 struct klmirqd_info *which, 465 struct klmirqd_info *which,
@@ -174,24 +472,6 @@ static void ___litmus_schedule_work(struct work_struct *w,
174 int wakeup); 472 int wakeup);
175 473
176 474
177
178inline unsigned int klmirqd_id(struct task_struct* tsk)
179{
180 int i;
181 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
182 {
183 if(klmirqds[i].klmirqd == tsk)
184 {
185 return i;
186 }
187 }
188
189 BUG();
190
191 return 0;
192}
193
194
195inline static u32 litirq_pending_hi_irqoff(struct klmirqd_info* which) 475inline static u32 litirq_pending_hi_irqoff(struct klmirqd_info* which)
196{ 476{
197 return (which->pending & LIT_TASKLET_HI); 477 return (which->pending & LIT_TASKLET_HI);
@@ -225,200 +505,11 @@ inline static u32 litirq_pending(struct klmirqd_info* which)
225 return pending; 505 return pending;
226}; 506};
227 507
228inline static u32 litirq_pending_with_owner(struct klmirqd_info* which, struct task_struct* owner)
229{
230 unsigned long flags;
231 u32 pending;
232
233 raw_spin_lock_irqsave(&which->lock, flags);
234 pending = litirq_pending_irqoff(which);
235 if(pending)
236 {
237 if(which->current_owner != owner)
238 {
239 pending = 0; // owner switch!
240 }
241 }
242 raw_spin_unlock_irqrestore(&which->lock, flags);
243
244 return pending;
245}
246
247
248inline static u32 litirq_pending_and_sem_and_owner(struct klmirqd_info* which,
249 struct mutex** sem,
250 struct task_struct** t)
251{
252 unsigned long flags;
253 u32 pending;
254
255 /* init values */
256 *sem = NULL;
257 *t = NULL;
258
259 raw_spin_lock_irqsave(&which->lock, flags);
260
261 pending = litirq_pending_irqoff(which);
262 if(pending)
263 {
264 if(which->current_owner != NULL)
265 {
266 *t = which->current_owner;
267 *sem = &tsk_rt(which->current_owner)->klmirqd_sem;
268 }
269 else
270 {
271 BUG();
272 }
273 }
274 raw_spin_unlock_irqrestore(&which->lock, flags);
275
276 if(likely(*sem))
277 {
278 return pending;
279 }
280 else
281 {
282 return 0;
283 }
284}
285
286/* returns true if the next piece of work to do is from a different owner.
287 */
288static int tasklet_ownership_change(
289 struct klmirqd_info* which,
290 enum pending_flags taskletQ)
291{
292 /* this function doesn't have to look at work objects since they have
293 priority below tasklets. */
294
295 unsigned long flags;
296 int ret = 0;
297
298 raw_spin_lock_irqsave(&which->lock, flags);
299
300 switch(taskletQ)
301 {
302 case LIT_TASKLET_HI:
303 if(litirq_pending_hi_irqoff(which))
304 {
305 ret = (which->pending_tasklets_hi.head->owner !=
306 which->current_owner);
307 }
308 break;
309 case LIT_TASKLET_LOW:
310 if(litirq_pending_low_irqoff(which))
311 {
312 ret = (which->pending_tasklets.head->owner !=
313 which->current_owner);
314 }
315 break;
316 default:
317 break;
318 }
319
320 raw_spin_unlock_irqrestore(&which->lock, flags);
321
322 TRACE_TASK(which->klmirqd, "ownership change needed: %d\n", ret);
323
324 return ret;
325}
326
327
328static void __reeval_prio(struct klmirqd_info* which)
329{
330 struct task_struct* next_owner = NULL;
331 struct task_struct* klmirqd = which->klmirqd;
332
333 /* Check in prio-order */
334 u32 pending = litirq_pending_irqoff(which);
335
336 //__dump_state(which, "__reeval_prio: before");
337
338 if(pending)
339 {
340 if(pending & LIT_TASKLET_HI)
341 {
342 next_owner = which->pending_tasklets_hi.head->owner;
343 }
344 else if(pending & LIT_TASKLET_LOW)
345 {
346 next_owner = which->pending_tasklets.head->owner;
347 }
348 else if(pending & LIT_WORK)
349 {
350 struct work_struct* work =
351 list_first_entry(&which->worklist, struct work_struct, entry);
352 next_owner = work->owner;
353 }
354 }
355
356 if(next_owner != which->current_owner)
357 {
358 struct task_struct* old_owner = which->current_owner;
359
360 /* bind the next owner. */
361 which->current_owner = next_owner;
362 mb();
363
364 if(next_owner != NULL)
365 {
366 if(!in_interrupt())
367 {
368 TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
369 ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->comm,
370 ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->pid,
371 next_owner->comm, next_owner->pid);
372 }
373 else
374 {
375 TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
376 ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->comm,
377 ((tsk_rt(klmirqd)->inh_task) ? tsk_rt(klmirqd)->inh_task : klmirqd)->pid,
378 next_owner->comm, next_owner->pid);
379 }
380
381 litmus->increase_prio_inheritance_klmirqd(klmirqd, old_owner, next_owner);
382 }
383 else
384 {
385 if(likely(!in_interrupt()))
386 {
387 TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n",
388 __FUNCTION__, klmirqd->comm, klmirqd->pid);
389 }
390 else
391 {
392 // is this a bug?
393 TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n",
394 __FUNCTION__, klmirqd->comm, klmirqd->pid);
395 }
396
397 BUG_ON(pending != 0);
398 litmus->decrease_prio_inheritance_klmirqd(klmirqd, old_owner, NULL);
399 }
400 }
401
402 //__dump_state(which, "__reeval_prio: after");
403}
404
405static void reeval_prio(struct klmirqd_info* which)
406{
407 unsigned long flags;
408
409 raw_spin_lock_irqsave(&which->lock, flags);
410 __reeval_prio(which);
411 raw_spin_unlock_irqrestore(&which->lock, flags);
412}
413
414
415static void wakeup_litirqd_locked(struct klmirqd_info* which) 508static void wakeup_litirqd_locked(struct klmirqd_info* which)
416{ 509{
417 /* Interrupts are disabled: no need to stop preemption */ 510 /* Interrupts are disabled: no need to stop preemption */
418 if (which && which->klmirqd) 511 if (which && which->klmirqd)
419 { 512 {
420 __reeval_prio(which); /* configure the proper priority */
421
422 if(which->klmirqd->state != TASK_RUNNING) 513 if(which->klmirqd->state != TASK_RUNNING)
423 { 514 {
424 TRACE("%s: Waking up klmirqd: %s/%d\n", __FUNCTION__, 515 TRACE("%s: Waking up klmirqd: %s/%d\n", __FUNCTION__,
@@ -468,7 +559,7 @@ static void do_lit_tasklet(struct klmirqd_info* which,
468 list = list->next; 559 list = list->next;
469 560
470 /* execute tasklet if it has my priority and is free */ 561 /* execute tasklet if it has my priority and is free */
471 if ((t->owner == which->current_owner) && tasklet_trylock(t)) { 562 if (tasklet_trylock(t)) {
472 if (!atomic_read(&t->count)) { 563 if (!atomic_read(&t->count)) {
473 564
474 sched_trace_tasklet_begin(t->owner); 565 sched_trace_tasklet_begin(t->owner);
@@ -503,15 +594,14 @@ static void do_lit_tasklet(struct klmirqd_info* which,
503 594
504// returns 1 if priorities need to be changed to continue processing 595// returns 1 if priorities need to be changed to continue processing
505// pending tasklets. 596// pending tasklets.
506static int do_litirq(struct klmirqd_info* which) 597static void do_litirq(struct klmirqd_info* which)
507{ 598{
508 u32 pending; 599 u32 pending;
509 int resched = 0;
510 600
511 if(in_interrupt()) 601 if(in_interrupt())
512 { 602 {
513 TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__); 603 TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__);
514 return(0); 604 return;
515 } 605 }
516 606
517 if(which->klmirqd != current) 607 if(which->klmirqd != current)
@@ -519,59 +609,40 @@ static int do_litirq(struct klmirqd_info* which)
519 TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n", 609 TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n",
520 __FUNCTION__, current->comm, current->pid, 610 __FUNCTION__, current->comm, current->pid,
521 which->klmirqd->comm, which->klmirqd->pid); 611 which->klmirqd->comm, which->klmirqd->pid);
522 return(0); 612 return;
523 } 613 }
524 614
525 if(!is_realtime(current)) 615 if(!is_realtime(current))
526 { 616 {
527 TRACE_CUR("%s: exiting early: klmirqd is not real-time. Sched Policy = %d\n", 617 TRACE_CUR("%s: exiting early: klmirqd is not real-time. Sched Policy = %d\n",
528 __FUNCTION__, current->policy); 618 __FUNCTION__, current->policy);
529 return(0); 619 return;
530 } 620 }
531 621
532 622
533 /* We only handle tasklets & work objects, no need for RCU triggers? */ 623 /* We only handle tasklets & work objects, no need for RCU triggers? */
534 624
535 pending = litirq_pending(which); 625 pending = litirq_pending(which);
536 if(pending) 626 if(pending) {
537 {
538 /* extract the work to do and do it! */ 627 /* extract the work to do and do it! */
539 if(pending & LIT_TASKLET_HI) 628 if(pending & LIT_TASKLET_HI) {
540 {
541 TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__); 629 TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__);
542 do_lit_tasklet(which, &which->pending_tasklets_hi); 630 do_lit_tasklet(which, &which->pending_tasklets_hi);
543 resched = tasklet_ownership_change(which, LIT_TASKLET_HI);
544
545 if(resched)
546 {
547 TRACE_CUR("%s: HI tasklets of another owner remain. "
548 "Skipping any LOW tasklets.\n", __FUNCTION__);
549 }
550 } 631 }
551 632
552 if(!resched && (pending & LIT_TASKLET_LOW)) 633 if(pending & LIT_TASKLET_LOW) {
553 {
554 TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__); 634 TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__);
555 do_lit_tasklet(which, &which->pending_tasklets); 635 do_lit_tasklet(which, &which->pending_tasklets);
556 resched = tasklet_ownership_change(which, LIT_TASKLET_LOW);
557
558 if(resched)
559 {
560 TRACE_CUR("%s: LOW tasklets of another owner remain. "
561 "Skipping any work objects.\n", __FUNCTION__);
562 }
563 } 636 }
564 } 637 }
565
566 return(resched);
567} 638}
568 639
569 640
570static void do_work(struct klmirqd_info* which) 641static void do_work(struct klmirqd_info* which)
571{ 642{
572 unsigned long flags; 643 unsigned long flags;
573 work_func_t f;
574 struct work_struct* work; 644 struct work_struct* work;
645 work_func_t f;
575 646
576 // only execute one work-queue item to yield to tasklets. 647 // only execute one work-queue item to yield to tasklets.
577 // ...is this a good idea, or should we just batch them? 648 // ...is this a good idea, or should we just batch them?
@@ -594,125 +665,58 @@ static void do_work(struct klmirqd_info* which)
594 raw_spin_unlock_irqrestore(&which->lock, flags); 665 raw_spin_unlock_irqrestore(&which->lock, flags);
595 666
596 667
668 TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__);
669 // do the work!
670 work_clear_pending(work);
671 f = work->func;
672 f(work); /* can't touch 'work' after this point,
673 the user may have freed it. */
597 674
598 /* safe to read current_owner outside of lock since only this thread 675 atomic_dec(&which->num_work_pending);
599 may write to the pointer. */
600 if(work->owner == which->current_owner)
601 {
602 TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__);
603 // do the work!
604 work_clear_pending(work);
605 f = work->func;
606 f(work); /* can't touch 'work' after this point,
607 the user may have freed it. */
608
609 atomic_dec(&which->num_work_pending);
610 }
611 else
612 {
613 TRACE_CUR("%s: Could not invoke work object. Requeuing.\n",
614 __FUNCTION__);
615 ___litmus_schedule_work(work, which, 0);
616 }
617 676
618no_work: 677no_work:
619 return; 678 return;
620} 679}
621 680
622 681
623static int set_litmus_daemon_sched(void)
624{
625 /* set up a daemon job that will never complete.
626 it should only ever run on behalf of another
627 real-time task.
628
629 TODO: Transition to a new job whenever a
630 new tasklet is handled */
631
632 int ret = 0;
633
634 struct rt_task tp = {
635 .exec_cost = 0,
636 .period = 1000000000, /* dummy 1 second period */
637 .phase = 0,
638 .cpu = task_cpu(current),
639 .budget_policy = NO_ENFORCEMENT,
640 .cls = RT_CLASS_BEST_EFFORT
641 };
642
643 struct sched_param param = { .sched_priority = 0};
644
645
646 /* set task params, mark as proxy thread, and init other data */
647 tsk_rt(current)->task_params = tp;
648 tsk_rt(current)->is_proxy_thread = 1;
649 tsk_rt(current)->cur_klmirqd = NULL;
650 mutex_init(&tsk_rt(current)->klmirqd_sem);
651 atomic_set(&tsk_rt(current)->klmirqd_sem_stat, NOT_HELD);
652
653 /* inform the OS we're SCHED_LITMUS --
654 sched_setscheduler_nocheck() calls litmus_admit_task(). */
655 sched_setscheduler_nocheck(current, SCHED_LITMUS, &param);
656
657 return ret;
658}
659
660static void enter_execution_phase(struct klmirqd_info* which,
661 struct mutex* sem,
662 struct task_struct* t)
663{
664 TRACE_CUR("%s: Trying to enter execution phase. "
665 "Acquiring semaphore of %s/%d\n", __FUNCTION__,
666 t->comm, t->pid);
667 down_and_set_stat(current, HELD, sem);
668 TRACE_CUR("%s: Execution phase entered! "
669 "Acquired semaphore of %s/%d\n", __FUNCTION__,
670 t->comm, t->pid);
671}
672
673static void exit_execution_phase(struct klmirqd_info* which,
674 struct mutex* sem,
675 struct task_struct* t)
676{
677 TRACE_CUR("%s: Exiting execution phase. "
678 "Releasing semaphore of %s/%d\n", __FUNCTION__,
679 t->comm, t->pid);
680 if(atomic_read(&tsk_rt(current)->klmirqd_sem_stat) == HELD)
681 {
682 up_and_set_stat(current, NOT_HELD, sem);
683 TRACE_CUR("%s: Execution phase exited! "
684 "Released semaphore of %s/%d\n", __FUNCTION__,
685 t->comm, t->pid);
686 }
687 else
688 {
689 TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__);
690 }
691}
692 682
693/* main loop for klitsoftirqd */ 683/* main loop for klitsoftirqd */
694static int run_klmirqd(void* unused) 684static int run_klmirqd(void* callback)
695{ 685{
696 struct klmirqd_info* which = &klmirqds[klmirqd_id(current)]; 686 int retval = 0;
697 struct mutex* sem; 687 struct klmirqd_info* info = NULL;
698 struct task_struct* owner; 688 klmirqd_callback_t* cb = (klmirqd_callback_t*)(callback);
699 689
700 int rt_status = set_litmus_daemon_sched(); 690 retval = set_litmus_daemon_sched(current);
701 691 if (retval != 0) {
702 if(rt_status != 0)
703 {
704 TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__); 692 TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__);
705 goto rt_failed; 693 goto failed;
706 } 694 }
707 695
708 atomic_inc(&num_ready_klmirqds); 696 retval = register_klmirqd(current);
697 if (retval != 0) {
698 TRACE_CUR("%s: Failed to become a klmirqd thread.\n", __FUNCTION__);
699 goto failed;
700 }
701
702 if (cb && cb->func) {
703 retval = cb->func(cb->arg);
704 if (retval != 0) {
705 TRACE_CUR("%s: klmirqd callback reported failure. retval = %d\n", __FUNCTION__, retval);
706 goto failed_unregister;
707 }
708 }
709
710 /* enter the interrupt handling workloop */
711
712 info = tsk_rt(current)->klmirqd_info;
709 713
710 set_current_state(TASK_INTERRUPTIBLE); 714 set_current_state(TASK_INTERRUPTIBLE);
711 715
712 while (!kthread_should_stop()) 716 while (!kthread_should_stop())
713 { 717 {
714 preempt_disable(); 718 preempt_disable();
715 if (!litirq_pending(which)) 719 if (!litirq_pending(info))
716 { 720 {
717 /* sleep for work */ 721 /* sleep for work */
718 TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n", 722 TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n",
@@ -731,17 +735,10 @@ static int run_klmirqd(void* unused)
731 735
732 __set_current_state(TASK_RUNNING); 736 __set_current_state(TASK_RUNNING);
733 737
734 while (litirq_pending_and_sem_and_owner(which, &sem, &owner)) 738 while (litirq_pending(info))
735 { 739 {
736 int needs_resched = 0;
737
738 preempt_enable_no_resched(); 740 preempt_enable_no_resched();
739 741
740 BUG_ON(sem == NULL);
741
742 // wait to enter execution phase; wait for 'current_owner' to block.
743 enter_execution_phase(which, sem, owner);
744
745 if(kthread_should_stop()) 742 if(kthread_should_stop())
746 { 743 {
747 TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); 744 TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
@@ -753,36 +750,23 @@ static int run_klmirqd(void* unused)
753 /* Double check that there's still pending work and the owner hasn't 750 /* Double check that there's still pending work and the owner hasn't
754 * changed. Pending items may have been flushed while we were sleeping. 751 * changed. Pending items may have been flushed while we were sleeping.
755 */ 752 */
756 if(litirq_pending_with_owner(which, owner)) 753 if(litirq_pending(info))
757 { 754 {
758 TRACE_CUR("%s: Executing tasklets and/or work objects.\n", 755 TRACE_CUR("%s: Executing tasklets and/or work objects.\n",
759 __FUNCTION__); 756 __FUNCTION__);
760 757
761 needs_resched = do_litirq(which); 758 do_litirq(info);
762 759
763 preempt_enable_no_resched(); 760 preempt_enable_no_resched();
764 761
765 // work objects are preemptible. 762 // work objects are preemptible.
766 if(!needs_resched) 763 do_work(info);
767 {
768 do_work(which);
769 }
770
771 // exit execution phase.
772 exit_execution_phase(which, sem, owner);
773
774 TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__);
775 reeval_prio(which); /* check if we need to change priority here */
776 } 764 }
777 else 765 else
778 { 766 {
779 TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n", 767 TRACE_CUR("%s: Pending work was flushed!\n", __FUNCTION__);
780 __FUNCTION__,
781 owner->comm, owner->pid);
782 preempt_enable_no_resched();
783 768
784 // exit execution phase. 769 preempt_enable_no_resched();
785 exit_execution_phase(which, sem, owner);
786 } 770 }
787 771
788 cond_resched(); 772 cond_resched();
@@ -793,183 +777,39 @@ static int run_klmirqd(void* unused)
793 } 777 }
794 __set_current_state(TASK_RUNNING); 778 __set_current_state(TASK_RUNNING);
795 779
796 atomic_dec(&num_ready_klmirqds); 780failed_unregister:
781 /* remove our registration from klmirqd */
782 unregister_klmirqd(current);
797 783
798rt_failed: 784failed:
799 litmus_exit_task(current); 785 litmus_exit_task(current);
800 786
801 return rt_status; 787 return retval;
802} 788}
803 789
804 790
805struct klmirqd_launch_data 791void flush_pending(struct task_struct* tsk)
806{
807 int* cpu_affinity;
808 struct work_struct work;
809};
810
811/* executed by a kworker from workqueues */
812static void launch_klmirqd(struct work_struct *work)
813{ 792{
814 int i; 793 unsigned long flags;
815 794 struct tasklet_struct *list;
816 struct klmirqd_launch_data* launch_data = 795 u32 work_flushed = 0;
817 container_of(work, struct klmirqd_launch_data, work);
818
819 TRACE("%s: Creating %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
820
821 /* create the daemon threads */
822 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
823 {
824 if(launch_data->cpu_affinity)
825 {
826 klmirqds[i].klmirqd =
827 kthread_create(
828 run_klmirqd,
829 /* treat the affinity as a pointer, we'll cast it back later */
830 (void*)(long long)launch_data->cpu_affinity[i],
831 "klmirqd_th%d/%d",
832 i,
833 launch_data->cpu_affinity[i]);
834
835 /* litmus will put is in the right cluster. */
836 kthread_bind(klmirqds[i].klmirqd, launch_data->cpu_affinity[i]);
837 }
838 else
839 {
840 klmirqds[i].klmirqd =
841 kthread_create(
842 run_klmirqd,
843 /* treat the affinity as a pointer, we'll cast it back later */
844 (void*)(long long)(-1),
845 "klmirqd_th%d",
846 i);
847 }
848 }
849
850 TRACE("%s: Launching %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
851
852 /* unleash the daemons */
853 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
854 {
855 wake_up_process(klmirqds[i].klmirqd);
856 }
857
858 if(launch_data->cpu_affinity)
859 kfree(launch_data->cpu_affinity);
860 kfree(launch_data);
861}
862 796
797 struct klmirqd_info *which;
863 798
864void spawn_klmirqd(int* affinity) 799 if (!tsk_rt(tsk)->is_interrupt_thread) {
865{ 800 TRACE("%s/%d is not a proxy thread\n", tsk->comm, tsk->pid);
866 int i; 801 WARN_ON(1);
867 struct klmirqd_launch_data* delayed_launch;
868
869 if(atomic_read(&num_ready_klmirqds) != 0)
870 {
871 TRACE("%s: At least one klmirqd is already running! Need to call kill_klmirqd()?\n");
872 return; 802 return;
873 } 803 }
874 804
875 /* init the tasklet & work queues */ 805 which = tsk_rt(tsk)->klmirqd_info;
876 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i) 806 if (!which) {
877 { 807 TRACE("%s/%d is not a klmirqd thread!\n", tsk->comm, tsk->pid);
878 klmirqds[i].terminating = 0; 808 WARN_ON(1);
879 klmirqds[i].pending = 0; 809 return;
880
881 klmirqds[i].num_hi_pending.counter = 0;
882 klmirqds[i].num_low_pending.counter = 0;
883 klmirqds[i].num_work_pending.counter = 0;
884
885 klmirqds[i].pending_tasklets_hi.head = NULL;
886 klmirqds[i].pending_tasklets_hi.tail = &klmirqds[i].pending_tasklets_hi.head;
887
888 klmirqds[i].pending_tasklets.head = NULL;
889 klmirqds[i].pending_tasklets.tail = &klmirqds[i].pending_tasklets.head;
890
891 INIT_LIST_HEAD(&klmirqds[i].worklist);
892
893 raw_spin_lock_init(&klmirqds[i].lock);
894 }
895
896 /* wait to flush the initializations to memory since other threads
897 will access it. */
898 mb();
899
900 /* tell a work queue to launch the threads. we can't make scheduling
901 calls since we're in an atomic state. */
902 TRACE("%s: Setting callback up to launch klmirqds\n", __FUNCTION__);
903 delayed_launch = kmalloc(sizeof(struct klmirqd_launch_data), GFP_ATOMIC);
904 if(affinity)
905 {
906 delayed_launch->cpu_affinity =
907 kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC);
908
909 memcpy(delayed_launch->cpu_affinity, affinity,
910 sizeof(int)*NR_LITMUS_SOFTIRQD);
911 }
912 else
913 {
914 delayed_launch->cpu_affinity = NULL;
915 }
916 INIT_WORK(&delayed_launch->work, launch_klmirqd);
917 schedule_work(&delayed_launch->work);
918}
919
920
921void kill_klmirqd(void)
922{
923 if(!klmirqd_is_dead())
924 {
925 int i;
926
927 TRACE("%s: Killing %d klmirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
928
929 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
930 {
931 if(klmirqds[i].terminating != 1)
932 {
933 klmirqds[i].terminating = 1;
934 mb(); /* just to be sure? */
935 flush_pending(klmirqds[i].klmirqd, NULL);
936
937 /* signal termination */
938 kthread_stop(klmirqds[i].klmirqd);
939 }
940 }
941 } 810 }
942}
943 811
944 812
945int klmirqd_is_ready(void)
946{
947 return(atomic_read(&num_ready_klmirqds) == NR_LITMUS_SOFTIRQD);
948}
949
950int klmirqd_is_dead(void)
951{
952 return(atomic_read(&num_ready_klmirqds) == 0);
953}
954
955
956struct task_struct* get_klmirqd(unsigned int k_id)
957{
958 return(klmirqds[k_id].klmirqd);
959}
960
961
962void flush_pending(struct task_struct* klmirqd_thread,
963 struct task_struct* owner)
964{
965 unsigned int k_id = klmirqd_id(klmirqd_thread);
966 struct klmirqd_info *which = &klmirqds[k_id];
967
968 unsigned long flags;
969 struct tasklet_struct *list;
970
971 u32 work_flushed = 0;
972
973 raw_spin_lock_irqsave(&which->lock, flags); 813 raw_spin_lock_irqsave(&which->lock, flags);
974 814
975 //__dump_state(which, "flush_pending: before"); 815 //__dump_state(which, "flush_pending: before");
@@ -990,35 +830,27 @@ void flush_pending(struct task_struct* klmirqd_thread,
990 struct tasklet_struct *t = list; 830 struct tasklet_struct *t = list;
991 list = list->next; 831 list = list->next;
992 832
993 if(likely((t->owner == owner) || (owner == NULL))) 833 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
994 { 834 {
995 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) 835 BUG();
996 { 836 }
997 BUG();
998 }
999 837
1000 work_flushed |= LIT_TASKLET_HI; 838 work_flushed |= LIT_TASKLET_HI;
1001 839
1002 t->owner = NULL; 840 t->owner = NULL;
1003 841
1004 // WTF? 842 // WTF?
1005 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) 843 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1006 { 844 {
1007 atomic_dec(&which->num_hi_pending); 845 atomic_dec(&which->num_hi_pending);
1008 ___tasklet_hi_schedule(t); 846 ___tasklet_hi_schedule(t);
1009 }
1010 else
1011 {
1012 TRACE("%s: dropped hi tasklet??\n", __FUNCTION__);
1013 BUG();
1014 }
1015 } 847 }
1016 else 848 else
1017 { 849 {
1018 TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__); 850 TRACE("%s: dropped hi tasklet??\n", __FUNCTION__);
1019 // put back on queue. 851 BUG();
1020 ___litmus_tasklet_hi_schedule(t, which, 0);
1021 } 852 }
853
1022 } 854 }
1023 } 855 }
1024 856
@@ -1038,34 +870,25 @@ void flush_pending(struct task_struct* klmirqd_thread,
1038 struct tasklet_struct *t = list; 870 struct tasklet_struct *t = list;
1039 list = list->next; 871 list = list->next;
1040 872
1041 if(likely((t->owner == owner) || (owner == NULL))) 873 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
1042 { 874 {
1043 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) 875 BUG();
1044 { 876 }
1045 BUG();
1046 }
1047 877
1048 work_flushed |= LIT_TASKLET_LOW; 878 work_flushed |= LIT_TASKLET_LOW;
1049 879
1050 t->owner = NULL; 880 t->owner = NULL;
1051 sched_trace_tasklet_end(owner, 1ul); 881// sched_trace_tasklet_end(owner, 1ul);
1052 882
1053 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) 883 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1054 { 884 {
1055 atomic_dec(&which->num_low_pending); 885 atomic_dec(&which->num_low_pending);
1056 ___tasklet_schedule(t); 886 ___tasklet_schedule(t);
1057 }
1058 else
1059 {
1060 TRACE("%s: dropped tasklet??\n", __FUNCTION__);
1061 BUG();
1062 }
1063 } 887 }
1064 else 888 else
1065 { 889 {
1066 TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__); 890 TRACE("%s: dropped tasklet??\n", __FUNCTION__);
1067 // put back on queue 891 BUG();
1068 ___litmus_tasklet_schedule(t, which, 0);
1069 } 892 }
1070 } 893 }
1071 } 894 }
@@ -1083,21 +906,12 @@ void flush_pending(struct task_struct* klmirqd_thread,
1083 list_first_entry(&which->worklist, struct work_struct, entry); 906 list_first_entry(&which->worklist, struct work_struct, entry);
1084 list_del_init(&work->entry); 907 list_del_init(&work->entry);
1085 908
1086 if(likely((work->owner == owner) || (owner == NULL))) 909 work_flushed |= LIT_WORK;
1087 { 910 atomic_dec(&which->num_work_pending);
1088 work_flushed |= LIT_WORK;
1089 atomic_dec(&which->num_work_pending);
1090 911
1091 work->owner = NULL; 912 work->owner = NULL;
1092 sched_trace_work_end(owner, current, 1ul); 913// sched_trace_work_end(owner, current, 1ul);
1093 __schedule_work(work); 914 __schedule_work(work);
1094 }
1095 else
1096 {
1097 TRACE("%s: Could not flush a work object.\n", __FUNCTION__);
1098 // put back on queue
1099 ___litmus_schedule_work(work, which, 0);
1100 }
1101 } 915 }
1102 } 916 }
1103 917
@@ -1106,22 +920,6 @@ void flush_pending(struct task_struct* klmirqd_thread,
1106 920
1107 mb(); /* commit changes to pending flags */ 921 mb(); /* commit changes to pending flags */
1108 922
1109 /* reset the scheduling priority */
1110 if(work_flushed)
1111 {
1112 __reeval_prio(which);
1113
1114 /* Try to offload flushed tasklets to Linux's ksoftirqd. */
1115 if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI))
1116 {
1117 wakeup_softirqd();
1118 }
1119 }
1120 else
1121 {
1122 TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__);
1123 }
1124
1125 raw_spin_unlock_irqrestore(&which->lock, flags); 923 raw_spin_unlock_irqrestore(&which->lock, flags);
1126} 924}
1127 925
@@ -1161,39 +959,27 @@ static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
1161 raw_spin_unlock_irqrestore(&which->lock, flags); 959 raw_spin_unlock_irqrestore(&which->lock, flags);
1162} 960}
1163 961
1164int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) 962
963int __litmus_tasklet_schedule(struct tasklet_struct *t, struct task_struct* klmirqd_thread)
1165{ 964{
1166 int ret = 0; /* assume failure */ 965 int ret = 0; /* assume failure */
1167 if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) 966 struct klmirqd_info* info;
1168 {
1169 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1170 BUG();
1171 }
1172 967
1173 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) 968 if (unlikely(!is_realtime(klmirqd_thread) ||
1174 { 969 !tsk_rt(klmirqd_thread)->is_interrupt_thread ||
1175 TRACE("%s: No klmirqd_th%d!\n", __FUNCTION__, k_id); 970 !tsk_rt(klmirqd_thread)->klmirqd_info)) {
1176 BUG(); 971 TRACE("%s: %s/%d can't handle tasklets\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1177 } 972 return ret;
973 }
1178 974
1179 if(likely(!klmirqds[k_id].terminating)) 975 info = tsk_rt(klmirqd_thread)->klmirqd_info;
1180 {
1181 /* Can't accept tasklets while we're processing a workqueue
1182 because they're handled by the same thread. This case is
1183 very RARE.
1184 976
1185 TODO: Use a separate thread for work objects!!!!!! 977 if (likely(!info->terminating)) {
1186 */ 978 ret = 1;
1187 if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) 979 ___litmus_tasklet_schedule(t, info, 1);
1188 { 980 }
1189 ret = 1; 981 else {
1190 ___litmus_tasklet_schedule(t, &klmirqds[k_id], 1); 982 TRACE("%s: Tasklet rejected because %s/%d is terminating\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1191 }
1192 else
1193 {
1194 TRACE("%s: rejected tasklet because of pending work.\n",
1195 __FUNCTION__);
1196 }
1197 } 983 }
1198 return(ret); 984 return(ret);
1199} 985}
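
For context, callers are now expected to hand a tasklet straight to the klmirqd thread serving the device, instead of naming an index into the old global klmirqds[] array. A minimal sketch of such a dispatch path, built only from interfaces introduced elsewhere in this patch (the helper name nv_forward_tasklet and the fallback policy are illustrative assumptions, not part of the patch):

	/* sketch: route a bottom half to the klmirqd thread of 'gpu';
	 * fall back to Linux's normal tasklet path if dispatch fails. */
	static void nv_forward_tasklet(struct tasklet_struct *t, u32 gpu)
	{
		struct task_struct *klmirqd = get_nv_klmirqd_thread(gpu);

		/* __litmus_tasklet_schedule() returns 0 if the target is not a
		 * ready real-time interrupt thread or is terminating. */
		if (!klmirqd || !__litmus_tasklet_schedule(t, klmirqd))
			tasklet_schedule(t);
	}
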
@@ -1230,100 +1016,77 @@ static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
1230 raw_spin_unlock_irqrestore(&which->lock, flags); 1016 raw_spin_unlock_irqrestore(&which->lock, flags);
1231} 1017}
1232 1018
1233int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) 1019int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, struct task_struct* klmirqd_thread)
1234{ 1020{
1235 int ret = 0; /* assume failure */ 1021 int ret = 0; /* assume failure */
1236 if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) 1022 struct klmirqd_info* info;
1237 {
1238 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1239 BUG();
1240 }
1241 1023
1242 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) 1024 if (unlikely(!is_realtime(klmirqd_thread) ||
1243 { 1025 !tsk_rt(klmirqd_thread)->is_interrupt_thread ||
1244 TRACE("%s: No klmirqd_th%d!\n", __FUNCTION__, k_id); 1026 !tsk_rt(klmirqd_thread)->klmirqd_info)) {
1245 BUG(); 1027 TRACE("%s: %s/%d can't handle tasklets\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1246 } 1028 return ret;
1029 }
1247 1030
1248 if(unlikely(!klmirqd_is_ready())) 1031 info = tsk_rt(klmirqd_thread)->klmirqd_info;
1249 {
1250 TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id);
1251 BUG();
1252 }
1253 1032
1254 if(likely(!klmirqds[k_id].terminating)) 1033 if (likely(!info->terminating)) {
1255 { 1034 ret = 1;
1256 if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) 1035 ___litmus_tasklet_hi_schedule(t, info, 1);
1257 {
1258 ret = 1;
1259 ___litmus_tasklet_hi_schedule(t, &klmirqds[k_id], 1);
1260 }
1261 else
1262 {
1263 TRACE("%s: rejected tasklet because of pending work.\n",
1264 __FUNCTION__);
1265 }
1266 } 1036 }
1037 else {
1038 TRACE("%s: Tasklet rejected because %s/%d is terminating\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1039 }
1040
1267 return(ret); 1041 return(ret);
1268} 1042}
1269 1043
1270EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); 1044EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
1271 1045
1272 1046
1273int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id) 1047int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, struct task_struct* klmirqd_thread)
1274{ 1048{
1275 int ret = 0; /* assume failure */ 1049 int ret = 0; /* assume failure */
1276 u32 old_pending; 1050 u32 old_pending;
1051 struct klmirqd_info* info;
1277 1052
1278 BUG_ON(!irqs_disabled()); 1053 BUG_ON(!irqs_disabled());
1279 1054
1280 if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) 1055 if (unlikely(!is_realtime(klmirqd_thread) ||
1281 { 1056 !tsk_rt(klmirqd_thread)->is_interrupt_thread ||
1282 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); 1057 !tsk_rt(klmirqd_thread)->klmirqd_info)) {
1283 BUG(); 1058 TRACE("%s: %s/%d can't handle tasklets\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1284 } 1059 return ret;
1060 }
1285 1061
1286 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) 1062 info = tsk_rt(klmirqd_thread)->klmirqd_info;
1287 {
1288 TRACE("%s: No klmirqd_th%u!\n", __FUNCTION__, k_id);
1289 BUG();
1290 }
1291 1063
1292 if(unlikely(!klmirqd_is_ready())) 1064 if (likely(!info->terminating)) {
1293 {
1294 TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id);
1295 BUG();
1296 }
1297 1065
1298 if(likely(!klmirqds[k_id].terminating)) 1066 raw_spin_lock(&info->lock);
1299 {
1300 raw_spin_lock(&klmirqds[k_id].lock);
1301 1067
1302 if(likely(atomic_read(&klmirqds[k_id].num_work_pending) == 0)) 1068 ret = 1; // success!
1303 {
1304 ret = 1; // success!
1305 1069
1306 t->next = klmirqds[k_id].pending_tasklets_hi.head; 1070 t->next = info->pending_tasklets_hi.head;
1307 klmirqds[k_id].pending_tasklets_hi.head = t; 1071 info->pending_tasklets_hi.head = t;
1308 1072
1309 old_pending = klmirqds[k_id].pending; 1073 old_pending = info->pending;
1310 klmirqds[k_id].pending |= LIT_TASKLET_HI; 1074 info->pending |= LIT_TASKLET_HI;
1311 1075
1312 atomic_inc(&klmirqds[k_id].num_hi_pending); 1076 atomic_inc(&info->num_hi_pending);
1313 1077
1314 mb(); 1078 mb();
1315 1079
1316 if(!old_pending) 1080 if(!old_pending) {
1317 wakeup_litirqd_locked(&klmirqds[k_id]); /* wake up the klmirqd */ 1081 wakeup_litirqd_locked(info); /* wake up the klmirqd */
1318 }
1319 else
1320 {
1321 TRACE("%s: rejected tasklet because of pending work.\n",
1322 __FUNCTION__);
1323 } 1082 }
1324 1083
1325 raw_spin_unlock(&klmirqds[k_id].lock); 1084 raw_spin_unlock(&info->lock);
1326 } 1085 }
1086 else {
1087 TRACE("%s: Tasklet rejected because %s/%d is terminating\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1088 }
1089
1327 return(ret); 1090 return(ret);
1328} 1091}
1329 1092
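
Unlike the plain variants, __litmus_tasklet_hi_schedule_first() must be entered with interrupts already disabled (note the BUG_ON above) and it queues at the head of the HI list. A hedged sketch of a caller in hard-IRQ context (the helper name is an assumption for illustration):

	/* sketch: called from a top half, so IRQs are already off */
	static void nv_forward_tasklet_hi_from_irq(struct tasklet_struct *t,
						   struct task_struct *klmirqd)
	{
		if (!klmirqd || !__litmus_tasklet_hi_schedule_first(t, klmirqd))
			tasklet_hi_schedule(t);	/* standard Linux fallback */
	}
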
@@ -1358,225 +1121,30 @@ static void ___litmus_schedule_work(struct work_struct *w,
1358 raw_spin_unlock_irqrestore(&which->lock, flags); 1121 raw_spin_unlock_irqrestore(&which->lock, flags);
1359} 1122}
1360 1123
1361int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) 1124int __litmus_schedule_work(struct work_struct *w, struct task_struct* klmirqd_thread)
1362{ 1125{
1363 int ret = 1; /* assume success */ 1126 int ret = 1; /* assume success */
1364 if(unlikely(w->owner == NULL) || !is_realtime(w->owner)) 1127 struct klmirqd_info* info;
1365 {
1366 TRACE("%s: No owner associated with this work object!\n", __FUNCTION__);
1367 BUG();
1368 }
1369
1370 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1371 {
1372 TRACE("%s: No klmirqd_th%u!\n", k_id);
1373 BUG();
1374 }
1375
1376 if(unlikely(!klmirqd_is_ready()))
1377 {
1378 TRACE("%s: klmirqd is not ready!\n", __FUNCTION__, k_id);
1379 BUG();
1380 }
1381
1382 if(likely(!klmirqds[k_id].terminating))
1383 ___litmus_schedule_work(w, &klmirqds[k_id], 1);
1384 else
1385 ret = 0;
1386 return(ret);
1387}
1388EXPORT_SYMBOL(__litmus_schedule_work);
1389
1390
1391static int set_klmirqd_sem_status(unsigned long stat)
1392{
1393 TRACE_CUR("SETTING STATUS FROM %d TO %d\n",
1394 atomic_read(&tsk_rt(current)->klmirqd_sem_stat),
1395 stat);
1396 atomic_set(&tsk_rt(current)->klmirqd_sem_stat, stat);
1397 //mb();
1398
1399 return(0);
1400}
1401
1402static int set_klmirqd_sem_status_if_not_held(unsigned long stat)
1403{
1404 if(atomic_read(&tsk_rt(current)->klmirqd_sem_stat) != HELD)
1405 {
1406 return(set_klmirqd_sem_status(stat));
1407 }
1408 return(-1);
1409}
1410
1411
1412void __down_and_reset_and_set_stat(struct task_struct* t,
1413 enum klmirqd_sem_status to_reset,
1414 enum klmirqd_sem_status to_set,
1415 struct mutex* sem)
1416{
1417#if 0
1418 struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem);
1419 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1420
1421 TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
1422 __FUNCTION__, task->comm, task->pid);
1423#endif
1424
1425 mutex_lock_sfx(sem,
1426 set_klmirqd_sem_status_if_not_held, to_reset,
1427 set_klmirqd_sem_status, to_set);
1428#if 0
1429 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
1430 __FUNCTION__, task->comm, task->pid);
1431#endif
1432}
1433
1434void down_and_set_stat(struct task_struct* t,
1435 enum klmirqd_sem_status to_set,
1436 struct mutex* sem)
1437{
1438#if 0
1439 struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem);
1440 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1441
1442 TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
1443 __FUNCTION__, task->comm, task->pid);
1444#endif
1445
1446 mutex_lock_sfx(sem,
1447 NULL, 0,
1448 set_klmirqd_sem_status, to_set);
1449
1450#if 0
1451 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
1452 __FUNCTION__, task->comm, task->pid);
1453#endif
1454}
1455
1456
1457void up_and_set_stat(struct task_struct* t,
1458 enum klmirqd_sem_status to_set,
1459 struct mutex* sem)
1460{
1461#if 0
1462 struct rt_param* param = container_of(sem, struct rt_param, klmirqd_sem);
1463 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1464
1465 TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n",
1466 __FUNCTION__,
1467 task->comm, task->pid);
1468#endif
1469
1470 mutex_unlock_sfx(sem, NULL, 0,
1471 set_klmirqd_sem_status, to_set);
1472
1473#if 0
1474 TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n",
1475 __FUNCTION__,
1476 task->comm, task->pid);
1477#endif
1478}
1479
1480
1481
1482void release_klmirqd_lock(struct task_struct* t)
1483{
1484 if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klmirqd_sem_stat) == HELD))
1485 {
1486 struct mutex* sem;
1487 struct task_struct* owner = t;
1488
1489 if(t->state == TASK_RUNNING)
1490 {
1491 TRACE_TASK(t, "NOT giving up klmirqd_sem because we're not blocked!\n");
1492 return;
1493 }
1494 1128
1495 if(likely(!tsk_rt(t)->is_proxy_thread)) 1129 if (unlikely(!is_realtime(klmirqd_thread) ||
1496 { 1130 !tsk_rt(klmirqd_thread)->is_interrupt_thread ||
1497 sem = &tsk_rt(t)->klmirqd_sem; 1131 !tsk_rt(klmirqd_thread)->klmirqd_info)) {
1498 } 1132 TRACE("%s: %s/%d can't handle work items\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1499 else 1133 return ret;
1500 {
1501 unsigned int k_id = klmirqd_id(t);
1502 owner = klmirqds[k_id].current_owner;
1503
1504 BUG_ON(t != klmirqds[k_id].klmirqd);
1505
1506 if(likely(owner))
1507 {
1508 sem = &tsk_rt(owner)->klmirqd_sem;
1509 }
1510 else
1511 {
1512 BUG();
1513
1514 // We had the rug pulled out from under us. Abort attempt
1515 // to reacquire the lock since our client no longer needs us.
1516 TRACE_CUR("HUH?! How did this happen?\n");
1517 atomic_set(&tsk_rt(t)->klmirqd_sem_stat, NOT_HELD);
1518 return;
1519 }
1520 }
1521
1522 //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid);
1523 up_and_set_stat(t, NEED_TO_REACQUIRE, sem);
1524 //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid);
1525 }
1526 /*
1527 else if(is_realtime(t))
1528 {
1529 TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klmirqd_sem_stat);
1530 } 1134 }
1531 */
1532}
1533 1135
1534int reacquire_klmirqd_lock(struct task_struct* t) 1136 info = tsk_rt(klmirqd_thread)->klmirqd_info;
1535{
1536 int ret = 0;
1537
1538 if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klmirqd_sem_stat) == NEED_TO_REACQUIRE))
1539 {
1540 struct mutex* sem;
1541 struct task_struct* owner = t;
1542
1543 if(likely(!tsk_rt(t)->is_proxy_thread))
1544 {
1545 sem = &tsk_rt(t)->klmirqd_sem;
1546 }
1547 else
1548 {
1549 unsigned int k_id = klmirqd_id(t);
1550 //struct task_struct* owner = klmirqds[k_id].current_owner;
1551 owner = klmirqds[k_id].current_owner;
1552
1553 BUG_ON(t != klmirqds[k_id].klmirqd);
1554 1137
1555 if(likely(owner))
1556 {
1557 sem = &tsk_rt(owner)->klmirqd_sem;
1558 }
1559 else
1560 {
1561 // We had the rug pulled out from under us. Abort attempt
1562 // to reacquire the lock since our client no longer needs us.
1563 TRACE_CUR("No longer needs to reacquire klmirqd_sem!\n");
1564 atomic_set(&tsk_rt(t)->klmirqd_sem_stat, NOT_HELD);
1565 return(0);
1566 }
1567 }
1568 1138
1569 //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid); 1139 if (likely(!info->terminating)) {
1570 __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem); 1140 ___litmus_schedule_work(w, info, 1);
1571 //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid);
1572 } 1141 }
1573 /* 1142 else {
1574 else if(is_realtime(t)) 1143 TRACE("%s: Work rejected because %s/%d is terminating\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1575 { 1144 ret = 0;
1576 TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klmirqd_sem_stat);
1577 } 1145 }
1578 */
1579 1146
1580 return(ret); 1147 return(ret);
1581} 1148}
1149EXPORT_SYMBOL(__litmus_schedule_work);
1582 1150
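
Taken together, the entry points that remain in litmus_softirq.c form a small life-cycle API: the plugin calls init_klmirqd() once (see the C-EDF hunk below), clients check klmirqd_is_ready(), spawn a thread with launch_klmirqd_thread() plus a callback that publishes the new thread, and later stop it with kill_klmirqd_thread(). A rough sketch assembled from how nvidia_info.c uses these calls later in this patch (the my_* names are assumptions; the exact klmirqd_callback_t definition lives in litmus/litmus_softirq.h and is not shown here):

	static struct task_struct *my_irq_thread;

	static int my_klmirqd_cb(void *arg)
	{
		/* runs inside the newly spawned klmirqd thread; publish it so
		 * tasklets/work items can be routed to it later */
		my_irq_thread = current;
		return 0;
	}

	static klmirqd_callback_t my_cb = {
		.func = my_klmirqd_cb,
		.arg  = NULL,
	};

	static void my_bring_up(void)
	{
		if (!klmirqd_is_ready()) {
			TRACE("klmirqd is not ready!\n");
			return;
		}
		/* first argument as in init_nv_device_reg() below */
		if (launch_klmirqd_thread(0, &my_cb) != 0)
			TRACE("failed to launch klmirqd thread\n");
	}

	static void my_tear_down(void)
	{
		if (my_irq_thread) {
			kill_klmirqd_thread(my_irq_thread);
			my_irq_thread = NULL;
		}
	}
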
diff --git a/litmus/locking.c b/litmus/locking.c
index 22f46df4308a..7af1dd69a079 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -543,32 +543,54 @@ out:
543 543
544void suspend_for_lock(void) 544void suspend_for_lock(void)
545{ 545{
546#ifdef CONFIG_REALTIME_AUX_TASKS 546#if defined(CONFIG_REALTIME_AUX_TASKS) || defined(CONFIG_LITMUS_NVIDIA)
547#if 0
548 unsigned int restore = 0;
549 struct task_struct *t = current; 547 struct task_struct *t = current;
550 unsigned int hide;
551
552 if (tsk_rt(t)->has_aux_tasks) {
553 /* hide from aux tasks so they can't inherit our priority when we block
554 * for a litmus lock. inheritance is already going to a litmus lock
555 * holder. */
556 hide = tsk_rt(t)->hide_from_aux_tasks;
557 restore = 1;
558 tsk_rt(t)->hide_from_aux_tasks = 1;
559 }
560#endif 548#endif
549
550#ifdef CONFIG_REALTIME_AUX_TASKS
551 unsigned int aux_restore = 0;
552 unsigned int aux_hide;
553#endif
554
555#ifdef CONFIG_LITMUS_NVIDIA
556 unsigned int gpu_restore = 0;
557 unsigned int gpu_hide;
558#endif
559
560
561//#ifdef CONFIG_REALTIME_AUX_TASKS
562// if (tsk_rt(t)->has_aux_tasks) {
563// /* hide from aux tasks so they can't inherit our priority when we block
564// * for a litmus lock. inheritance is already going to a litmus lock
565// * holder. */
566// aux_hide = tsk_rt(t)->hide_from_aux_tasks;
567// aux_restore = 1;
568// tsk_rt(t)->hide_from_aux_tasks = 1;
569// }
570//#endif
571
572#ifdef CONFIG_LITMUS_NVIDIA
573 if (tsk_rt(t)->held_gpus) {
574 gpu_hide = tsk_rt(t)->hide_from_gpu;
575 gpu_restore = 1;
576 tsk_rt(t)->hide_from_gpu = 1;
577 }
561#endif 578#endif
562 579
563 schedule(); 580 schedule();
564 581
565#ifdef CONFIG_REALTIME_AUX_TASKS 582#ifdef CONFIG_LITMUS_NVIDIA
566#if 0 583 if (gpu_restore) {
567 if (restore) {
568 /* restore our state */ 584 /* restore our state */
569 tsk_rt(t)->hide_from_aux_tasks = hide; 585 tsk_rt(t)->hide_from_gpu = gpu_hide;
570 } 586 }
571#endif 587#endif
588
589#ifdef CONFIG_REALTIME_AUX_TASKS
590 if (aux_restore) {
591 /* restore our state */
592 tsk_rt(t)->hide_from_aux_tasks = aux_hide;
593 }
572#endif 594#endif
573} 595}
574 596
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
index 22586cde8255..b29f4d3f0dac 100644
--- a/litmus/nvidia_info.c
+++ b/litmus/nvidia_info.c
@@ -10,6 +10,10 @@
10 10
11#include <litmus/binheap.h> 11#include <litmus/binheap.h>
12 12
13#ifdef CONFIG_LITMUS_SOFTIRQD
14#include <litmus/litmus_softirq.h>
15#endif
16
13typedef unsigned char NvV8; /* "void": enumerated or multiple fields */ 17typedef unsigned char NvV8; /* "void": enumerated or multiple fields */
14typedef unsigned short NvV16; /* "void": enumerated or multiple fields */ 18typedef unsigned short NvV16; /* "void": enumerated or multiple fields */
15typedef unsigned char NvU8; /* 0 to 255 */ 19typedef unsigned char NvU8; /* 0 to 255 */
@@ -296,9 +300,14 @@ static struct notifier_block nvidia_going = {
296}; 300};
297#endif 301#endif
298 302
303
304
305static int init_nv_device_reg(void);
306static int shutdown_nv_device_reg(void);
307
308
299int init_nvidia_info(void) 309int init_nvidia_info(void)
300{ 310{
301#if 1
302 mutex_lock(&module_mutex); 311 mutex_lock(&module_mutex);
303 nvidia_mod = find_module("nvidia"); 312 nvidia_mod = find_module("nvidia");
304 mutex_unlock(&module_mutex); 313 mutex_unlock(&module_mutex);
@@ -315,13 +324,14 @@ int init_nvidia_info(void)
315 TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); 324 TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
316 return(-1); 325 return(-1);
317 } 326 }
318#endif
319} 327}
320 328
321void shutdown_nvidia_info(void) 329void shutdown_nvidia_info(void)
322{ 330{
323 nvidia_mod = NULL; 331 nvidia_mod = NULL;
324 mb(); 332 mb();
333
334 shutdown_nv_device_reg();
325} 335}
326 336
327/* works with pointers to static data inside the module too. */ 337/* works with pointers to static data inside the module too. */
@@ -351,20 +361,6 @@ u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
351 BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM); 361 BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
352 362
353 return(linuxstate->device_num); 363 return(linuxstate->device_num);
354
355 //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
356
357#if 0
358 // offset determined though observed behavior of the NV driver.
359 //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1
360 //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2
361
362 void* state = (void*)(t->data);
363 void* device_num_ptr = state + DEVICE_NUM_OFFSET;
364
365 //dump_nvidia_info(t);
366 return(*((u32*)device_num_ptr));
367#endif
368} 364}
369 365
370u32 get_work_nv_device_num(const struct work_struct *t) 366u32 get_work_nv_device_num(const struct work_struct *t)
@@ -377,203 +373,452 @@ u32 get_work_nv_device_num(const struct work_struct *t)
377} 373}
378 374
379 375
376///////////////////////////////////////////////////////////////////////////////
377///////////////////////////////////////////////////////////////////////////////
378///////////////////////////////////////////////////////////////////////////////
379
380
380typedef struct { 381typedef struct {
381 raw_spinlock_t lock; 382 raw_spinlock_t lock; /* not needed if GPU not shared between scheduling domains */
382 int nr_owners; 383 struct binheap owners;
383 struct task_struct* max_prio_owner; 384
384 struct task_struct* owners[NV_MAX_SIMULT_USERS]; 385#ifdef CONFIG_LITMUS_SOFTIRQD
386 klmirqd_callback_t callback;
387 struct task_struct* thread;
388 int ready:1; /* todo: make threads check for the ready flag */
389#endif
385}nv_device_registry_t; 390}nv_device_registry_t;
386 391
392
387static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; 393static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
388 394
389int init_nv_device_reg(void) 395
396
397
398
399#ifdef CONFIG_LITMUS_SOFTIRQD
400static int nvidia_klmirqd_cb(void *arg)
390{ 401{
391 int i; 402 unsigned long flags;
403 int reg_device_id = (int)(long long)(arg);
404 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
392 405
393 memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); 406 TRACE("nv klmirqd callback for GPU %d\n", reg_device_id);
394 407
395 for(i = 0; i < NV_DEVICE_NUM; ++i) 408 raw_spin_lock_irqsave(&reg->lock, flags);
396 { 409 reg->thread = current;
397 raw_spin_lock_init(&NV_DEVICE_REG[i].lock); 410 reg->ready = 1;
398 } 411 raw_spin_unlock_irqrestore(&reg->lock, flags);
399 412
400 return(1); 413 return 0;
401} 414}
415#endif
402 416
403/* use to get nv_device_id by given owner. 417
404 (if return -1, can't get the assocaite device id)*/ 418static int gpu_owner_max_priority_order(struct binheap_node *a,
405/* 419 struct binheap_node *b)
406int get_nv_device_id(struct task_struct* owner)
407{ 420{
408 int i; 421 struct task_struct *d_a = container_of(binheap_entry(a, struct rt_param, gpu_owner_node),
409 if(!owner) 422 struct task_struct, rt_param);
410 { 423 struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, gpu_owner_node),
411 return(-1); 424 struct task_struct, rt_param);
412 } 425
413 for(i = 0; i < NV_DEVICE_NUM; ++i) 426 BUG_ON(!d_a);
414 { 427 BUG_ON(!d_b);
415 if(NV_DEVICE_REG[i].device_owner == owner) 428
416 return(i); 429 return litmus->compare(d_a, d_b);
417 }
418 return(-1);
419} 430}
420*/
421 431
422static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_struct *skip) { 432static int init_nv_device_reg(void)
433{
423 int i; 434 int i;
424 struct task_struct *found = NULL; 435
425 for(i = 0; i < reg->nr_owners; ++i) { 436#ifdef CONFIG_LITMUS_SOFTIRQD
426 if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) { 437 if (!klmirqd_is_ready()) {
427 found = reg->owners[i]; 438 TRACE("klmirqd is not ready!\n");
439 return 0;
440 }
441#endif
442
443 memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));
444 mb();
445
446
447 for(i = 0; i < NV_DEVICE_NUM; ++i) {
448 raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
449 INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order);
450
451#ifdef CONFIG_LITMUS_SOFTIRQD
452 // TODO: Make thread spawning this a litmus plugin call.
453 NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb;
454 NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i);
455 mb();
456
457 if(launch_klmirqd_thread(0, &NV_DEVICE_REG[i].callback) != 0) {
458 TRACE("Failed to create klmirqd thread for GPU %d\n", i);
428 } 459 }
460#endif
429 } 461 }
430 return found; 462
463 return(1);
431} 464}
432 465
433#ifdef CONFIG_LITMUS_PAI_SOFTIRQD 466
434void pai_check_priority_increase(struct task_struct *t, int reg_device_id) 467/* The following code is full of nasty race conditions... */
468/* spawning of klimirqd threads can race with init_nv_device_reg()!!!! */
469static int shutdown_nv_device_reg(void)
435{ 470{
436 unsigned long flags; 471 TRACE("Shutting down nv device registration.\n");
437 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; 472
473#ifdef CONFIG_LITMUS_SOFTIRQD
474 {
475 int i;
476 nv_device_registry_t *reg;
438 477
439 if(reg->max_prio_owner != t) { 478 for (i = 0; i < NV_DEVICE_NUM; ++i) {
440 479
441 raw_spin_lock_irqsave(&reg->lock, flags); 480 TRACE("Shutting down GPU %d.\n", i);
442 481
443 if(reg->max_prio_owner != t) { 482 reg = &NV_DEVICE_REG[i];
444 if(litmus->compare(t, reg->max_prio_owner)) { 483
445 litmus->change_prio_pai_tasklet(reg->max_prio_owner, t); 484 if (reg->thread && reg->ready) {
446 reg->max_prio_owner = t; 485 kill_klmirqd_thread(reg->thread);
486
487 /* assume that all goes according to plan... */
488 reg->thread = NULL;
489 reg->ready = 0;
447 } 490 }
448 }
449 491
450 raw_spin_unlock_irqrestore(&reg->lock, flags); 492 while (!binheap_empty(&reg->owners)) {
493 binheap_delete_root(&reg->owners, struct rt_param, gpu_owner_node);
494 }
495 }
451 } 496 }
497#endif
498
499 return(1);
452} 500}
453 501
454 502
455void pai_check_priority_decrease(struct task_struct *t, int reg_device_id) 503/* use to get the owner of nv_device_id. */
504struct task_struct* get_nv_max_device_owner(u32 target_device_id)
456{ 505{
457 unsigned long flags; 506 struct task_struct *owner = NULL;
458 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; 507 nv_device_registry_t *reg;
459 508
460 if(reg->max_prio_owner == t) { 509 BUG_ON(target_device_id >= NV_DEVICE_NUM);
461 510
462 raw_spin_lock_irqsave(&reg->lock, flags); 511 reg = &NV_DEVICE_REG[target_device_id];
463 512
464 if(reg->max_prio_owner == t) { 513 if (!binheap_empty(&reg->owners)) {
465 reg->max_prio_owner = find_hp_owner(reg, NULL); 514 struct task_struct *hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
466 if(reg->max_prio_owner != t) { 515 struct task_struct, rt_param);
467 litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); 516 TRACE_CUR("hp: %s/%d\n", hp->comm, hp->pid);
468 } 517 }
469 }
470 518
471 raw_spin_unlock_irqrestore(&reg->lock, flags); 519 return(owner);
520}
521
522#ifdef CONFIG_LITMUS_SOFTIRQD
523struct task_struct* get_nv_klmirqd_thread(u32 target_device_id)
524{
525 struct task_struct *klmirqd = NULL;
526 nv_device_registry_t *reg;
527
528 BUG_ON(target_device_id >= NV_DEVICE_NUM);
529
530 reg = &NV_DEVICE_REG[target_device_id];
531
532 if(likely(reg->ready)) {
533 klmirqd = reg->thread;
472 } 534 }
535
536 return klmirqd;
473} 537}
474#endif 538#endif
475 539
476static int __reg_nv_device(int reg_device_id, struct task_struct *t) 540
541
542
543
544#ifdef CONFIG_LITMUS_SOFTIRQD
545static int gpu_klmirqd_increase_priority(struct task_struct *klmirqd, struct task_struct *hp)
477{ 546{
478 int ret = 0; 547 int retval = 0;
479 int i;
480 struct task_struct *old_max = NULL;
481 unsigned long flags;
482 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
483 548
484 if(test_bit(reg_device_id, &tsk_rt(t)->held_gpus)) { 549 TRACE_CUR("Increasing priority of nv klmirqd: %s/%d.\n", klmirqd->comm, klmirqd->pid);
485 // TODO: check if taks is already registered.
486 return ret; // assume already registered.
487 }
488 550
551 /* the klmirqd thread should never attempt to hold a litmus-level real-time
552 * so nested support is not required */
553 retval = litmus->__increase_prio(klmirqd, hp);
489 554
490 raw_spin_lock_irqsave(&reg->lock, flags); 555 return retval;
556}
557
558static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct task_struct *hp)
559{
560 int retval = 0;
491 561
492 if(reg->nr_owners < NV_MAX_SIMULT_USERS) { 562 TRACE_CUR("Decreasing priority of nv klmirqd: %s/%d.\n", klmirqd->comm, klmirqd->pid);
493 TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
494 for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
495 if(reg->owners[i] == NULL) {
496 reg->owners[i] = t;
497 563
498 //if(edf_higher_prio(t, reg->max_prio_owner)) { 564 /* the klmirqd thread should never attempt to hold a litmus-level real-time
499 if(litmus->compare(t, reg->max_prio_owner)) { 565 * so nested support is not required */
500 old_max = reg->max_prio_owner; 566 retval = litmus->__decrease_prio(klmirqd, hp);
501 reg->max_prio_owner = t;
502 567
503#ifdef CONFIG_LITMUS_PAI_SOFTIRQD 568 return retval;
504 litmus->change_prio_pai_tasklet(old_max, t); 569}
505#endif 570#endif
506 } 571
572
573
574
575/* call when an aux_owner becomes real-time */
576long enable_gpu_owner(struct task_struct *t)
577{
578 long retval = 0;
579// unsigned long flags;
580 int gpu;
581 nv_device_registry_t *reg;
507 582
508#ifdef CONFIG_LITMUS_SOFTIRQD 583#ifdef CONFIG_LITMUS_SOFTIRQD
509 down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem); 584 struct task_struct *hp;
510#endif 585#endif
511 ++(reg->nr_owners);
512 586
513 break; 587 if (!tsk_rt(t)->held_gpus) {
514 } 588 TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid);
515 } 589 return -1;
516 } 590 }
517 else 591
518 { 592 BUG_ON(!is_realtime(t));
519 TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); 593
520 //ret = -EBUSY; 594 gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
595
596 if (binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
597 TRACE_CUR("task %s/%d is already active on GPU %d\n", t->comm, t->pid, gpu);
598 goto out;
521 } 599 }
522 600
523 raw_spin_unlock_irqrestore(&reg->lock, flags); 601 /* update the registration (and maybe klmirqd) */
602 reg = &NV_DEVICE_REG[gpu];
524 603
525 __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); 604// raw_spin_lock_irqsave(&reg->lock, flags);
526 605
527 return(ret); 606 binheap_add(&tsk_rt(t)->gpu_owner_node, &reg->owners,
607 struct rt_param, gpu_owner_node);
608
609
610#ifdef CONFIG_LITMUS_SOFTIRQD
611 hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
612 struct task_struct, rt_param);
613
614 if (hp == t) {
615 /* we're the new hp */
616 TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu);
617
618 retval = gpu_klmirqd_increase_priority(reg->thread, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp);
619 }
620#endif
621
622// raw_spin_unlock_irqsave(&reg->lock, flags);
623
624out:
625 return retval;
528} 626}
529 627
530static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) 628/* call when an aux_owner exits real-time */
629long disable_gpu_owner(struct task_struct *t)
531{ 630{
532 int ret = 0; 631 long retval = 0;
533 int i; 632// unsigned long flags;
534 unsigned long flags; 633 int gpu;
535 nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id]; 634 nv_device_registry_t *reg;
536 635
537#ifdef CONFIG_LITMUS_SOFTIRQD 636#ifdef CONFIG_LITMUS_SOFTIRQD
538 struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id); 637 struct task_struct *hp;
638 struct task_struct *new_hp = NULL;
539#endif 639#endif
540 640
541 if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) { 641 if (!tsk_rt(t)->held_gpus) {
542 return ret; 642 TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid);
643 return -1;
543 } 644 }
544 645
545 raw_spin_lock_irqsave(&reg->lock, flags); 646 BUG_ON(!is_realtime(t));
546 647
547 TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id); 648 gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
649
650 if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
651 TRACE_CUR("task %s/%d is not active on GPU %d\n", t->comm, t->pid, gpu);
652 goto out;
653 }
654
655 TRACE_CUR("task %s/%d exiting from GPU %d.\n", t->comm, t->pid, gpu);
656
657
658 reg = &NV_DEVICE_REG[gpu];
659
660// raw_spin_lock_irqsave(&reg->lock, flags);
548 661
549 for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
550 if(reg->owners[i] == t) {
551#ifdef CONFIG_LITMUS_SOFTIRQD
552 flush_pending(klmirqd_th, t);
553#endif
554 if(reg->max_prio_owner == t) {
555 reg->max_prio_owner = find_hp_owner(reg, t);
556#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
557 litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
558#endif
559 }
560 662
561#ifdef CONFIG_LITMUS_SOFTIRQD 663#ifdef CONFIG_LITMUS_SOFTIRQD
562 up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem); 664 hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
665 struct task_struct, rt_param);
666
667 binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners);
668
669
670 if (!binheap_empty(&reg->owners)) {
671 new_hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
672 struct task_struct, rt_param);
673 }
674
675 if (hp == t && new_hp != t) {
676 struct task_struct *to_inh = NULL;
677
678 TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu);
679
680 if (new_hp) {
681 to_inh = (tsk_rt(new_hp)->inh_task) ? tsk_rt(new_hp)->inh_task : new_hp;
682 }
683
684 retval = gpu_klmirqd_decrease_priority(reg->thread, to_inh);
685 }
686#else
687 binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners);
563#endif 688#endif
564 689
565 reg->owners[i] = NULL; 690// raw_spin_unlock_irqsave(&reg->lock, flags);
566 --(reg->nr_owners); 691
692
693out:
694 return retval;
695}
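
enable_gpu_owner()/disable_gpu_owner() are not invoked from this file; they are meant to be driven by the scheduler plugin's block, wake-up, and exit paths, exactly as the C-EDF and GSN-EDF hunks later in this patch do. In outline (a fragment of a hypothetical plugin, guarded the same way as those hunks):

	#ifdef CONFIG_LITMUS_NVIDIA
	static void my_plugin_task_block(struct task_struct *t)
	{
		/* while t is suspended, its GPU's klmirqd thread may inherit from it */
		if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu)
			enable_gpu_owner(t);
	}

	static void my_plugin_task_wake_up(struct task_struct *t)
	{
		/* t can run again, so the klmirqd thread must stop inheriting */
		if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu)
			disable_gpu_owner(t);
	}

	static void my_plugin_task_exit(struct task_struct *t)
	{
		/* always clean up on the way out, regardless of hide_from_gpu */
		if (tsk_rt(t)->held_gpus)
			disable_gpu_owner(t);
	}
	#endif
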
696
697
698
699
700
701
702
703
704
705
706int gpu_owner_increase_priority(struct task_struct *t)
707{
708 int retval = 0;
709 int gpu;
710 nv_device_registry_t *reg;
711
712 struct task_struct *hp = NULL;
713 struct task_struct *hp_eff = NULL;
714
715 BUG_ON(!is_realtime(t));
716 BUG_ON(!tsk_rt(t)->held_gpus);
717
718 gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
719
720 if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
721 WARN_ON(!is_running(t));
722 TRACE_CUR("gpu klmirqd may not inherit from %s/%d on GPU %d\n",
723 t->comm, t->pid, gpu);
724 goto out;
725 }
726
727
728
729
730 TRACE_CUR("task %s/%d on GPU %d increasing priority.\n", t->comm, t->pid, gpu);
731 reg = &NV_DEVICE_REG[gpu];
732
733 hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
734 struct task_struct, rt_param);
735 hp_eff = effective_priority(hp);
736
737 if (hp != t) { /* our position in the heap may have changed. hp is already at the root. */
738 binheap_decrease(&tsk_rt(t)->gpu_owner_node, &reg->owners);
739 }
740
741 hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
742 struct task_struct, rt_param);
743
744 if (effective_priority(hp) != hp_eff) { /* the eff. prio. of hp has changed */
745 hp_eff = effective_priority(hp);
746 TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu);
747
748 retval = gpu_klmirqd_increase_priority(reg->thread, hp_eff);
749 }
750
751out:
752 return retval;
753}
754
755
756int gpu_owner_decrease_priority(struct task_struct *t)
757{
758 int retval = 0;
759 int gpu;
760 nv_device_registry_t *reg;
761
762 struct task_struct *hp = NULL;
763 struct task_struct *hp_eff = NULL;
567 764
568 break; 765 BUG_ON(!is_realtime(t));
766 BUG_ON(!tsk_rt(t)->held_gpus);
767
768 gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
769
770 if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
771 WARN_ON(!is_running(t));
772 TRACE_CUR("gpu klmirqd may not inherit from %s/%d on GPU %d\n",
773 t->comm, t->pid, gpu);
774 goto out;
775 }
776
777 TRACE_CUR("task %s/%d on GPU %d decreasing priority.\n", t->comm, t->pid, gpu);
778 reg = &NV_DEVICE_REG[gpu];
779
780 hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
781 struct task_struct, rt_param);
782 hp_eff = effective_priority(hp);
783 binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners);
784 binheap_add(&tsk_rt(t)->gpu_owner_node, &reg->owners,
785 struct rt_param, gpu_owner_node);
786
787 if (hp == t) { /* t was originally the hp */
788 struct task_struct *new_hp =
789 container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
790 struct task_struct, rt_param);
791 if (effective_priority(new_hp) != hp_eff) { /* eff prio. of hp has changed */
792 hp_eff = effective_priority(new_hp);
793 TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu);
794 retval = gpu_klmirqd_decrease_priority(reg->thread, hp_eff);
569 } 795 }
570 } 796 }
571 797
572 raw_spin_unlock_irqrestore(&reg->lock, flags); 798out:
799 return retval;
800}
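
These two helpers complete the inheritance chain: whenever a plugin raises or lowers a GPU holder's effective priority, it re-runs the heap maintenance above so the per-GPU klmirqd thread keeps tracking the highest-priority owner. Condensed from the __increase/__decrease_priority_inheritance hunks later in this patch:

	#ifdef CONFIG_LITMUS_NVIDIA
	/* after the plugin has updated t's inherited priority: */
	if (tsk_rt(t)->held_gpus)
		gpu_owner_increase_priority(t);	/* or gpu_owner_decrease_priority(t) */
	#endif
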
801
802
803
804
805
806
807
808
809
810static int __reg_nv_device(int reg_device_id, struct task_struct *t)
811{
812 __set_bit(reg_device_id, &tsk_rt(t)->held_gpus);
813
814 return(0);
815}
573 816
817static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
818{
574 __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); 819 __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);
575 820
576 return(ret); 821 return(0);
577} 822}
578 823
579 824
@@ -596,55 +841,213 @@ int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
596 return(ret); 841 return(ret);
597} 842}
598 843
599/* use to get the owner of nv_device_id. */
600struct task_struct* get_nv_max_device_owner(u32 target_device_id)
601{
602 struct task_struct *owner = NULL;
603 BUG_ON(target_device_id >= NV_DEVICE_NUM);
604 owner = NV_DEVICE_REG[target_device_id].max_prio_owner;
605 return(owner);
606}
607 844
608void lock_nv_registry(u32 target_device_id, unsigned long* flags)
609{
610 BUG_ON(target_device_id >= NV_DEVICE_NUM);
611 845
612 if(in_interrupt())
613 TRACE("Locking registry for %d.\n", target_device_id);
614 else
615 TRACE_CUR("Locking registry for %d.\n", target_device_id);
616 846
617 raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
618}
619 847
620void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
621{
622 BUG_ON(target_device_id >= NV_DEVICE_NUM);
623 848
624 if(in_interrupt())
625 TRACE("Unlocking registry for %d.\n", target_device_id);
626 else
627 TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
628 849
629 raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
630}
631 850
632 851
633//void increment_nv_int_count(u32 device) 852
853
854
855
856
857
858
859
860
861
862#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
863//void pai_check_priority_increase(struct task_struct *t, int reg_device_id)
864//{
865// unsigned long flags;
866// nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
867//
868//
869//
870// if(reg->max_prio_owner != t) {
871//
872// raw_spin_lock_irqsave(&reg->lock, flags);
873//
874// if(reg->max_prio_owner != t) {
875// if(litmus->compare(t, reg->max_prio_owner)) {
876// litmus->change_prio_pai_tasklet(reg->max_prio_owner, t);
877// reg->max_prio_owner = t;
878// }
879// }
880//
881// raw_spin_unlock_irqrestore(&reg->lock, flags);
882// }
883//}
884//
885//
886//void pai_check_priority_decrease(struct task_struct *t, int reg_device_id)
634//{ 887//{
635// unsigned long flags; 888// unsigned long flags;
636// struct task_struct* owner; 889// nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
637// 890//
638// lock_nv_registry(device, &flags); 891// if(reg->max_prio_owner == t) {
639// 892//
640// owner = NV_DEVICE_REG[device].device_owner; 893// raw_spin_lock_irqsave(&reg->lock, flags);
641// if(owner) 894//
895// if(reg->max_prio_owner == t) {
896// reg->max_prio_owner = find_hp_owner(reg, NULL);
897// if(reg->max_prio_owner != t) {
898// litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
899// }
900// }
901//
902// raw_spin_unlock_irqrestore(&reg->lock, flags);
903// }
904//}
905#endif
906
907
908
909
910
911//static int __reg_nv_device(int reg_device_id, struct task_struct *t)
912//{
913// int ret = 0;
914// int i;
915// struct task_struct *old_max = NULL;
916//
917//
918// raw_spin_lock_irqsave(&reg->lock, flags);
919//
920// if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
921// TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
922// for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
923// if(reg->owners[i] == NULL) {
924// reg->owners[i] = t;
925//
926// //if(edf_higher_prio(t, reg->max_prio_owner)) {
927// if(litmus->compare(t, reg->max_prio_owner)) {
928// old_max = reg->max_prio_owner;
929// reg->max_prio_owner = t;
930//
931//#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
932// litmus->change_prio_pai_tasklet(old_max, t);
933//#endif
934// }
935//
936//#ifdef CONFIG_LITMUS_SOFTIRQD
937// down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem);
938//#endif
939// ++(reg->nr_owners);
940//
941// break;
942// }
943// }
944// }
945// else
642// { 946// {
643// atomic_inc(&tsk_rt(owner)->nv_int_count); 947// TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
948// //ret = -EBUSY;
644// } 949// }
645// 950//
646// unlock_nv_registry(device, &flags); 951// raw_spin_unlock_irqrestore(&reg->lock, flags);
952//
953// __set_bit(reg_device_id, &tsk_rt(t)->held_gpus);
954//
955// return(ret);
956//}
957//
958//static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
959//{
960// int ret = 0;
961// int i;
962// unsigned long flags;
963// nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id];
964//
965//#ifdef CONFIG_LITMUS_SOFTIRQD
966// struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id);
967//#endif
968//
969// if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) {
970// return ret;
971// }
972//
973// raw_spin_lock_irqsave(&reg->lock, flags);
974//
975// TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id);
976//
977// for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
978// if(reg->owners[i] == t) {
979//#ifdef CONFIG_LITMUS_SOFTIRQD
980// flush_pending(klmirqd_th, t);
981//#endif
982// if(reg->max_prio_owner == t) {
983// reg->max_prio_owner = find_hp_owner(reg, t);
984//#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
985// litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
986//#endif
987// }
988//
989//#ifdef CONFIG_LITMUS_SOFTIRQD
990// up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem);
991//#endif
992//
993// reg->owners[i] = NULL;
994// --(reg->nr_owners);
995//
996// break;
997// }
998// }
999//
1000// raw_spin_unlock_irqrestore(&reg->lock, flags);
1001//
1002// __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);
1003//
1004// return(ret);
1005//}
1006//
1007//
1008//int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
1009//{
1010// int ret;
1011//
1012// if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0))
1013// {
1014// if(reg_action)
1015// ret = __reg_nv_device(reg_device_id, t);
1016// else
1017// ret = __clear_reg_nv_device(reg_device_id, t);
1018// }
1019// else
1020// {
1021// ret = -ENODEV;
1022// }
1023//
1024// return(ret);
1025//}
1026
1027
1028
1029//void lock_nv_registry(u32 target_device_id, unsigned long* flags)
1030//{
1031// BUG_ON(target_device_id >= NV_DEVICE_NUM);
1032//
1033// if(in_interrupt())
1034// TRACE("Locking registry for %d.\n", target_device_id);
1035// else
1036// TRACE_CUR("Locking registry for %d.\n", target_device_id);
1037//
1038// raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
1039//}
1040//
1041//void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
1042//{
1043// BUG_ON(target_device_id >= NV_DEVICE_NUM);
1044//
1045// if(in_interrupt())
1046// TRACE("Unlocking registry for %d.\n", target_device_id);
1047// else
1048// TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
1049//
1050// raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
647//} 1051//}
648//EXPORT_SYMBOL(increment_nv_int_count);
649 1052
650 1053
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 44c8336c5061..84aafca78cde 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -44,6 +44,7 @@
44 44
45#include <litmus/bheap.h> 45#include <litmus/bheap.h>
46#include <litmus/binheap.h> 46#include <litmus/binheap.h>
47#include <litmus/trace.h>
47 48
48#ifdef CONFIG_LITMUS_LOCKING 49#ifdef CONFIG_LITMUS_LOCKING
49#include <litmus/kfmlp_lock.h> 50#include <litmus/kfmlp_lock.h>
@@ -75,7 +76,6 @@
75 76
76#ifdef CONFIG_LITMUS_PAI_SOFTIRQD 77#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
77#include <linux/interrupt.h> 78#include <linux/interrupt.h>
78#include <litmus/trace.h>
79#endif 79#endif
80 80
81#ifdef CONFIG_LITMUS_NVIDIA 81#ifdef CONFIG_LITMUS_NVIDIA
@@ -118,14 +118,6 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
118#define test_will_schedule(cpu) \ 118#define test_will_schedule(cpu) \
119 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) 119 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
120 120
121#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
122struct tasklet_head
123{
124 struct tasklet_struct *head;
125 struct tasklet_struct **tail;
126};
127#endif
128
129/* 121/*
130 * In C-EDF there is a cedf domain _per_ cluster 122 * In C-EDF there is a cedf domain _per_ cluster
131 * The number of clusters is dynamically determined accordingly to the 123 * The number of clusters is dynamically determined accordingly to the
@@ -1038,6 +1030,13 @@ static void cedf_task_wake_up(struct task_struct *task)
1038 } 1030 }
1039#endif 1031#endif
1040 1032
1033#ifdef CONFIG_LITMUS_NVIDIA
1034 if (tsk_rt(task)->held_gpus && !tsk_rt(task)->hide_from_gpu) {
1035 TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", task->comm, task->pid);
1036 disable_gpu_owner(task);
1037 }
1038#endif
1039
1041 cedf_job_arrival(task); 1040 cedf_job_arrival(task);
1042 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 1041 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1043} 1042}
@@ -1064,6 +1063,14 @@ static void cedf_task_block(struct task_struct *t)
1064 } 1063 }
1065#endif 1064#endif
1066 1065
1066#ifdef CONFIG_LITMUS_NVIDIA
1067 if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) {
1068
1069 TRACE_CUR("%s/%d is blocked so gpu klmirqd tasks may inherit.\n", t->comm, t->pid);
1070 enable_gpu_owner(t);
1071 }
1072#endif
1073
1067 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 1074 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1068 1075
1069 BUG_ON(!is_realtime(t)); 1076 BUG_ON(!is_realtime(t));
@@ -1092,6 +1099,13 @@ static void cedf_task_exit(struct task_struct * t)
1092 } 1099 }
1093#endif 1100#endif
1094 1101
1102#ifdef CONFIG_LITMUS_NVIDIA
1103 /* make sure we clean up on our way out */
1104 if(tsk_rt(t)->held_gpus) {
1105 disable_gpu_owner(t);
1106 }
1107#endif
1108
1095 unlink(t); 1109 unlink(t);
1096 if (tsk_rt(t)->scheduled_on != NO_CPU) { 1110 if (tsk_rt(t)->scheduled_on != NO_CPU) {
1097 cpu_entry_t *cpu; 1111 cpu_entry_t *cpu;
@@ -1208,6 +1222,13 @@ static int __increase_priority_inheritance(struct task_struct* t,
1208 aux_task_owner_increase_priority(t); 1222 aux_task_owner_increase_priority(t);
1209 } 1223 }
1210#endif 1224#endif
1225
1226#ifdef CONFIG_LITMUS_NVIDIA
1227 /* propagate to gpu klmirqd */
1228 if (tsk_rt(t)->held_gpus) {
1229 gpu_owner_increase_priority(t);
1230 }
1231#endif
1211 } 1232 }
1212#ifdef CONFIG_LITMUS_NESTED_LOCKING 1233#ifdef CONFIG_LITMUS_NESTED_LOCKING
1213 } 1234 }
@@ -1237,16 +1258,6 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str
1237 1258
1238 __increase_priority_inheritance(t, prio_inh); 1259 __increase_priority_inheritance(t, prio_inh);
1239 1260
1240#ifdef CONFIG_LITMUS_SOFTIRQD
1241 if(tsk_rt(t)->cur_klmirqd != NULL)
1242 {
1243 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
1244 tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid);
1245
1246 __increase_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh);
1247 }
1248#endif
1249
1250 raw_spin_unlock(&cluster->cluster_lock); 1261 raw_spin_unlock(&cluster->cluster_lock);
1251 1262
1252#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) 1263#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
@@ -1320,6 +1331,13 @@ static int __decrease_priority_inheritance(struct task_struct* t,
1320 } 1331 }
1321#endif 1332#endif
1322 1333
1334#ifdef CONFIG_LITMUS_NVIDIA
1335 /* propagate to gpu */
1336 if (tsk_rt(t)->held_gpus) {
1337 gpu_owner_decrease_priority(t);
1338 }
1339#endif
1340
1323#ifdef CONFIG_LITMUS_NESTED_LOCKING 1341#ifdef CONFIG_LITMUS_NESTED_LOCKING
1324 } 1342 }
1325 else { 1343 else {
@@ -1346,16 +1364,6 @@ static void decrease_priority_inheritance(struct task_struct* t,
1346 raw_spin_lock(&cluster->cluster_lock); 1364 raw_spin_lock(&cluster->cluster_lock);
1347 __decrease_priority_inheritance(t, prio_inh); 1365 __decrease_priority_inheritance(t, prio_inh);
1348 1366
1349#ifdef CONFIG_LITMUS_SOFTIRQD
1350 if(tsk_rt(t)->cur_klmirqd != NULL)
1351 {
1352 TRACE_TASK(t, "%s/%d decreases in priority!\n",
1353 tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid);
1354
1355 __decrease_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh);
1356 }
1357#endif
1358
1359 raw_spin_unlock(&cluster->cluster_lock); 1367 raw_spin_unlock(&cluster->cluster_lock);
1360 1368
1361#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) 1369#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
@@ -1371,73 +1379,6 @@ static void decrease_priority_inheritance(struct task_struct* t,
1371} 1379}
1372 1380
1373 1381
1374
1375
1376
1377#ifdef CONFIG_LITMUS_SOFTIRQD
1378/* called with IRQs off */
1379static void increase_priority_inheritance_klmirqd(struct task_struct* klmirqd,
1380 struct task_struct* old_owner,
1381 struct task_struct* new_owner)
1382{
1383 cedf_domain_t* cluster = task_cpu_cluster(klmirqd);
1384
1385 BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread));
1386
1387 raw_spin_lock(&cluster->cluster_lock);
1388
1389 if(old_owner != new_owner)
1390 {
1391 if(old_owner)
1392 {
1393 // unreachable?
1394 tsk_rt(old_owner)->cur_klmirqd = NULL;
1395 }
1396
1397 TRACE_TASK(klmirqd, "giving ownership to %s/%d.\n",
1398 new_owner->comm, new_owner->pid);
1399
1400 tsk_rt(new_owner)->cur_klmirqd = klmirqd;
1401 }
1402
1403 __decrease_priority_inheritance(klmirqd, NULL); // kludge to clear out cur prio.
1404
1405 __increase_priority_inheritance(klmirqd,
1406 (tsk_rt(new_owner)->inh_task == NULL) ?
1407 new_owner :
1408 tsk_rt(new_owner)->inh_task);
1409
1410 raw_spin_unlock(&cluster->cluster_lock);
1411}
1412
1413
1414/* called with IRQs off */
1415static void decrease_priority_inheritance_klmirqd(struct task_struct* klmirqd,
1416 struct task_struct* old_owner,
1417 struct task_struct* new_owner)
1418{
1419 cedf_domain_t* cluster = task_cpu_cluster(klmirqd);
1420
1421 BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread));
1422
1423 raw_spin_lock(&cluster->cluster_lock);
1424
1425 TRACE_TASK(klmirqd, "priority restored\n");
1426
1427 __decrease_priority_inheritance(klmirqd, new_owner);
1428
1429 tsk_rt(old_owner)->cur_klmirqd = NULL;
1430
1431 raw_spin_unlock(&cluster->cluster_lock);
1432}
1433#endif // CONFIG_LITMUS_SOFTIRQD
1434
1435
1436
1437
1438
1439
1440
1441#ifdef CONFIG_LITMUS_NESTED_LOCKING 1382#ifdef CONFIG_LITMUS_NESTED_LOCKING
1442 1383
1443/* called with IRQs off */ 1384/* called with IRQs off */
@@ -1836,33 +1777,7 @@ static long cedf_activate_plugin(void)
1836 } 1777 }
1837 1778
1838#ifdef CONFIG_LITMUS_SOFTIRQD 1779#ifdef CONFIG_LITMUS_SOFTIRQD
1839 { 1780 init_klmirqd();
1840 /* distribute the daemons evenly across the clusters. */
1841 int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
1842 int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
1843 int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
1844
1845 int daemon = 0;
1846 for(i = 0; i < num_clusters; ++i)
1847 {
1848 int num_on_this_cluster = num_daemons_per_cluster;
1849 if(left_over)
1850 {
1851 ++num_on_this_cluster;
1852 --left_over;
1853 }
1854
1855 for(j = 0; j < num_on_this_cluster; ++j)
1856 {
1857 // first CPU of this cluster
1858 affinity[daemon++] = i*cluster_size;
1859 }
1860 }
1861
1862 spawn_klmirqd(affinity);
1863
1864 kfree(affinity);
1865 }
1866#endif 1781#endif
1867 1782
1868#ifdef CONFIG_LITMUS_NVIDIA 1783#ifdef CONFIG_LITMUS_NVIDIA
@@ -1906,10 +1821,6 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
1906#ifdef CONFIG_LITMUS_AFFINITY_LOCKING 1821#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1907 .allocate_aff_obs = cedf_allocate_affinity_observer, 1822 .allocate_aff_obs = cedf_allocate_affinity_observer,
1908#endif 1823#endif
1909#ifdef CONFIG_LITMUS_SOFTIRQD
1910 .increase_prio_klmirqd = increase_priority_inheritance_klmirqd,
1911 .decrease_prio_klmirqd = decrease_priority_inheritance_klmirqd,
1912#endif
1913#ifdef CONFIG_LITMUS_PAI_SOFTIRQD 1824#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1914 .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet, 1825 .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet,
1915 .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet, 1826 .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet,
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index d52be9325044..f27c104ea027 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -26,6 +26,7 @@
26 26
27#include <litmus/bheap.h> 27#include <litmus/bheap.h>
28#include <litmus/binheap.h> 28#include <litmus/binheap.h>
29#include <litmus/trace.h>
29 30
30#ifdef CONFIG_LITMUS_LOCKING 31#ifdef CONFIG_LITMUS_LOCKING
31#include <litmus/kfmlp_lock.h> 32#include <litmus/kfmlp_lock.h>
@@ -50,7 +51,6 @@
50 51
51#ifdef CONFIG_LITMUS_PAI_SOFTIRQD 52#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
52#include <linux/interrupt.h> 53#include <linux/interrupt.h>
53#include <litmus/trace.h>
54#endif 54#endif
55 55
56#ifdef CONFIG_LITMUS_NVIDIA 56#ifdef CONFIG_LITMUS_NVIDIA
@@ -156,12 +156,6 @@ static raw_spinlock_t* gsnedf_get_dgl_spinlock(struct task_struct *t)
156#endif 156#endif
157 157
158#ifdef CONFIG_LITMUS_PAI_SOFTIRQD 158#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
159struct tasklet_head
160{
161 struct tasklet_struct *head;
162 struct tasklet_struct **tail;
163};
164
165struct tasklet_head gsnedf_pending_tasklets; 159struct tasklet_head gsnedf_pending_tasklets;
166#endif 160#endif
167 161
@@ -938,13 +932,6 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
938 932
939 sched_state_task_picked(); 933 sched_state_task_picked();
940 934
941#if 0
942 if (next && is_realtime(next) && tsk_rt(next)->is_aux_task && !tsk_rt(next)->inh_task) {
943 TRACE_TASK(next, "is aux with no inheritance. preventing it from actually running.\n");
944 next = NULL;
945 }
946#endif
947
948 raw_spin_unlock(&gsnedf_lock); 935 raw_spin_unlock(&gsnedf_lock);
949 936
950#ifdef WANT_ALL_SCHED_EVENTS 937#ifdef WANT_ALL_SCHED_EVENTS
@@ -1056,6 +1043,13 @@ static void gsnedf_task_wake_up(struct task_struct *task)
1056 } 1043 }
1057#endif 1044#endif
1058 1045
1046#ifdef CONFIG_LITMUS_NVIDIA
1047 if (tsk_rt(task)->held_gpus && !tsk_rt(task)->hide_from_gpu) {
1048 TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", task->comm, task->pid);
1049 disable_gpu_owner(task);
1050 }
1051#endif
1052
1059 gsnedf_job_arrival(task); 1053 gsnedf_job_arrival(task);
1060 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 1054 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
1061} 1055}
@@ -1079,6 +1073,14 @@ static void gsnedf_task_block(struct task_struct *t)
1079 } 1073 }
1080#endif 1074#endif
1081 1075
1076#ifdef CONFIG_LITMUS_NVIDIA
1077 if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) {
1078
1079 TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid);
1080 enable_gpu_owner(t);
1081 }
1082#endif
1083
1082 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 1084 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
1083 1085
1084 BUG_ON(!is_realtime(t)); 1086 BUG_ON(!is_realtime(t));
@@ -1106,6 +1108,13 @@ static void gsnedf_task_exit(struct task_struct * t)
1106 } 1108 }
1107#endif 1109#endif
1108 1110
1111#ifdef CONFIG_LITMUS_NVIDIA
1112 /* make sure we clean up on our way out */
1113 if(tsk_rt(t)->held_gpus) {
1114 disable_gpu_owner(t);
1115 }
1116#endif
1117
1109 unlink(t); 1118 unlink(t);
1110 if (tsk_rt(t)->scheduled_on != NO_CPU) { 1119 if (tsk_rt(t)->scheduled_on != NO_CPU) {
1111 gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; 1120 gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
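The three GSN-EDF hunks above (wake-up, block, exit) add the same pattern the aux-task code already follows: a task holding GPUs allows its GPU klmirqd threads to inherit from it only while it is suspended, and always tears the hand-off down on exit. Distilled into one place as a sketch; the enable_gpu_owner()/disable_gpu_owner() declarations are assumed to come from the LITMUS NVIDIA support code touched elsewhere in this patch:

/* Sketch of the gpu-owner hand-off added to the plugin callbacks. */
static void gpu_owner_on_wake_up(struct task_struct *t)
{
        /* owner is runnable again: klmirqd must not inherit from it */
        if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu)
                disable_gpu_owner(t);
}

static void gpu_owner_on_block(struct task_struct *t)
{
        /* owner suspends: let its GPU klmirqd threads inherit */
        if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu)
                enable_gpu_owner(t);
}

static void gpu_owner_on_exit(struct task_struct *t)
{
        /* unconditionally clean up on the way out */
        if (tsk_rt(t)->held_gpus)
                disable_gpu_owner(t);
}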
@@ -1154,7 +1163,6 @@ static int __increase_priority_inheritance(struct task_struct* t,
1154 1163
1155#ifdef CONFIG_LITMUS_NESTED_LOCKING 1164#ifdef CONFIG_LITMUS_NESTED_LOCKING
1156 /* this sanity check allows for weaker locking in protocols */ 1165 /* this sanity check allows for weaker locking in protocols */
1157 /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) */
1158 if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { 1166 if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
1159#endif 1167#endif
1160 TRACE_TASK(t, "inherits priority from %s/%d\n", 1168 TRACE_TASK(t, "inherits priority from %s/%d\n",
@@ -1218,6 +1226,14 @@ static int __increase_priority_inheritance(struct task_struct* t,
1218 aux_task_owner_increase_priority(t); 1226 aux_task_owner_increase_priority(t);
1219 } 1227 }
1220#endif 1228#endif
1229
1230#ifdef CONFIG_LITMUS_NVIDIA
1231 /* propagate to gpu klmirqd */
1232 if (tsk_rt(t)->held_gpus) {
1233 gpu_owner_increase_priority(t);
1234 }
1235#endif
1236
1221 } 1237 }
1222#ifdef CONFIG_LITMUS_NESTED_LOCKING 1238#ifdef CONFIG_LITMUS_NESTED_LOCKING
1223 } 1239 }
@@ -1247,16 +1263,6 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str
1247 1263
1248 success = __increase_priority_inheritance(t, prio_inh); 1264 success = __increase_priority_inheritance(t, prio_inh);
1249 1265
1250#ifdef CONFIG_LITMUS_SOFTIRQD
1251 if(tsk_rt(t)->cur_klmirqd != NULL)
1252 {
1253 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
1254 tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid);
1255
1256 __increase_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh);
1257 }
1258#endif
1259
1260 raw_spin_unlock(&gsnedf_lock); 1266 raw_spin_unlock(&gsnedf_lock);
1261 1267
1262#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) 1268#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
@@ -1330,6 +1336,14 @@ static int __decrease_priority_inheritance(struct task_struct* t,
1330 } 1336 }
1331#endif 1337#endif
1332 1338
1339#ifdef CONFIG_LITMUS_NVIDIA
1340 /* propagate to gpu */
1341 if (tsk_rt(t)->held_gpus) {
1342 gpu_owner_decrease_priority(t);
1343 }
1344#endif
1345
1346
1333#ifdef CONFIG_LITMUS_NESTED_LOCKING 1347#ifdef CONFIG_LITMUS_NESTED_LOCKING
1334 } 1348 }
1335 else { 1349 else {
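The two hunks above hook __increase_priority_inheritance() and __decrease_priority_inheritance() so that a change in a GPU holder's effective priority is forwarded to its klmirqd threads through the generic gpu-owner path, replacing the per-task cur_klmirqd hand-off removed in the surrounding hunks. The symmetric call pattern, as a sketch (the wrapper name is hypothetical):

/* Sketch: forward a priority change on a GPU-holding task to its
 * klmirqd threads via the gpu-owner path added by this patch. */
static void propagate_prio_to_gpu_klmirqd(struct task_struct *t, int increased)
{
        if (!tsk_rt(t)->held_gpus)
                return;
        if (increased)
                gpu_owner_increase_priority(t);
        else
                gpu_owner_decrease_priority(t);
}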
@@ -1357,16 +1371,6 @@ static void decrease_priority_inheritance(struct task_struct* t,
1357 1371
1358 success = __decrease_priority_inheritance(t, prio_inh); 1372 success = __decrease_priority_inheritance(t, prio_inh);
1359 1373
1360#ifdef CONFIG_LITMUS_SOFTIRQD
1361 if(tsk_rt(t)->cur_klmirqd != NULL)
1362 {
1363 TRACE_TASK(t, "%s/%d decreases in priority!\n",
1364 tsk_rt(t)->cur_klmirqd->comm, tsk_rt(t)->cur_klmirqd->pid);
1365
1366 __decrease_priority_inheritance(tsk_rt(t)->cur_klmirqd, prio_inh);
1367 }
1368#endif
1369
1370 raw_spin_unlock(&gsnedf_lock); 1374 raw_spin_unlock(&gsnedf_lock);
1371 1375
1372#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) 1376#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
@@ -1382,62 +1386,6 @@ static void decrease_priority_inheritance(struct task_struct* t,
1382} 1386}
1383 1387
1384 1388
1385#ifdef CONFIG_LITMUS_SOFTIRQD
1386/* called with IRQs off */
1387static void increase_priority_inheritance_klmirqd(struct task_struct* klmirqd,
1388 struct task_struct* old_owner,
1389 struct task_struct* new_owner)
1390{
1391 BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread));
1392
1393 raw_spin_lock(&gsnedf_lock);
1394
1395 if(old_owner != new_owner)
1396 {
1397 if(old_owner)
1398 {
1399 // unreachable?
1400 tsk_rt(old_owner)->cur_klmirqd = NULL;
1401 }
1402
1403 TRACE_TASK(klmirqd, "giving ownership to %s/%d.\n",
1404 new_owner->comm, new_owner->pid);
1405
1406 tsk_rt(new_owner)->cur_klmirqd = klmirqd;
1407 }
1408
1409 __decrease_priority_inheritance(klmirqd, NULL); // kludge to clear out cur prio.
1410
1411 __increase_priority_inheritance(klmirqd,
1412 (tsk_rt(new_owner)->inh_task == NULL) ?
1413 new_owner :
1414 tsk_rt(new_owner)->inh_task);
1415
1416 raw_spin_unlock(&gsnedf_lock);
1417}
1418
1419
1420/* called with IRQs off */
1421static void decrease_priority_inheritance_klmirqd(struct task_struct* klmirqd,
1422 struct task_struct* old_owner,
1423 struct task_struct* new_owner)
1424{
1425 BUG_ON(!(tsk_rt(klmirqd)->is_proxy_thread));
1426
1427 raw_spin_lock(&gsnedf_lock);
1428
1429 TRACE_TASK(klmirqd, "priority restored\n");
1430
1431 __decrease_priority_inheritance(klmirqd, new_owner);
1432
1433 tsk_rt(old_owner)->cur_klmirqd = NULL;
1434
1435 raw_spin_unlock(&gsnedf_lock);
1436}
1437#endif
1438
1439
1440
1441 1389
1442#ifdef CONFIG_LITMUS_NESTED_LOCKING 1390#ifdef CONFIG_LITMUS_NESTED_LOCKING
1443 1391
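The removed increase/decrease_priority_inheritance_klmirqd() callbacks above transferred klmirqd ownership by hand and re-donated the new owner's effective priority after a "decrease to NULL" kludge to clear stale inheritance. With klmirqd treated like aux tasks, that donor selection now happens behind gpu_owner_increase_priority(); for reference, the selection the old code performed was simply (sketch, helper name hypothetical):

/* Sketch of the donor choice in the removed gsn-edf helper: donate the
 * owner's inherited priority if it has one, otherwise its own. */
static struct task_struct *effective_prio_donor(struct task_struct *owner)
{
        return tsk_rt(owner)->inh_task ? tsk_rt(owner)->inh_task : owner;
}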
@@ -1923,7 +1871,7 @@ static long gsnedf_activate_plugin(void)
1923#endif 1871#endif
1924 1872
1925#ifdef CONFIG_LITMUS_SOFTIRQD 1873#ifdef CONFIG_LITMUS_SOFTIRQD
1926 spawn_klmirqd(NULL); 1874 init_klmirqd();
1927#endif 1875#endif
1928 1876
1929#ifdef CONFIG_LITMUS_NVIDIA 1877#ifdef CONFIG_LITMUS_NVIDIA
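Plugin activation no longer pre-spawns a fixed pool of klmirqd daemons; it only initializes the framework via init_klmirqd(), matching the removal of NR_LITMUS_SOFTIRQD earlier in the patch, with klmirqd threads presumably created on demand like aux tasks. A minimal sketch of the resulting activation hook (the plugin name is hypothetical, and init_klmirqd()'s semantics beyond "initialize, do not spawn" are assumed):

static long example_activate_plugin(void)
{
#ifdef CONFIG_LITMUS_SOFTIRQD
        init_klmirqd();   /* set up klmirqd; no daemons are spawned here */
#endif
        return 0;
}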
@@ -1965,10 +1913,6 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
1965#ifdef CONFIG_LITMUS_AFFINITY_LOCKING 1913#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1966 .allocate_aff_obs = gsnedf_allocate_affinity_observer, 1914 .allocate_aff_obs = gsnedf_allocate_affinity_observer,
1967#endif 1915#endif
1968#ifdef CONFIG_LITMUS_SOFTIRQD
1969 .increase_prio_klmirqd = increase_priority_inheritance_klmirqd,
1970 .decrease_prio_klmirqd = decrease_priority_inheritance_klmirqd,
1971#endif
1972#ifdef CONFIG_LITMUS_PAI_SOFTIRQD 1916#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1973 .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet, 1917 .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet,
1974 .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet, 1918 .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet,
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index cda67e0f6bc8..30c216fd6fdc 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -167,18 +167,6 @@ static int litmus_dummy___decrease_prio(struct task_struct* t, struct task_struc
167} 167}
168#endif 168#endif
169 169
170#ifdef CONFIG_LITMUS_SOFTIRQD
171static void litmus_dummy_increase_prio_klmirqd(struct task_struct* klmirqd,
172 struct task_struct* old_owner,
173 struct task_struct* new_owner)
174{
175}
176
177static void litmus_dummy_decrease_prio_klmirqd(struct task_struct* klmirqd,
178 struct task_struct* old_owner)
179{
180}
181#endif
182 170
183#ifdef CONFIG_LITMUS_PAI_SOFTIRQD 171#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
184static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t) 172static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t)
@@ -263,10 +251,6 @@ struct sched_plugin linux_sched_plugin = {
263 .nested_decrease_prio = litmus_dummy_nested_decrease_prio, 251 .nested_decrease_prio = litmus_dummy_nested_decrease_prio,
264 .__compare = litmus_dummy___compare, 252 .__compare = litmus_dummy___compare,
265#endif 253#endif
266#ifdef CONFIG_LITMUS_SOFTIRQD
267 .increase_prio_klmirqd = litmus_dummy_increase_prio_klmirqd,
268 .decrease_prio_klmirqd = litmus_dummy_decrease_prio_klmirqd,
269#endif
270#ifdef CONFIG_LITMUS_PAI_SOFTIRQD 254#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
271 .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet, 255 .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet,
272 .change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet, 256 .change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet,
@@ -327,10 +311,6 @@ int register_sched_plugin(struct sched_plugin* plugin)
327 CHECK(nested_decrease_prio); 311 CHECK(nested_decrease_prio);
328 CHECK(__compare); 312 CHECK(__compare);
329#endif 313#endif
330#ifdef CONFIG_LITMUS_SOFTIRQD
331 CHECK(increase_prio_klmirqd);
332 CHECK(decrease_prio_klmirqd);
333#endif
334#ifdef CONFIG_LITMUS_PAI_SOFTIRQD 314#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
335 CHECK(enqueue_pai_tasklet); 315 CHECK(enqueue_pai_tasklet);
336 CHECK(change_prio_pai_tasklet); 316 CHECK(change_prio_pai_tasklet);
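With the klmirqd callbacks deleted from struct sched_plugin, from the default linux_sched_plugin table, and from the registration-time CHECK() list, plugins neither implement nor receive dummy versions of them. The CHECK() idiom used above is assumed to be the usual "fall back to the litmus_dummy_* implementation" macro, roughly:

/* Assumed shape of the CHECK() macro used during plugin registration:
 * any callback the plugin leaves NULL is pointed at its dummy fallback. */
#define CHECK(func) do {                                      \
                if (!plugin->func)                            \
                        plugin->func = litmus_dummy_ ## func; \
        } while (0)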