Diffstat (limited to 'litmus')
-rw-r--r--  litmus/Kconfig            | 16
-rw-r--r--  litmus/gpu_affinity.c     | 38
-rw-r--r--  litmus/ikglp_lock.c       | 39
-rw-r--r--  litmus/jobs.c             | 17
-rw-r--r--  litmus/kfmlp_lock.c       |  2
-rw-r--r--  litmus/litmus.c           | 20
-rw-r--r--  litmus/locking.c          |  8
-rw-r--r--  litmus/nvidia_info.c      | 13
-rw-r--r--  litmus/rsm_lock.c         |  7
-rw-r--r--  litmus/sched_cedf.c       | 13
-rw-r--r--  litmus/sched_plugin.c     |  7
-rw-r--r--  litmus/sched_task_trace.c | 50
12 files changed, 185 insertions(+), 45 deletions(-)
diff --git a/litmus/Kconfig b/litmus/Kconfig
index a34440f3d8bc..03cc92c50eb9 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -157,7 +157,7 @@ config SCHED_TASK_TRACE
 config SCHED_TASK_TRACE_SHIFT
 	int "Buffer size for sched_trace_xxx() events"
 	depends on SCHED_TASK_TRACE
-	range 8 13
+	range 8 15
 	default 9
 	help

@@ -253,7 +253,7 @@ endmenu

 menu "Interrupt Handling"

 choice
 	prompt "Scheduling of interrupt bottom-halves in Litmus."
 	default LITMUS_SOFTIRQD_NONE
 	depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ
@@ -282,11 +282,11 @@ config LITMUS_PAI_SOFTIRQD
 	  scheduling points. Trades context switch overhead
 	  at the cost of non-preemptive durations of bottom half
 	  processing.

 	  G-EDF/RM, C-EDF/RM ONLY for now!

 endchoice


 config NR_LITMUS_SOFTIRQD
 	int "Number of klitirqd."
@@ -344,7 +344,7 @@ choice
 	depends on LITMUS_NVIDIA
 	help
 	  Select the version of CUDA/driver to support.

 config CUDA_4_0
 	bool "CUDA 4.0"
 	depends on LITMUS_NVIDIA
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
index 70a86bdd9aec..9762be1a085e 100644
--- a/litmus/gpu_affinity.c
+++ b/litmus/gpu_affinity.c
@@ -5,25 +5,32 @@
 #include <litmus/litmus.h>
 #include <litmus/gpu_affinity.h>

+#include <litmus/sched_trace.h>
+
 #define OBSERVATION_CAP 2*1e9

-static void update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
+static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
 {
+	fp_t relative_err;
 	fp_t err, new;
 	fp_t actual = _integer_to_fp(observed);

 	err = _sub(actual, fb->est);
 	new = _add(_mul(a, err), _mul(b, fb->accum_err));

+	relative_err = _div(err, actual);
+
 	fb->est = new;
 	fb->accum_err = _add(fb->accum_err, err);
+
+	return relative_err;
 }

 void update_gpu_estimate(struct task_struct *t, lt_t observed)
 {
 	feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);

-	WARN_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
+	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);

 	if(unlikely(fb->est.val == 0)) {
 		// kludge-- cap observed values to prevent whacky estimations.
@@ -40,18 +47,29 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed)
 		fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
 	}
 	else {
-		update_estimate(fb,
-				tsk_rt(t)->gpu_fb_param_a,
-				tsk_rt(t)->gpu_fb_param_b,
-				observed);
+		fp_t rel_err = update_estimate(fb,
+				tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
+				tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
 				observed);

-		if(_fp_to_integer(fb->est) <= 0) {
-			// TODO: talk to Jonathan about how well this works.
-			// Maybe we should average the observed and est instead?
+		if(unlikely(_fp_to_integer(fb->est) <= 0)) {
 			TRACE_TASK(t, "Invalid estimate. Patching.\n");
 			fb->est = _integer_to_fp(observed);
 			fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
 		}
+		else {
+			// struct migration_info mig_info;
+
+			sched_trace_prediction_err(t,
+					&(tsk_rt(t)->gpu_migration),
+					&rel_err);
+
+			// mig_info.observed = observed;
+			// mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
+			// mig_info.distance = tsk_rt(t)->gpu_migration;
+			//
+			// sched_trace_migration(t, &mig_info);
+		}
 	}

 	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n",
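
Reviewer note: update_estimate() is a proportional-integral feedback filter over LITMUS^RT's fp_t fixed-point type, and this hunk additionally makes it report how far off the previous estimate was. A minimal floating-point sketch of the same update, with doubles standing in for fp_t and all names here illustrative, not the kernel code:

/* Sketch of the feedback update above; doubles model fp_t arithmetic. */
struct feedback_est {
	double est;       /* current execution-time estimate */
	double accum_err; /* accumulated (integral) error term */
};

/* Returns the relative error of the old estimate, mirroring the new
 * fp_t return value introduced by this patch. */
static double update_estimate_sketch(struct feedback_est *fb,
                                     double a, double b, double observed)
{
	double err = observed - fb->est;              /* _sub(actual, fb->est) */
	double new_est = a * err + b * fb->accum_err; /* _add(_mul(a, err), _mul(b, accum_err)) */
	double rel_err = err / observed;              /* _div(err, actual) */

	fb->est = new_est;
	fb->accum_err += err;
	return rel_err;
}

With the per-distance parameters introduced in litmus.c below, each migration distance now gets its own (a, b) pair, so each filter in gpu_migration_est[] converges independently.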
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 023443014d4b..83b708ab85cb 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1346,6 +1346,10 @@ int ikglp_unlock(struct litmus_lock* l)
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 	if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
 		fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
+		if(fq_of_new_on_fq->count == 0) {
+			// ignore it?
+			// fq_of_new_on_fq = fq;
+		}
 	}
 	else {
 		fq_of_new_on_fq = fq;
@@ -1383,6 +1387,10 @@ int ikglp_unlock(struct litmus_lock* l)
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 	if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
 		fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
+		if(fq_of_new_on_fq->count == 0) {
+			// ignore it?
+			// fq_of_new_on_fq = fq;
+		}
 	}
 	else {
 		fq_of_new_on_fq = fq;
@@ -1409,6 +1417,10 @@ int ikglp_unlock(struct litmus_lock* l)
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 	if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
 		fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
+		if(fq_of_new_on_fq->count == 0) {
+			// ignore it?
+			// fq_of_new_on_fq = fq;
+		}
 	}
 	else {
 		fq_of_new_on_fq = fq;
@@ -1569,7 +1581,7 @@ int ikglp_unlock(struct litmus_lock* l)
 		}
 	}

-
+wake_kludge:
 	if(waitqueue_active(&fq->wait))
 	{
 		wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list);
@@ -1672,6 +1684,16 @@ int ikglp_unlock(struct litmus_lock* l)
 		// wake up the new resource holder!
 		wake_up_process(next);
 	}
+	if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) {
+		// The task we promoted went to an empty FQ. (Why didn't stealing pick this up?)
+		// Wake up the new task too.
+
+		BUG_ON(fq_of_new_on_fq->owner != NULL);
+
+		fq = fq_of_new_on_fq;
+		fq_of_new_on_fq = NULL;
+		goto wake_kludge;
+	}

 	unlock_fine_irqrestore(&sem->lock, flags);
 	unlock_global_irqrestore(dgl_lock, flags);
@@ -1917,7 +1939,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
 		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
 				  NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
-		return(NULL);
+		// return(NULL);
 	}

 	ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
@@ -2600,8 +2622,8 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,

 	tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration

-	TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
-			  t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
+	TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
+			  t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration);

 	// count the number of resource holders
 	++(*(aff->q_info[replica].nr_cur_users));
@@ -2626,8 +2648,6 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,

 	est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));

-	tsk_rt(t)->last_gpu = gpu;
-
 	// count the number of resource holders
 	--(*(aff->q_info[replica].nr_cur_users));

@@ -2636,12 +2656,15 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
 	// update estimates
 	update_gpu_estimate(t, get_gpu_time(t));

-	TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. "
+	TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. "
 			  "estimated was %llu. diff is %d\n",
-			  t->comm, t->pid, gpu,
+			  t->comm, t->pid, gpu, tsk_rt(t)->last_gpu,
+			  tsk_rt(t)->gpu_migration,
 			  get_gpu_time(t),
 			  est_time,
 			  (long long)get_gpu_time(t) - (long long)est_time);
+
+	tsk_rt(t)->last_gpu = gpu;
 }

 struct ikglp_affinity_ops gpu_ikglp_affinity =
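
Reviewer note on the reordering in gpu_ikglp_notify_freed(): both the estimate lookup and the new trace line derive the migration distance from tsk_rt(t)->last_gpu, so the previous GPU must remain visible until after the feedback update and the TRACE_CUR() run; the patch therefore moves the last_gpu assignment to the end. A compilable ordering sketch, with stand-in types and helpers (only the ordering reflects the patch):

/* Ordering sketch for gpu_ikglp_notify_freed() as patched; the types and
 * helper signatures here are illustrative stand-ins. */
struct task_info { int last_gpu; };

extern int  gpu_migration_distance(int last_gpu, int gpu);
extern void update_estimate_for(struct task_info *t, int distance);

static void notify_freed_sketch(struct task_info *t, int gpu)
{
	/* the distance (and hence the estimate bucket) depends on the previous GPU */
	int dist = gpu_migration_distance(t->last_gpu, gpu);

	update_estimate_for(t, dist); /* feedback update + trace still see the old last_gpu */
	t->last_gpu = gpu;            /* only now is the new GPU recorded */
}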
diff --git a/litmus/jobs.c b/litmus/jobs.c
index 36e314625d86..1d97462cc128 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t)
 {
 	BUG_ON(!t);
 	/* prepare next release */
-	t->rt_param.job_params.release = t->rt_param.job_params.deadline;
-	t->rt_param.job_params.deadline += get_rt_period(t);
+
+	if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) {
+		/* allow the release point to slip if we've passed our deadline. */
+		lt_t now = litmus_clock();
+		t->rt_param.job_params.release =
+			(t->rt_param.job_params.deadline < now) ?
+			now : t->rt_param.job_params.deadline;
+		t->rt_param.job_params.deadline =
+			t->rt_param.job_params.release + get_rt_period(t);
+	}
+	else {
+		t->rt_param.job_params.release = t->rt_param.job_params.deadline;
+		t->rt_param.job_params.deadline += get_rt_period(t);
+	}
+
 	t->rt_param.job_params.exec_time = 0;
 	/* update job sequence number */
 	t->rt_param.job_params.job_no++;
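
Reviewer note: for RT_CLASS_SOFT_W_SLIP tasks this hunk lets a late job re-anchor its next release at the current time rather than releasing in the past. A small self-contained model of the policy, with lt_t modeled as unsigned long long and the example values illustrative:

/* Models the release-slip policy above. */
static unsigned long long next_release(unsigned long long deadline,
                                       unsigned long long now,
                                       int allow_slip)
{
	/* With slipping, a release scheduled in the past moves up to "now";
	 * without it, releases stay on the original period grid. */
	if (allow_slip && deadline < now)
		return now;
	return deadline;
}

/* Example with period = 10: a job whose deadline was 100 is processed at
 * now = 103. With slip: release = 103, next deadline = 113. Without:
 * release = 100, next deadline = 110 (back-to-back catch-up releases). */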
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index 0b64977789a6..bff857ed8d4e 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -590,7 +590,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
 		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
 				  NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
-		return(NULL);
+		// return(NULL);
 	}

 	kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 5b301c418b96..d1f836c8af6e 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -318,11 +318,21 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
 void init_gpu_affinity_state(struct task_struct* p)
 {
 	// under-damped
-	p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
-	p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
-	// critically-damped
-	// p->rt_param.gpu_fb_param_a = _frac(102, 1000);
-	// p->rt_param.gpu_fb_param_b = _frac(303, 1000);
+	//p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
+	//p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
+
+	// empirical:
+	p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
+	p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
+
+	p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
+	p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
+
+	p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
+	p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
+
+	p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
+	p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);

 	p->rt_param.gpu_migration = MIG_NONE;
 	p->rt_param.last_gpu = -1;
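
Reviewer note: _frac(x, 10000) is the fixed-point ratio x/10000, so the new per-migration-distance gains decode as below (array index = migration distance, matching the gpu_migration index used in gpu_affinity.c; the commented-out under-damped pair decoded to a = 1.4008, b = 1.6024). Shown as doubles purely for readability:

/* Decoded (a, b) feedback gains set above, one pair per migration distance. */
static const double gpu_fb_a[] = { 0.7550, 0.8600, 0.6890, 0.7580 };
static const double gpu_fb_b[] = { 4.5800, 4.0000, 4.0000, 3.4590 };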
diff --git a/litmus/locking.c b/litmus/locking.c
index cb11c04ed0d4..718a5a3281d7 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -349,10 +349,10 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait)
 all_acquired:

 	// FOR SANITY CHECK FOR TESTING
-	for(i = 0; i < dgl_wait->size; ++i) {
-		struct litmus_lock *l = dgl_wait->locks[i];
-		BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
-	}
+	// for(i = 0; i < dgl_wait->size; ++i) {
+	//	struct litmus_lock *l = dgl_wait->locks[i];
+	//	BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
+	// }

 	TRACE_CUR("Acquired entire DGL\n");

diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
index 3d8c50882066..4b86a50d3bd1 100644
--- a/litmus/nvidia_info.c
+++ b/litmus/nvidia_info.c
@@ -265,6 +265,11 @@ int init_nvidia_info(void)
 	}
 }

+void shutdown_nvidia_info(void)
+{
+	nvidia_mod = NULL;
+	mb();
+}

 /* works with pointers to static data inside the module too. */
 int is_nvidia_func(void* func_addr)
@@ -319,14 +324,11 @@ u32 get_work_nv_device_num(const struct work_struct *t)
 }


-
-#define MAX_NR_OWNERS 3
-
 typedef struct {
 	raw_spinlock_t lock;
 	int nr_owners;
 	struct task_struct* max_prio_owner;
-	struct task_struct* owners[MAX_NR_OWNERS];
+	struct task_struct* owners[NV_MAX_SIMULT_USERS];
 }nv_device_registry_t;

 static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
@@ -431,6 +433,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t)
 		return ret;  // assume already registered.
 	}

+
 	raw_spin_lock_irqsave(&reg->lock, flags);

 	if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
@@ -461,7 +464,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t)
 	else
 	{
 		TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
-		ret = -EBUSY;
+		//ret = -EBUSY;
 	}

 	raw_spin_unlock_irqrestore(&reg->lock, flags);
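
Reviewer note: the MAX_NR_OWNERS removal is a sizing fix. __reg_nv_device() admits up to NV_MAX_SIMULT_USERS concurrent owners per device, but owners[] was sized by an unrelated MAX_NR_OWNERS (3), so the two bounds could silently drift apart. The registry as patched, re-annotated (comments here are mine, not part of the patch):

typedef struct {
	raw_spinlock_t lock;
	int nr_owners;                                   /* admission check: nr_owners < NV_MAX_SIMULT_USERS */
	struct task_struct* max_prio_owner;
	struct task_struct* owners[NV_MAX_SIMULT_USERS]; /* storage now sized by the same constant */
} nv_device_registry_t;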
diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c
index 965164c43537..75ed87c5ed48 100644
--- a/litmus/rsm_lock.c
+++ b/litmus/rsm_lock.c
@@ -502,6 +502,13 @@ int rsm_mutex_unlock(struct litmus_lock* l)
 		tsk_rt(next)->blocked_lock = NULL;
 		mb();

+#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
+		// re-enable tracking
+		if(tsk_rt(next)->held_gpus) {
+			tsk_rt(next)->suspend_gpu_tracker_on_block = 0;
+		}
+#endif
+
 		wake_up_process(next);
 	}
 	else {
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 99f7620925ba..be14dbec6ed2 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -733,11 +733,11 @@ static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio,
 	unsigned long flags;
 	cedf_domain_t *cluster;
 	struct task_struct *probe;

 	// identify the cluster by the assignment of these tasks. one should
 	// be non-NULL.
 	probe = (old_prio) ? old_prio : new_prio;

 	if(probe) {
 		cluster = task_cpu_cluster(probe);

@@ -838,8 +838,13 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
 #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
 	if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
 		if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
+			// don't track preemptions or locking protocol suspensions.
+			TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n");
 			stop_gpu_tracker(entry->scheduled);
 		}
+		else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
+			TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n");
+		}
 	}
 #endif

@@ -1596,6 +1601,10 @@ static void cleanup_cedf(void)
 {
 	int i;

+#ifdef CONFIG_LITMUS_NVIDIA
+	shutdown_nvidia_info();
+#endif
+
 	if (clusters_allocated) {
 		for (i = 0; i < num_clusters; i++) {
 			kfree(cedf[i].cpus);
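
Reviewer note: the cedf_schedule() hunk only adds tracing, but it documents a tracker policy that the nested conditional makes easy to misread. A condensed restatement (the helper name and flattened structure are illustrative, not from the patch):

/* GPU-time tracking policy in cedf_schedule(), restated. */
static void gpu_tracker_policy(struct task_struct *t, int blocks)
{
	if (!tsk_rt(t)->held_gpus)
		return;                   /* no GPU time being tracked for this task */

	if (!blocks || tsk_rt(t)->suspend_gpu_tracker_on_block)
		stop_gpu_tracker(t);      /* preemption, or a suspension initiated by a
		                           * locking protocol: don't charge GPU time */
	/* else: the task self-suspends while holding a GPU for other reasons,
	 * so the tracker stays on (the "remains on" trace message above) */
}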
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 2433297b7482..245e41c25a5d 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -13,6 +13,10 @@
 #include <litmus/preempt.h>
 #include <litmus/jobs.h>

+#ifdef CONFIG_LITMUS_NVIDIA
+#include <litmus/nvidia_info.h>
+#endif
+
 /*
  * Generic function to trigger preemption on either local or remote cpu
  * from scheduler plugins. The key feature is that this function is
@@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void)

 static long litmus_dummy_activate_plugin(void)
 {
+#ifdef CONFIG_LITMUS_NVIDIA
+	shutdown_nvidia_info();
+#endif
 	return 0;
 }

diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index 2bd3a787611b..f7f575346b54 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -247,6 +247,53 @@ feather_callback void do_sched_trace_action(unsigned long id,



+
+
+feather_callback void do_sched_trace_prediction_err(unsigned long id,
+		unsigned long _task,
+		unsigned long _distance,
+		unsigned long _rel_err)
+{
+	struct task_struct *t = (struct task_struct*) _task;
+	struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t);
+
+	if (rec) {
+		gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance;
+		fp_t* rel_err = (fp_t*) _rel_err;
+
+		rec->data.prediction_err.distance = *distance;
+		rec->data.prediction_err.rel_err = rel_err->val;
+		put_record(rec);
+	}
+}
+
+
+feather_callback void do_sched_trace_migration(unsigned long id,
+		unsigned long _task,
+		unsigned long _mig_info)
+{
+	struct task_struct *t = (struct task_struct*) _task;
+	struct st_event_record *rec = get_record(ST_MIGRATION, t);
+
+	if (rec) {
+		struct migration_info* mig_info = (struct migration_info*) _mig_info;
+
+		rec->hdr.extra = mig_info->distance;
+		rec->data.migration.observed = mig_info->observed;
+		rec->data.migration.estimated = mig_info->estimated;
+
+		put_record(rec);
+	}
+}
+
+
+
+
+
+
+
+
+
 feather_callback void do_sched_trace_tasklet_release(unsigned long id,
 		unsigned long _owner)
 {
@@ -457,3 +504,6 @@ EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);



+
+
+