 include/litmus/gpu_affinity.h |  7
 include/litmus/nvidia_info.h  |  1
 include/litmus/rt_param.h     |  5
 include/litmus/sched_trace.h  | 44
 kernel/softirq.c              | 34
 litmus/Kconfig                |  2
 litmus/gpu_affinity.c         | 38
 litmus/ikglp_lock.c           | 39
 litmus/jobs.c                 | 17
 litmus/kfmlp_lock.c           |  2
 litmus/litmus.c               | 20
 litmus/locking.c              |  8
 litmus/nvidia_info.c          | 13
 litmus/rsm_lock.c             |  7
 litmus/sched_cedf.c           | 13
 litmus/sched_plugin.c         |  7
 litmus/sched_task_trace.c     | 50
 17 files changed, 261 insertions(+), 46 deletions(-)
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
index d4db2003ad86..6b3fb8b28745 100644
--- a/include/litmus/gpu_affinity.h
+++ b/include/litmus/gpu_affinity.h
@@ -43,10 +43,7 @@ static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t
 		val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est);
 	}
 
-	// minimum value is 1 (val is 0 if we haven't run with local affinity yet)
-	// TODO: pick a better default min-value. 1 is too small. perhaps
-	// task execution time?
-	return ((val > 0) ? val : 1);
+	return ((val > 0) ? val : dist+1);
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
index 580728051d4e..97c9577141db 100644
--- a/include/litmus/nvidia_info.h
+++ b/include/litmus/nvidia_info.h
@@ -12,6 +12,7 @@
 #define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS
 
 int init_nvidia_info(void);
+void shutdown_nvidia_info(void);
 
 int is_nvidia_func(void* func_addr);
 
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 4553521146cc..0198884eab86 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -26,6 +26,7 @@ static inline int lt_after_eq(lt_t a, lt_t b)
 typedef enum {
 	RT_CLASS_HARD,
 	RT_CLASS_SOFT,
+	RT_CLASS_SOFT_W_SLIP,
 	RT_CLASS_BEST_EFFORT
 } task_class_t;
 
@@ -189,8 +190,8 @@ struct rt_param {
 	long unsigned int held_gpus;  // bitmap of held GPUs.
 
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
-	fp_t gpu_fb_param_a;
-	fp_t gpu_fb_param_b;
+	fp_t gpu_fb_param_a[MIG_LAST+1];
+	fp_t gpu_fb_param_b[MIG_LAST+1];
 
 	gpu_migration_dist_t gpu_migration;
 	int last_gpu;
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
index 232c7588d103..b1b71f6c5f0c 100644
--- a/include/litmus/sched_trace.h
+++ b/include/litmus/sched_trace.h
@@ -10,7 +10,8 @@ struct st_trace_header {
 	u8	type;		/* Of what type is this record?  */
 	u8	cpu;		/* On which CPU was it recorded? */
 	u16	pid;		/* PID of the task.              */
-	u32	job;		/* The job sequence number.      */
+	u32	job:24;		/* The job sequence number.      */
+	u8	extra;
 } __attribute__((packed));
 
 #define ST_NAME_LEN 16
@@ -136,6 +137,22 @@ struct st_nv_interrupt_end_data {
 	u32 serialNumber;
 } __attribute__((packed));
 
+struct st_prediction_err_data {
+	u64 distance;
+	u64 rel_err;
+} __attribute__((packed));
+
+struct st_migration_data {
+	u64 observed;
+	u64 estimated;
+} __attribute__((packed));
+
+struct migration_info {
+	u64 observed;
+	u64 estimated;
+	u8 distance;
+} __attribute__((packed));
+
 #define DATA(x) struct st_ ## x ## _data x;
 
 typedef enum {
@@ -160,6 +177,9 @@ typedef enum {
 	ST_EFF_PRIO_CHANGE,
 	ST_NV_INTERRUPT_BEGIN,
 	ST_NV_INTERRUPT_END,
+
+	ST_PREDICTION_ERR,
+	ST_MIGRATION,
 } st_event_record_type_t;
 
 struct st_event_record {
@@ -187,6 +207,9 @@ struct st_event_record {
 		DATA(effective_priority_change);
 		DATA(nv_interrupt_begin);
 		DATA(nv_interrupt_end);
+
+		DATA(prediction_err);
+		DATA(migration);
 	} data;
 } __attribute__((packed));
 
@@ -259,6 +282,19 @@ feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
 feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id,
 				unsigned long unused);
 
+feather_callback void do_sched_trace_prediction_err(unsigned long id,
+				struct task_struct* task,
+				gpu_migration_dist_t* distance,
+				fp_t* rel_err);
+
+
+
+
+
+feather_callback void do_sched_trace_migration(unsigned long id,
+				struct task_struct* task,
+				struct migration_info* mig_info);
+
 
 /* returns true if we're tracing an interrupt on current CPU */
 /* int is_interrupt_tracing_active(void); */
@@ -331,6 +367,12 @@ feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id,
 #define sched_trace_nv_interrupt_end(d) \
 	SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d)
 
+#define sched_trace_prediction_err(t, dist, rel_err) \
+	SCHED_TRACE3(SCHED_TRACE_BASE_ID + 20, do_sched_trace_prediction_err, t, dist, rel_err)
+
+#define sched_trace_migration(t, mig_info) \
+	SCHED_TRACE2(SCHED_TRACE_BASE_ID + 21, do_sched_trace_migration, t, mig_info)
+
 #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
 
 #endif /* __KERNEL__ */
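
The sched_trace.h header change above narrows the job sequence number from 32 to 24 bits and uses the freed byte as a generic `extra` field (do_sched_trace_migration() in litmus/sched_task_trace.c stores the migration distance there). The repacking is size-neutral, so trace parsers that assume an 8-byte header keep working. A minimal standalone check of that layout, assuming GCC/Clang packed-bitfield semantics; struct and field names mirror the diff, the `_v2` suffix is ours:

#include <stdint.h>

/* Mirror of st_trace_header after this patch. */
struct st_trace_header_v2 {
	uint8_t  type;     /* record type */
	uint8_t  cpu;      /* CPU it was recorded on */
	uint16_t pid;      /* task PID */
	uint32_t job:24;   /* job sequence number, now 24 bits */
	uint8_t  extra;    /* spare byte, e.g. migration distance */
} __attribute__((packed));

/* Size-neutral: 1 + 1 + 2 + 3 + 1 = 8 bytes, same as the old
 * u8/u8/u16/u32 layout. */
_Static_assert(sizeof(struct st_trace_header_v2) == 8,
	       "header must stay 8 bytes so trace parsers keep working");

int main(void) { return 0; }
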
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 1c42e08fdfaa..5ce271675662 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -412,12 +412,44 @@ struct tasklet_head
 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
 
+#ifdef CONFIG_LITMUS_NVIDIA
+static int __do_nv_now(struct tasklet_struct* tasklet)
+{
+	int success = 1;
+
+	if(tasklet_trylock(tasklet)) {
+		if (!atomic_read(&tasklet->count)) {
+			if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) {
+				BUG();
+			}
+			tasklet->func(tasklet->data);
+			tasklet_unlock(tasklet);
+		}
+		else {
+			success = 0;
+		}
+
+		tasklet_unlock(tasklet);
+	}
+	else {
+		success = 0;
+	}
+
+	return success;
+}
+#endif
+
 
 void __tasklet_schedule(struct tasklet_struct *t)
 {
 #ifdef CONFIG_LITMUS_NVIDIA
 	if(is_nvidia_func(t->func))
 	{
+#if 0
+		// do nvidia tasklets right away and return
+		if(__do_nv_now(t))
+			return;
+#else
 		u32 nvidia_device = get_tasklet_nv_device_num(t);
 		//	TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
 		//		  __FUNCTION__, nvidia_device,litmus_clock());
@@ -461,7 +493,9 @@ void __tasklet_schedule(struct tasklet_struct *t)
 			}
 		}
 		unlock_nv_registry(nvidia_device, &flags);
+#endif
 	}
+
 #endif
 
 	___tasklet_schedule(t);
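
For reference, __do_nv_now() above follows the core of the kernel's tasklet_action() loop: grab the per-tasklet run lock with tasklet_trylock(), check that the tasklet is enabled (count == 0), clear TASKLET_STATE_SCHED, and invoke the callback inline rather than deferring it to softirq context. (As committed, the success path also clears the run bit a second time after the if/else, which is redundant but harmless, and the call site is compiled out with #if 0, so the registry path still runs.) A userspace sketch of the same trylock-and-run protocol using C11 atomics; every name here is illustrative, not kernel API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct mini_tasklet {
	atomic_bool running;    /* stands in for TASKLET_STATE_RUN */
	atomic_bool scheduled;  /* stands in for TASKLET_STATE_SCHED */
	atomic_int  count;      /* nonzero => disabled */
	void (*func)(unsigned long);
	unsigned long data;
};

/* Try to run the tasklet right now; return 1 on success, 0 if it is
 * already running elsewhere, disabled, or not scheduled. */
static int run_now(struct mini_tasklet *t)
{
	if (atomic_exchange(&t->running, true))
		return 0;                    /* someone else holds the run lock */
	if (atomic_load(&t->count) == 0 &&
	    atomic_exchange(&t->scheduled, false)) {
		t->func(t->data);            /* execute inline, no deferral */
		atomic_store(&t->running, false);
		return 1;
	}
	atomic_store(&t->running, false);
	return 0;
}

static void hello(unsigned long d) { printf("ran with %lu\n", d); }

int main(void)
{
	struct mini_tasklet t = { .func = hello, .data = 42 };
	atomic_store(&t.scheduled, true);
	return !run_now(&t);
}
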
diff --git a/litmus/Kconfig b/litmus/Kconfig
index a34440f3d8bc..8c156e4da528 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -157,7 +157,7 @@ config SCHED_TASK_TRACE
 config SCHED_TASK_TRACE_SHIFT
 	int "Buffer size for sched_trace_xxx() events"
 	depends on SCHED_TASK_TRACE
-	range 8 13
+	range 8 15
 	default 9
 	help
 
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
index 70a86bdd9aec..9762be1a085e 100644
--- a/litmus/gpu_affinity.c
+++ b/litmus/gpu_affinity.c
@@ -5,25 +5,32 @@
 #include <litmus/litmus.h>
 #include <litmus/gpu_affinity.h>
 
+#include <litmus/sched_trace.h>
+
 #define OBSERVATION_CAP 2*1e9
 
-static void update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
+static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
 {
+	fp_t relative_err;
 	fp_t err, new;
 	fp_t actual = _integer_to_fp(observed);
 
 	err = _sub(actual, fb->est);
 	new = _add(_mul(a, err), _mul(b, fb->accum_err));
 
+	relative_err = _div(err, actual);
+
 	fb->est = new;
 	fb->accum_err = _add(fb->accum_err, err);
+
+	return relative_err;
 }
 
 void update_gpu_estimate(struct task_struct *t, lt_t observed)
 {
 	feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
 
-	WARN_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
+	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
 
 	if(unlikely(fb->est.val == 0)) {
 		// kludge-- cap observed values to prevent whacky estimations.
@@ -40,18 +47,29 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed)
 		fb->accum_err = _div(fb->est, _integer_to_fp(2));  // ...seems to work.
 	}
 	else {
-		update_estimate(fb,
-						tsk_rt(t)->gpu_fb_param_a,
-						tsk_rt(t)->gpu_fb_param_b,
-						observed);
+		fp_t rel_err = update_estimate(fb,
+						tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
+						tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
+						observed);
 
-		if(_fp_to_integer(fb->est) <= 0) {
-			// TODO: talk to Jonathan about how well this works.
-			// Maybe we should average the observed and est instead?
+		if(unlikely(_fp_to_integer(fb->est) <= 0)) {
 			TRACE_TASK(t, "Invalid estimate. Patching.\n");
 			fb->est = _integer_to_fp(observed);
 			fb->accum_err = _div(fb->est, _integer_to_fp(2));  // ...seems to work.
 		}
+		else {
+//			struct migration_info mig_info;
+
+			sched_trace_prediction_err(t,
+					&(tsk_rt(t)->gpu_migration),
+					&rel_err);
+
+//			mig_info.observed = observed;
+//			mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
+//			mig_info.distance = tsk_rt(t)->gpu_migration;
+//
+//			sched_trace_migration(t, &mig_info);
+		}
 	}
 
 	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n",
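
update_estimate() above is the feedback filter used throughout this patch: the next estimate is a*err + b*accum_err, where err is the deviation of the latest observation from the previous estimate and accum_err integrates past error, and the function now returns the relative error err/actual so update_gpu_estimate() can feed it to sched_trace_prediction_err(). A floating-point sketch of the same arithmetic; the kernel works in litmus fp_t fixed point, the a/b pair is the distance-0 "emperical" pair from init_gpu_affinity_state() later in this diff, and the observation values are invented for the demo:

#include <stdio.h>

/* Floating-point analogue of update_estimate()/update_gpu_estimate()
 * in litmus/gpu_affinity.c. */
struct feedback_est { double est, accum_err; };

static double update_estimate(struct feedback_est *fb, double a, double b,
			      double observed)
{
	double err = observed - fb->est;
	double rel_err = err / observed;        /* what gets traced */

	fb->est = a * err + b * fb->accum_err;  /* P on err, I on accumulated err */
	fb->accum_err += err;
	return rel_err;
}

int main(void)
{
	/* First observation seeds est = observed, accum_err = est/2,
	 * exactly as update_gpu_estimate() does. */
	struct feedback_est fb = { .est = 100.0, .accum_err = 50.0 };
	double obs[] = { 110.0, 105.0, 98.0 };

	for (int i = 0; i < 3; i++) {
		double rel = update_estimate(&fb, 0.7550, 4.5800, obs[i]);
		if (fb.est <= 0.0) {  /* the "Invalid estimate. Patching." case */
			fb.est = obs[i];
			fb.accum_err = fb.est / 2.0;
		}
		printf("obs=%.0f est=%.2f rel_err=%.3f\n", obs[i], fb.est, rel);
	}
	return 0;
}

Note how quickly the filter can swing negative with a large accumulated error; that volatility is exactly why the kernel code keeps the patch-up branch that resets the estimate to the raw observation.
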
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 023443014d4b..83b708ab85cb 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1346,6 +1346,10 @@ int ikglp_unlock(struct litmus_lock* l)
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 	if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
 		fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
+		if(fq_of_new_on_fq->count == 0) {
+			// ignore it?
+//			fq_of_new_on_fq = fq;
+		}
 	}
 	else {
 		fq_of_new_on_fq = fq;
@@ -1383,6 +1387,10 @@ int ikglp_unlock(struct litmus_lock* l)
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 	if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
 		fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
+		if(fq_of_new_on_fq->count == 0) {
+			// ignore it?
+//			fq_of_new_on_fq = fq;
+		}
 	}
 	else {
 		fq_of_new_on_fq = fq;
@@ -1409,6 +1417,10 @@ int ikglp_unlock(struct litmus_lock* l)
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 	if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
 		fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
+		if(fq_of_new_on_fq->count == 0) {
+			// ignore it?
+//			fq_of_new_on_fq = fq;
+		}
 	}
 	else {
 		fq_of_new_on_fq = fq;
@@ -1569,7 +1581,7 @@ int ikglp_unlock(struct litmus_lock* l)
 		}
 	}
 
-
+wake_kludge:
 	if(waitqueue_active(&fq->wait))
 	{
 		wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list);
@@ -1672,6 +1684,16 @@ int ikglp_unlock(struct litmus_lock* l)
 		// wake up the new resource holder!
 		wake_up_process(next);
 	}
+	if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) {
+		// The guy we promoted went to an empty FQ. (Why didn't stealing pick this up?)
+		// Wake up the new guy too.
+
+		BUG_ON(fq_of_new_on_fq->owner != NULL);
+
+		fq = fq_of_new_on_fq;
+		fq_of_new_on_fq = NULL;
+		goto wake_kludge;
+	}
 
 	unlock_fine_irqrestore(&sem->lock, flags);
 	unlock_global_irqrestore(dgl_lock, flags);
@@ -1917,7 +1939,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
 		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
 				  NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
-		return(NULL);
+//		return(NULL);
 	}
 
 	ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
@@ -2600,8 +2622,8 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 
 	tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu);  // record the type of migration
 
-	TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
-			  t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
+	TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
+			  t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration);
 
 	// count the number or resource holders
 	++(*(aff->q_info[replica].nr_cur_users));
@@ -2626,8 +2648,6 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
 
 	est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
 
-	tsk_rt(t)->last_gpu = gpu;
-
 	// count the number or resource holders
 	--(*(aff->q_info[replica].nr_cur_users));
 
@@ -2636,12 +2656,15 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
 	// update estimates
 	update_gpu_estimate(t, get_gpu_time(t));
 
-	TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. "
+	TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. "
 			  "estimated was %llu. diff is %d\n",
-			  t->comm, t->pid, gpu,
+			  t->comm, t->pid, gpu, tsk_rt(t)->last_gpu,
+			  tsk_rt(t)->gpu_migration,
 			  get_gpu_time(t),
 			  est_time,
 			  (long long)get_gpu_time(t) - (long long)est_time);
+
+	tsk_rt(t)->last_gpu = gpu;
 }
 
 struct ikglp_affinity_ops gpu_ikglp_affinity =
diff --git a/litmus/jobs.c b/litmus/jobs.c
index 36e314625d86..1d97462cc128 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t)
 {
 	BUG_ON(!t);
 	/* prepare next release */
-	t->rt_param.job_params.release = t->rt_param.job_params.deadline;
-	t->rt_param.job_params.deadline += get_rt_period(t);
+
+	if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) {
+		/* allow the release point to slip if we've passed our deadline. */
+		lt_t now = litmus_clock();
+		t->rt_param.job_params.release =
+			(t->rt_param.job_params.deadline < now) ?
+				now : t->rt_param.job_params.deadline;
+		t->rt_param.job_params.deadline =
+			t->rt_param.job_params.release + get_rt_period(t);
+	}
+	else {
+		t->rt_param.job_params.release = t->rt_param.job_params.deadline;
+		t->rt_param.job_params.deadline += get_rt_period(t);
+	}
+
 	t->rt_param.job_params.exec_time = 0;
 	/* update job sequence number */
 	t->rt_param.job_params.job_no++;
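
The new RT_CLASS_SOFT_W_SLIP branch above re-anchors a job's release at the current time when the previous deadline has already passed, instead of releasing back-to-back jobs that chase a stale timeline; the classic branch keeps releases on the original period grid. A standalone sketch of both policies, with plain uint64_t nanoseconds standing in for lt_t and `now` passed by the caller instead of litmus_clock():

#include <stdint.h>
#include <stdio.h>

struct job_params { uint64_t release, deadline; };

/* Classic LITMUS behavior: releases stay on the original period grid. */
static void next_period_strict(struct job_params *j, uint64_t period)
{
	j->release = j->deadline;
	j->deadline += period;
}

/* RT_CLASS_SOFT_W_SLIP: if the deadline is already past, slip to `now`. */
static void next_period_w_slip(struct job_params *j, uint64_t period,
			       uint64_t now)
{
	j->release = (j->deadline < now) ? now : j->deadline;
	j->deadline = j->release + period;
}

int main(void)
{
	struct job_params a = { 0, 10 }, b = { 0, 10 };
	uint64_t now = 25;  /* we overran: deadline 10 is long past */

	next_period_strict(&a, 10);   /* release=10, deadline=20 */
	next_period_w_slip(&b, 10, now);  /* release=25, deadline=35 */
	printf("strict: release=%llu deadline=%llu\n",
	       (unsigned long long)a.release, (unsigned long long)a.deadline);
	printf("slip:   release=%llu deadline=%llu\n",
	       (unsigned long long)b.release, (unsigned long long)b.deadline);
	return 0;
}
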
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index 0b64977789a6..bff857ed8d4e 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -590,7 +590,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
 		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
 				  NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
-		return(NULL);
+//		return(NULL);
 	}
 
 	kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 5b301c418b96..d1f836c8af6e 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -318,11 +318,21 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
 void init_gpu_affinity_state(struct task_struct* p)
 {
 	// under-damped
-	p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
-	p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
-	// critically-damped
-	// p->rt_param.gpu_fb_param_a = _frac(102, 1000);
-	// p->rt_param.gpu_fb_param_b = _frac(303, 1000);
+	//p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
+	//p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
+
+	// emperical;
+	p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
+	p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
+
+	p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
+	p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
+
+	p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
+	p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
+
+	p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
+	p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);
 
 	p->rt_param.gpu_migration = MIG_NONE;
 	p->rt_param.last_gpu = -1;
diff --git a/litmus/locking.c b/litmus/locking.c
index cb11c04ed0d4..718a5a3281d7 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -349,10 +349,10 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait)
 all_acquired:
 
 	// FOR SANITY CHECK FOR TESTING
-	for(i = 0; i < dgl_wait->size; ++i) {
-		struct litmus_lock *l = dgl_wait->locks[i];
-		BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
-	}
+//	for(i = 0; i < dgl_wait->size; ++i) {
+//		struct litmus_lock *l = dgl_wait->locks[i];
+//		BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
+//	}
 
 	TRACE_CUR("Acquired entire DGL\n");
 
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
index 3d8c50882066..4b86a50d3bd1 100644
--- a/litmus/nvidia_info.c
+++ b/litmus/nvidia_info.c
@@ -265,6 +265,11 @@ int init_nvidia_info(void)
 	}
 }
 
+void shutdown_nvidia_info(void)
+{
+	nvidia_mod = NULL;
+	mb();
+}
 
 /* works with pointers to static data inside the module too. */
 int is_nvidia_func(void* func_addr)
@@ -319,14 +324,11 @@ u32 get_work_nv_device_num(const struct work_struct *t)
 }
 
 
-
-#define MAX_NR_OWNERS 3
-
 typedef struct {
 	raw_spinlock_t lock;
 	int nr_owners;
 	struct task_struct* max_prio_owner;
-	struct task_struct* owners[MAX_NR_OWNERS];
+	struct task_struct* owners[NV_MAX_SIMULT_USERS];
 }nv_device_registry_t;
 
 static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
@@ -431,6 +433,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t)
 		return ret;  // assume already registered.
 	}
 
+
 	raw_spin_lock_irqsave(&reg->lock, flags);
 
 	if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
@@ -461,7 +464,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t)
 	else
 	{
 		TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
-		ret = -EBUSY;
+		//ret = -EBUSY;
 	}
 
 	raw_spin_unlock_irqrestore(&reg->lock, flags);
diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c
index 965164c43537..75ed87c5ed48 100644
--- a/litmus/rsm_lock.c
+++ b/litmus/rsm_lock.c
@@ -502,6 +502,13 @@ int rsm_mutex_unlock(struct litmus_lock* l)
 		tsk_rt(next)->blocked_lock = NULL;
 		mb();
 
+#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
+		// re-enable tracking
+		if(tsk_rt(next)->held_gpus) {
+			tsk_rt(next)->suspend_gpu_tracker_on_block = 0;
+		}
+#endif
+
 		wake_up_process(next);
 	}
 	else {
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 99f7620925ba..be14dbec6ed2 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -733,11 +733,11 @@ static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio,
 	unsigned long flags;
 	cedf_domain_t *cluster;
 	struct task_struct *probe;
-
+
 	// identify the cluster by the assignment of these tasks. one should
 	// be non-NULL.
 	probe = (old_prio) ? old_prio : new_prio;
-
+
 	if(probe) {
 		cluster = task_cpu_cluster(probe);
 
@@ -838,8 +838,13 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
 #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
 	if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
 		if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
+			// don't track preemptions or locking protocol suspensions.
+			TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n");
 			stop_gpu_tracker(entry->scheduled);
 		}
+		else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
+			TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n");
+		}
 	}
 #endif
 
@@ -1596,6 +1601,10 @@ static void cleanup_cedf(void)
 {
 	int i;
 
+#ifdef CONFIG_LITMUS_NVIDIA
+	shutdown_nvidia_info();
+#endif
+
 	if (clusters_allocated) {
 		for (i = 0; i < num_clusters; i++) {
 			kfree(cedf[i].cpus);
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 2433297b7482..245e41c25a5d 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -13,6 +13,10 @@
 #include <litmus/preempt.h>
 #include <litmus/jobs.h>
 
+#ifdef CONFIG_LITMUS_NVIDIA
+#include <litmus/nvidia_info.h>
+#endif
+
 /*
  * Generic function to trigger preemption on either local or remote cpu
  * from scheduler plugins. The key feature is that this function is
@@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void)
 
 static long litmus_dummy_activate_plugin(void)
 {
+#ifdef CONFIG_LITMUS_NVIDIA
+	shutdown_nvidia_info();
+#endif
 	return 0;
 }
 
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index 2bd3a787611b..f7f575346b54 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -247,6 +247,53 @@ feather_callback void do_sched_trace_action(unsigned long id,
 }
 
 
+
+
+feather_callback void do_sched_trace_prediction_err(unsigned long id,
+				unsigned long _task,
+				unsigned long _distance,
+				unsigned long _rel_err)
+{
+	struct task_struct *t = (struct task_struct*) _task;
+	struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t);
+
+	if (rec) {
+		gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance;
+		fp_t* rel_err = (fp_t*) _rel_err;
+
+		rec->data.prediction_err.distance = *distance;
+		rec->data.prediction_err.rel_err = rel_err->val;
+		put_record(rec);
+	}
+}
+
+
+feather_callback void do_sched_trace_migration(unsigned long id,
+				unsigned long _task,
+				unsigned long _mig_info)
+{
+	struct task_struct *t = (struct task_struct*) _task;
+	struct st_event_record *rec = get_record(ST_MIGRATION, t);
+
+	if (rec) {
+		struct migration_info* mig_info = (struct migration_info*) _mig_info;
+
+		rec->hdr.extra = mig_info->distance;
+		rec->data.migration.observed = mig_info->observed;
+		rec->data.migration.estimated = mig_info->estimated;
+
+		put_record(rec);
+	}
+}
+
+
+
+
+
+
+
+
+
 feather_callback void do_sched_trace_tasklet_release(unsigned long id,
 				unsigned long _owner)
 {
@@ -457,3 +504,6 @@ EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);
 
 
 
+
+
+