From 44326648c2ea81b9a32619644fe9c665ed0d9e0b Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 14 May 2012 16:51:05 -0400 Subject: Final GPUSync implementation. --- include/linux/interrupt.h | 6 ++-- include/litmus/gpu_affinity.h | 7 ++--- include/litmus/nvidia_info.h | 1 + include/litmus/rt_param.h | 5 ++-- include/litmus/sched_trace.h | 44 ++++++++++++++++++++++++++++- kernel/mutex.c | 48 +++++++++++++++---------------- kernel/sched.c | 4 +-- kernel/softirq.c | 66 ++++++++++++++++++++++++++++++++----------- litmus/Kconfig | 16 +++++------ litmus/gpu_affinity.c | 38 ++++++++++++++++++------- litmus/ikglp_lock.c | 39 +++++++++++++++++++------ litmus/jobs.c | 17 +++++++++-- litmus/kfmlp_lock.c | 2 +- litmus/litmus.c | 20 +++++++++---- litmus/locking.c | 8 +++--- litmus/nvidia_info.c | 13 +++++---- litmus/rsm_lock.c | 7 +++++ litmus/sched_cedf.c | 13 +++++++-- litmus/sched_plugin.c | 7 +++++ litmus/sched_task_trace.c | 50 ++++++++++++++++++++++++++++++++ 20 files changed, 313 insertions(+), 98 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 57a7bc8807be..8fb3dad55f19 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -528,7 +528,7 @@ static inline int tasklet_trylock(struct tasklet_struct *t) static inline void tasklet_unlock(struct tasklet_struct *t) { - smp_mb__before_clear_bit(); + smp_mb__before_clear_bit(); clear_bit(TASKLET_STATE_RUN, &(t)->state); } @@ -590,7 +590,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) } static inline void tasklet_enable(struct tasklet_struct *t) -{ +{ smp_mb__before_atomic_dec(); atomic_dec(&t->count); } @@ -659,7 +659,7 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) * if more than one irq occurred. */ -#if defined(CONFIG_GENERIC_HARDIRQS) && !defined(CONFIG_GENERIC_IRQ_PROBE) +#if defined(CONFIG_GENERIC_HARDIRQS) && !defined(CONFIG_GENERIC_IRQ_PROBE) static inline unsigned long probe_irq_on(void) { return 0; diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h index d4db2003ad86..6b3fb8b28745 100644 --- a/include/litmus/gpu_affinity.h +++ b/include/litmus/gpu_affinity.h @@ -43,10 +43,7 @@ static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); } - // minimum value is 1 (val is 0 if we haven't run with local affinity yet) - // TODO: pick a better default min-value. 1 is too small. perhaps - // task execution time? - return ((val > 0) ? val : 1); + return ((val > 0) ? val : dist+1); } -#endif \ No newline at end of file +#endif diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h index 580728051d4e..97c9577141db 100644 --- a/include/litmus/nvidia_info.h +++ b/include/litmus/nvidia_info.h @@ -12,6 +12,7 @@ #define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS int init_nvidia_info(void); +void shutdown_nvidia_info(void); int is_nvidia_func(void* func_addr); diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 4553521146cc..0198884eab86 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -26,6 +26,7 @@ static inline int lt_after_eq(lt_t a, lt_t b) typedef enum { RT_CLASS_HARD, RT_CLASS_SOFT, + RT_CLASS_SOFT_W_SLIP, RT_CLASS_BEST_EFFORT } task_class_t; @@ -189,8 +190,8 @@ struct rt_param { long unsigned int held_gpus; // bitmap of held GPUs. 
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING - fp_t gpu_fb_param_a; - fp_t gpu_fb_param_b; + fp_t gpu_fb_param_a[MIG_LAST+1]; + fp_t gpu_fb_param_b[MIG_LAST+1]; gpu_migration_dist_t gpu_migration; int last_gpu; diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h index 232c7588d103..b1b71f6c5f0c 100644 --- a/include/litmus/sched_trace.h +++ b/include/litmus/sched_trace.h @@ -10,7 +10,8 @@ struct st_trace_header { u8 type; /* Of what type is this record? */ u8 cpu; /* On which CPU was it recorded? */ u16 pid; /* PID of the task. */ - u32 job; /* The job sequence number. */ + u32 job:24; /* The job sequence number. */ + u8 extra; } __attribute__((packed)); #define ST_NAME_LEN 16 @@ -136,6 +137,22 @@ struct st_nv_interrupt_end_data { u32 serialNumber; } __attribute__((packed)); +struct st_prediction_err_data { + u64 distance; + u64 rel_err; +} __attribute__((packed)); + +struct st_migration_data { + u64 observed; + u64 estimated; +} __attribute__((packed)); + +struct migration_info { + u64 observed; + u64 estimated; + u8 distance; +} __attribute__((packed)); + #define DATA(x) struct st_ ## x ## _data x; typedef enum { @@ -160,6 +177,9 @@ typedef enum { ST_EFF_PRIO_CHANGE, ST_NV_INTERRUPT_BEGIN, ST_NV_INTERRUPT_END, + + ST_PREDICTION_ERR, + ST_MIGRATION, } st_event_record_type_t; struct st_event_record { @@ -187,6 +207,9 @@ struct st_event_record { DATA(effective_priority_change); DATA(nv_interrupt_begin); DATA(nv_interrupt_end); + + DATA(prediction_err); + DATA(migration); } data; } __attribute__((packed)); @@ -259,6 +282,19 @@ feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long unused); +feather_callback void do_sched_trace_prediction_err(unsigned long id, + struct task_struct* task, + gpu_migration_dist_t* distance, + fp_t* rel_err); + + + + + +feather_callback void do_sched_trace_migration(unsigned long id, + struct task_struct* task, + struct migration_info* mig_info); + /* returns true if we're tracing an interrupt on current CPU */ /* int is_interrupt_tracing_active(void); */ @@ -331,6 +367,12 @@ feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, #define sched_trace_nv_interrupt_end(d) \ SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d) +#define sched_trace_prediction_err(t, dist, rel_err) \ + SCHED_TRACE3(SCHED_TRACE_BASE_ID + 20, do_sched_trace_prediction_err, t, dist, rel_err) + +#define sched_trace_migration(t, mig_info) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 21, do_sched_trace_migration, t, mig_info) + #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ #endif /* __KERNEL__ */ diff --git a/kernel/mutex.c b/kernel/mutex.c index 2f363b9bfc1f..96bcecd385d3 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -511,12 +511,12 @@ void mutex_lock_sfx(struct mutex *lock, struct task_struct *task = current; struct mutex_waiter waiter; unsigned long flags; - + preempt_disable(); mutex_acquire(&lock->dep_map, subclass, 0, ip); spin_lock_mutex(&lock->wait_lock, flags); - + if(pre) { if(unlikely(pre(pre_arg))) @@ -530,16 +530,16 @@ void mutex_lock_sfx(struct mutex *lock, debug_mutex_lock_common(lock, &waiter); debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); - + /* add waiting tasks to the end of the waitqueue (FIFO): */ list_add_tail(&waiter.list, &lock->wait_list); waiter.task = task; - + if (atomic_xchg(&lock->count, -1) == 1) goto done; - + lock_contended(&lock->dep_map, ip); - + for (;;) { /* * Lets try 
to take the lock again - this is needed even if @@ -552,9 +552,9 @@ void mutex_lock_sfx(struct mutex *lock, */ if (atomic_xchg(&lock->count, -1) == 1) break; - + __set_task_state(task, state); - + /* didnt get the lock, go to sleep: */ spin_unlock_mutex(&lock->wait_lock, flags); preempt_enable_no_resched(); @@ -562,22 +562,22 @@ void mutex_lock_sfx(struct mutex *lock, preempt_disable(); spin_lock_mutex(&lock->wait_lock, flags); } - + done: lock_acquired(&lock->dep_map, ip); /* got the lock - rejoice! */ mutex_remove_waiter(lock, &waiter, current_thread_info()); mutex_set_owner(lock); - + /* set it to 0 if there are no waiters left: */ if (likely(list_empty(&lock->wait_list))) atomic_set(&lock->count, 0); - + if(post) - post(post_arg); - + post(post_arg); + spin_unlock_mutex(&lock->wait_lock, flags); - + debug_mutex_free_waiter(&waiter); preempt_enable(); } @@ -588,16 +588,16 @@ void mutex_unlock_sfx(struct mutex *lock, side_effect_t post, unsigned long post_arg) { unsigned long flags; - + spin_lock_mutex(&lock->wait_lock, flags); - + if(pre) pre(pre_arg); - + //mutex_release(&lock->dep_map, nested, _RET_IP_); mutex_release(&lock->dep_map, 1, _RET_IP_); debug_mutex_unlock(lock); - + /* * some architectures leave the lock unlocked in the fastpath failure * case, others need to leave it locked. In the later case we have to @@ -605,21 +605,21 @@ void mutex_unlock_sfx(struct mutex *lock, */ if (__mutex_slowpath_needs_to_unlock()) atomic_set(&lock->count, 1); - + if (!list_empty(&lock->wait_list)) { /* get the first entry from the wait-list: */ struct mutex_waiter *waiter = list_entry(lock->wait_list.next, struct mutex_waiter, list); - + debug_mutex_wake_waiter(lock, waiter); - + wake_up_process(waiter->task); } - + if(post) post(post_arg); - - spin_unlock_mutex(&lock->wait_lock, flags); + + spin_unlock_mutex(&lock->wait_lock, flags); } EXPORT_SYMBOL(mutex_unlock_sfx); diff --git a/kernel/sched.c b/kernel/sched.c index f3d9a69a3777..2f990b4b24f9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4430,8 +4430,8 @@ litmus_need_resched_nonpreemptible: #ifdef CONFIG_LITMUS_PAI_SOFTIRQD litmus->run_tasklets(prev); -#endif - +#endif + srp_ceiling_block(); } EXPORT_SYMBOL(schedule); diff --git a/kernel/softirq.c b/kernel/softirq.c index 1c42e08fdfaa..4d7b1a3e4d01 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -216,7 +216,7 @@ EXPORT_SYMBOL(local_bh_enable_ip); asmlinkage void __do_softirq(void) { - struct softirq_action *h; + struct softirq_action *h; __u32 pending; int max_restart = MAX_SOFTIRQ_RESTART; int cpu; @@ -254,10 +254,10 @@ restart: softirq_to_name[vec_nr], h->action, prev_count, preempt_count()); preempt_count() = prev_count; - } + } rcu_bh_qs(cpu); - } + } h++; pending >>= 1; } while (pending); @@ -412,13 +412,45 @@ struct tasklet_head static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); +#ifdef CONFIG_LITMUS_NVIDIA +static int __do_nv_now(struct tasklet_struct* tasklet) +{ + int success = 1; + + if(tasklet_trylock(tasklet)) { + if (!atomic_read(&tasklet->count)) { + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) { + BUG(); + } + tasklet->func(tasklet->data); + tasklet_unlock(tasklet); + } + else { + success = 0; + } + + tasklet_unlock(tasklet); + } + else { + success = 0; + } + + return success; +} +#endif + void __tasklet_schedule(struct tasklet_struct *t) { #ifdef CONFIG_LITMUS_NVIDIA if(is_nvidia_func(t->func)) { - u32 nvidia_device = get_tasklet_nv_device_num(t); +#if 0 + // do nvidia tasklets 
right away and return + if(__do_nv_now(t)) + return; +#else + u32 nvidia_device = get_tasklet_nv_device_num(t); // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", // __FUNCTION__, nvidia_device,litmus_clock()); @@ -438,7 +470,7 @@ void __tasklet_schedule(struct tasklet_struct *t) if(is_realtime(device_owner)) { TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", - __FUNCTION__, nvidia_device,litmus_clock()); + __FUNCTION__, nvidia_device,litmus_clock()); TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", __FUNCTION__,device_owner->pid,nvidia_device); @@ -461,7 +493,9 @@ void __tasklet_schedule(struct tasklet_struct *t) } } unlock_nv_registry(nvidia_device, &flags); +#endif } + #endif ___tasklet_schedule(t); @@ -487,19 +521,19 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) { #ifdef CONFIG_LITMUS_NVIDIA if(is_nvidia_func(t->func)) - { + { u32 nvidia_device = get_tasklet_nv_device_num(t); // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", // __FUNCTION__, nvidia_device,litmus_clock()); unsigned long flags; struct task_struct* device_owner; - + lock_nv_registry(nvidia_device, &flags); - + device_owner = get_nv_max_device_owner(nvidia_device); - if(device_owner==NULL) + if(device_owner==NULL) { t->owner = NULL; } @@ -508,10 +542,10 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) if( is_realtime(device_owner)) { TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n", - __FUNCTION__, nvidia_device,litmus_clock()); + __FUNCTION__, nvidia_device,litmus_clock()); TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", __FUNCTION__,device_owner->pid,nvidia_device); - + t->owner = device_owner; sched_trace_tasklet_release(t->owner); if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device))) @@ -553,15 +587,15 @@ EXPORT_SYMBOL(___tasklet_hi_schedule); void __tasklet_hi_schedule_first(struct tasklet_struct *t) { BUG_ON(!irqs_disabled()); -#ifdef CONFIG_LITMUS_NVIDIA +#ifdef CONFIG_LITMUS_NVIDIA if(is_nvidia_func(t->func)) - { + { u32 nvidia_device = get_tasklet_nv_device_num(t); // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", // __FUNCTION__, nvidia_device,litmus_clock()); unsigned long flags; struct task_struct* device_owner; - + lock_nv_registry(nvidia_device, &flags); device_owner = get_nv_max_device_owner(nvidia_device); @@ -576,10 +610,10 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) { TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", __FUNCTION__, nvidia_device,litmus_clock()); - + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", __FUNCTION__,device_owner->pid,nvidia_device); - + t->owner = device_owner; sched_trace_tasklet_release(t->owner); if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device))) diff --git a/litmus/Kconfig b/litmus/Kconfig index a34440f3d8bc..03cc92c50eb9 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -157,7 +157,7 @@ config SCHED_TASK_TRACE config SCHED_TASK_TRACE_SHIFT int "Buffer size for sched_trace_xxx() events" depends on SCHED_TASK_TRACE - range 8 13 + range 8 15 default 9 help @@ -253,7 +253,7 @@ endmenu menu "Interrupt Handling" -choice +choice prompt "Scheduling of interrupt bottom-halves in Litmus." default LITMUS_SOFTIRQD_NONE depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ @@ -282,11 +282,11 @@ config LITMUS_PAI_SOFTIRQD scheduling points. Trades context switch overhead at the cost of non-preemptive durations of bottom half processing. - - G-EDF/RM, C-EDF/RM ONLY for now! 
- -endchoice - + + G-EDF/RM, C-EDF/RM ONLY for now! + +endchoice + config NR_LITMUS_SOFTIRQD int "Number of klitirqd." @@ -344,7 +344,7 @@ choice depends on LITMUS_NVIDIA help Select the version of CUDA/driver to support. - + config CUDA_4_0 bool "CUDA 4.0" depends on LITMUS_NVIDIA diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 70a86bdd9aec..9762be1a085e 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c @@ -5,25 +5,32 @@ #include #include +#include + #define OBSERVATION_CAP 2*1e9 -static void update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) +static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) { + fp_t relative_err; fp_t err, new; fp_t actual = _integer_to_fp(observed); err = _sub(actual, fb->est); new = _add(_mul(a, err), _mul(b, fb->accum_err)); + relative_err = _div(err, actual); + fb->est = new; fb->accum_err = _add(fb->accum_err, err); + + return relative_err; } void update_gpu_estimate(struct task_struct *t, lt_t observed) { feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); - WARN_ON(tsk_rt(t)->gpu_migration > MIG_LAST); + BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST); if(unlikely(fb->est.val == 0)) { // kludge-- cap observed values to prevent whacky estimations. @@ -40,18 +47,29 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. } else { - update_estimate(fb, - tsk_rt(t)->gpu_fb_param_a, - tsk_rt(t)->gpu_fb_param_b, - observed); - - if(_fp_to_integer(fb->est) <= 0) { - // TODO: talk to Jonathan about how well this works. - // Maybe we should average the observed and est instead? + fp_t rel_err = update_estimate(fb, + tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration], + tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration], + observed); + + if(unlikely(_fp_to_integer(fb->est) <= 0)) { TRACE_TASK(t, "Invalid estimate. Patching.\n"); fb->est = _integer_to_fp(observed); fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. } + else { +// struct migration_info mig_info; + + sched_trace_prediction_err(t, + &(tsk_rt(t)->gpu_migration), + &rel_err); + +// mig_info.observed = observed; +// mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration); +// mig_info.distance = tsk_rt(t)->gpu_migration; +// +// sched_trace_migration(t, &mig_info); + } } TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n", diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c index 023443014d4b..83b708ab85cb 100644 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c @@ -1346,6 +1346,10 @@ int ikglp_unlock(struct litmus_lock* l) #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); + if(fq_of_new_on_fq->count == 0) { + // ignore it? +// fq_of_new_on_fq = fq; + } } else { fq_of_new_on_fq = fq; @@ -1383,6 +1387,10 @@ int ikglp_unlock(struct litmus_lock* l) #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); + if(fq_of_new_on_fq->count == 0) { + // ignore it? 
+// fq_of_new_on_fq = fq; + } } else { fq_of_new_on_fq = fq; @@ -1409,6 +1417,10 @@ int ikglp_unlock(struct litmus_lock* l) #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); + if(fq_of_new_on_fq->count == 0) { + // ignore it? +// fq_of_new_on_fq = fq; + } } else { fq_of_new_on_fq = fq; @@ -1569,7 +1581,7 @@ int ikglp_unlock(struct litmus_lock* l) } } - +wake_kludge: if(waitqueue_active(&fq->wait)) { wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list); @@ -1672,6 +1684,16 @@ int ikglp_unlock(struct litmus_lock* l) // wake up the new resource holder! wake_up_process(next); } + if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) { + // The guy we promoted when to an empty FQ. (Why didn't stealing pick this up?) + // Wake up the new guy too. + + BUG_ON(fq_of_new_on_fq->owner != NULL); + + fq = fq_of_new_on_fq; + fq_of_new_on_fq = NULL; + goto wake_kludge; + } unlock_fine_irqrestore(&sem->lock, flags); unlock_global_irqrestore(dgl_lock, flags); @@ -1917,7 +1939,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); - return(NULL); +// return(NULL); } ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); @@ -2600,8 +2622,8 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration - TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n", - t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); + TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n", + t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration); // count the number or resource holders ++(*(aff->q_info[replica].nr_cur_users)); @@ -2626,8 +2648,6 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); - tsk_rt(t)->last_gpu = gpu; - // count the number or resource holders --(*(aff->q_info[replica].nr_cur_users)); @@ -2636,12 +2656,15 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, // update estimates update_gpu_estimate(t, get_gpu_time(t)); - TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. " + TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. " "estimated was %llu. diff is %d\n", - t->comm, t->pid, gpu, + t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, + tsk_rt(t)->gpu_migration, get_gpu_time(t), est_time, (long long)get_gpu_time(t) - (long long)est_time); + + tsk_rt(t)->last_gpu = gpu; } struct ikglp_affinity_ops gpu_ikglp_affinity = diff --git a/litmus/jobs.c b/litmus/jobs.c index 36e314625d86..1d97462cc128 100644 --- a/litmus/jobs.c +++ b/litmus/jobs.c @@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t) { BUG_ON(!t); /* prepare next release */ - t->rt_param.job_params.release = t->rt_param.job_params.deadline; - t->rt_param.job_params.deadline += get_rt_period(t); + + if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) { + /* allow the release point to slip if we've passed our deadline. */ + lt_t now = litmus_clock(); + t->rt_param.job_params.release = + (t->rt_param.job_params.deadline < now) ? 
+ now : t->rt_param.job_params.deadline; + t->rt_param.job_params.deadline = + t->rt_param.job_params.release + get_rt_period(t); + } + else { + t->rt_param.job_params.release = t->rt_param.job_params.deadline; + t->rt_param.job_params.deadline += get_rt_period(t); + } + t->rt_param.job_params.exec_time = 0; /* update job sequence number */ t->rt_param.job_params.job_no++; diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c index 0b64977789a6..bff857ed8d4e 100644 --- a/litmus/kfmlp_lock.c +++ b/litmus/kfmlp_lock.c @@ -590,7 +590,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); - return(NULL); +// return(NULL); } kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); diff --git a/litmus/litmus.c b/litmus/litmus.c index 5b301c418b96..d1f836c8af6e 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -318,11 +318,21 @@ asmlinkage long sys_null_call(cycles_t __user *ts) void init_gpu_affinity_state(struct task_struct* p) { // under-damped - p->rt_param.gpu_fb_param_a = _frac(14008, 10000); - p->rt_param.gpu_fb_param_b = _frac(16024, 10000); - // critically-damped - // p->rt_param.gpu_fb_param_a = _frac(102, 1000); - // p->rt_param.gpu_fb_param_b = _frac(303, 1000); + //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); + //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); + + // emperical; + p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); + p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); + + p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000); + p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000); + + p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000); + p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000); + + p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000); + p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000); p->rt_param.gpu_migration = MIG_NONE; p->rt_param.last_gpu = -1; diff --git a/litmus/locking.c b/litmus/locking.c index cb11c04ed0d4..718a5a3281d7 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -349,10 +349,10 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) all_acquired: // FOR SANITY CHECK FOR TESTING - for(i = 0; i < dgl_wait->size; ++i) { - struct litmus_lock *l = dgl_wait->locks[i]; - BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); - } +// for(i = 0; i < dgl_wait->size; ++i) { +// struct litmus_lock *l = dgl_wait->locks[i]; +// BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); +// } TRACE_CUR("Acquired entire DGL\n"); diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 3d8c50882066..4b86a50d3bd1 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -265,6 +265,11 @@ int init_nvidia_info(void) } } +void shutdown_nvidia_info(void) +{ + nvidia_mod = NULL; + mb(); +} /* works with pointers to static data inside the module too. */ int is_nvidia_func(void* func_addr) @@ -319,14 +324,11 @@ u32 get_work_nv_device_num(const struct work_struct *t) } - -#define MAX_NR_OWNERS 3 - typedef struct { raw_spinlock_t lock; int nr_owners; struct task_struct* max_prio_owner; - struct task_struct* owners[MAX_NR_OWNERS]; + struct task_struct* owners[NV_MAX_SIMULT_USERS]; }nv_device_registry_t; static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; @@ -431,6 +433,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t) return ret; // assume already registered. 
} + raw_spin_lock_irqsave(®->lock, flags); if(reg->nr_owners < NV_MAX_SIMULT_USERS) { @@ -461,7 +464,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t) else { TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); - ret = -EBUSY; + //ret = -EBUSY; } raw_spin_unlock_irqrestore(®->lock, flags); diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c index 965164c43537..75ed87c5ed48 100644 --- a/litmus/rsm_lock.c +++ b/litmus/rsm_lock.c @@ -502,6 +502,13 @@ int rsm_mutex_unlock(struct litmus_lock* l) tsk_rt(next)->blocked_lock = NULL; mb(); +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + // re-enable tracking + if(tsk_rt(next)->held_gpus) { + tsk_rt(next)->suspend_gpu_tracker_on_block = 0; + } +#endif + wake_up_process(next); } else { diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 99f7620925ba..be14dbec6ed2 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -733,11 +733,11 @@ static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio, unsigned long flags; cedf_domain_t *cluster; struct task_struct *probe; - + // identify the cluster by the assignment of these tasks. one should // be non-NULL. probe = (old_prio) ? old_prio : new_prio; - + if(probe) { cluster = task_cpu_cluster(probe); @@ -838,8 +838,13 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { + // don't track preemptions or locking protocol suspensions. + TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n"); stop_gpu_tracker(entry->scheduled); } + else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { + TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n"); + } } #endif @@ -1596,6 +1601,10 @@ static void cleanup_cedf(void) { int i; +#ifdef CONFIG_LITMUS_NVIDIA + shutdown_nvidia_info(); +#endif + if (clusters_allocated) { for (i = 0; i < num_clusters; i++) { kfree(cedf[i].cpus); diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 2433297b7482..245e41c25a5d 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -13,6 +13,10 @@ #include #include +#ifdef CONFIG_LITMUS_NVIDIA +#include +#endif + /* * Generic function to trigger preemption on either local or remote cpu * from scheduler plugins. 
The key feature is that this function is @@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void) static long litmus_dummy_activate_plugin(void) { +#ifdef CONFIG_LITMUS_NVIDIA + shutdown_nvidia_info(); +#endif return 0; } diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c index 2bd3a787611b..f7f575346b54 100644 --- a/litmus/sched_task_trace.c +++ b/litmus/sched_task_trace.c @@ -247,6 +247,53 @@ feather_callback void do_sched_trace_action(unsigned long id, } + + +feather_callback void do_sched_trace_prediction_err(unsigned long id, + unsigned long _task, + unsigned long _distance, + unsigned long _rel_err) +{ + struct task_struct *t = (struct task_struct*) _task; + struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t); + + if (rec) { + gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance; + fp_t* rel_err = (fp_t*) _rel_err; + + rec->data.prediction_err.distance = *distance; + rec->data.prediction_err.rel_err = rel_err->val; + put_record(rec); + } +} + + +feather_callback void do_sched_trace_migration(unsigned long id, + unsigned long _task, + unsigned long _mig_info) +{ + struct task_struct *t = (struct task_struct*) _task; + struct st_event_record *rec = get_record(ST_MIGRATION, t); + + if (rec) { + struct migration_info* mig_info = (struct migration_info*) _mig_info; + + rec->hdr.extra = mig_info->distance; + rec->data.migration.observed = mig_info->observed; + rec->data.migration.estimated = mig_info->estimated; + + put_record(rec); + } +} + + + + + + + + + feather_callback void do_sched_trace_tasklet_release(unsigned long id, unsigned long _owner) { @@ -457,3 +504,6 @@ EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end); + + + -- cgit v1.2.2
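Editor's note (addendum, not part of the commit): the core of the GPU-affinity changes above is the feedback estimator in litmus/gpu_affinity.c, which now returns the relative error of each prediction (fed to sched_trace_prediction_err()) and takes its gains from the per-migration-distance arrays gpu_fb_param_a[]/gpu_fb_param_b[] installed by init_gpu_affinity_state(). Below is a minimal user-space sketch of that filter, using plain doubles instead of the kernel's fixed-point fp_t; the names feedback_est and fb_update are illustrative stand-ins, not the kernel's.

#include <stdio.h>

struct feedback_est {
	double est;        /* current execution-time estimate         */
	double accum_err;  /* accumulated (integral) estimation error */
};

/* One filter step, mirroring update_estimate(): the new estimate is a
 * weighted mix of the instantaneous error and the accumulated error.
 * Returns the relative error of the old estimate, which the patched
 * kernel passes on to the tracer. */
static double fb_update(struct feedback_est *fb, double a, double b,
                        double observed)
{
	double err = observed - fb->est;
	double rel_err = err / observed;

	fb->est = a * err + b * fb->accum_err;
	fb->accum_err += err;

	/* update_gpu_estimate() patches obviously bad (non-positive)
	 * estimates by falling back to the last observation. */
	if (fb->est <= 0.0) {
		fb->est = observed;
		fb->accum_err = fb->est / 2.0;
	}
	return rel_err;
}

int main(void)
{
	/* Gains from the index-0 entry installed by init_gpu_affinity_state():
	 * _frac(7550, 10000) = 0.7550 and _frac(45800, 10000) = 4.5800. */
	const double a = 0.7550, b = 4.5800;
	struct feedback_est fb = { .est = 1000.0, .accum_err = 500.0 };
	const double samples[] = { 900.0, 1100.0, 950.0, 1050.0 };

	for (int i = 0; i < 4; i++) {
		double rel = fb_update(&fb, a, b, samples[i]);
		printf("obs=%.0f -> est=%.1f (rel_err=%+.3f)\n",
		       samples[i], fb.est, rel);
	}
	return 0;
}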
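Editor's note: the change to prepare_for_next_period() in litmus/jobs.c introduces the RT_CLASS_SOFT_W_SLIP task class, under which a job's next release slips forward to the current time when its deadline has already passed, instead of staying back-to-back with the previous period. A compact sketch of the two policies follows, assuming a stand-in 64-bit time type and an illustrative job_params struct (not the kernel's layout):

#include <stdio.h>

typedef unsigned long long lt_t;   /* stand-in for the kernel's lt_t time type */

struct job_params {
	lt_t release;
	lt_t deadline;
	lt_t period;
};

/* RT_CLASS_SOFT (and HARD): releases stay back-to-back, so a task that
 * overruns accumulates a backlog of late jobs. */
static void next_period_strict(struct job_params *j)
{
	j->release   = j->deadline;
	j->deadline += j->period;
}

/* RT_CLASS_SOFT_W_SLIP: if the deadline has already passed, the next
 * release slips to "now", as in the patched prepare_for_next_period(). */
static void next_period_slip(struct job_params *j, lt_t now)
{
	j->release  = (j->deadline < now) ? now : j->deadline;
	j->deadline = j->release + j->period;
}

int main(void)
{
	struct job_params a = { .release = 0, .deadline = 100, .period = 100 };
	struct job_params b = a;
	lt_t now = 250;    /* 150 time units past the deadline */

	next_period_strict(&a);
	next_period_slip(&b, now);
	printf("strict: release=%llu deadline=%llu\n", a.release, a.deadline);
	printf("slip:   release=%llu deadline=%llu\n", b.release, b.deadline);
	return 0;
}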
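Editor's note: the sched_trace.h hunk shrinks the job sequence number in the record header from 32 to 24 bits and reuses the freed byte as an "extra" field, which do_sched_trace_migration() fills with the migration distance; the header stays 8 bytes while the new ST_PREDICTION_ERR and ST_MIGRATION record types are added. The sketch below restates that layout as a user-space trace reader might declare it. The field layout is copied from the patch; the overall st_event_record shape (16-byte data union) and the size-printing main() are assumptions for illustration only.

#include <stdint.h>
#include <stdio.h>

struct st_trace_header {
	uint8_t  type;       /* record type (e.g. ST_MIGRATION)   */
	uint8_t  cpu;        /* CPU the record was written on     */
	uint16_t pid;        /* PID of the traced task            */
	uint32_t job:24;     /* job sequence number, now 24 bits  */
	uint8_t  extra;      /* freed byte; carries the migration distance */
} __attribute__((packed));

struct st_prediction_err_data {
	uint64_t distance;   /* gpu_migration_dist_t of the prediction */
	uint64_t rel_err;    /* raw fixed-point (fp_t) relative error  */
} __attribute__((packed));

struct st_migration_data {
	uint64_t observed;   /* measured length of the GPU critical section */
	uint64_t estimated;  /* estimate that was in force at the time      */
} __attribute__((packed));

/* Assumed overall record shape: 8-byte header + 16-byte data union. */
struct st_event_record {
	struct st_trace_header hdr;
	union {
		struct st_prediction_err_data prediction_err;
		struct st_migration_data      migration;
		uint8_t                       raw[16];
	} data;
} __attribute__((packed));

int main(void)
{
	printf("header: %zu bytes, record: %zu bytes\n",
	       sizeof(struct st_trace_header),
	       sizeof(struct st_event_record));
	return 0;
}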