From 6827bb817faecede51838e2fcc8b6283e54fe872 Mon Sep 17 00:00:00 2001
From: Glenn Elliott
Date: Mon, 14 May 2012 16:51:05 -0400
Subject: Final GPUSync implementation.

---
 litmus/Kconfig            |  2 +-
 litmus/gpu_affinity.c     | 38 +++++++++++++++++++++++++----------
 litmus/ikglp_lock.c       | 39 ++++++++++++++++++++++++++++--------
 litmus/jobs.c             | 17 ++++++++++++++--
 litmus/kfmlp_lock.c       |  2 +-
 litmus/litmus.c           | 20 ++++++++++++++-----
 litmus/locking.c          |  8 ++++----
 litmus/nvidia_info.c      | 13 +++++++-----
 litmus/rsm_lock.c         |  7 +++++++
 litmus/sched_cedf.c       | 13 ++++++++++--
 litmus/sched_plugin.c     |  7 +++++++
 litmus/sched_task_trace.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++
 12 files changed, 178 insertions(+), 38 deletions(-)

diff --git a/litmus/Kconfig b/litmus/Kconfig
index a34440f3d8bc..8c156e4da528 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -157,7 +157,7 @@ config SCHED_TASK_TRACE
 config SCHED_TASK_TRACE_SHIFT
 	int "Buffer size for sched_trace_xxx() events"
 	depends on SCHED_TASK_TRACE
-	range 8 13
+	range 8 15
 	default 9
 	help

diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
index 70a86bdd9aec..9762be1a085e 100644
--- a/litmus/gpu_affinity.c
+++ b/litmus/gpu_affinity.c
@@ -5,25 +5,32 @@
 #include
 #include

+#include
+
 #define OBSERVATION_CAP 2*1e9

-static void update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
+static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
 {
+	fp_t relative_err;
 	fp_t err, new;
 	fp_t actual = _integer_to_fp(observed);

 	err = _sub(actual, fb->est);
 	new = _add(_mul(a, err), _mul(b, fb->accum_err));

+	relative_err = _div(err, actual);
+
 	fb->est = new;
 	fb->accum_err = _add(fb->accum_err, err);
+
+	return relative_err;
 }

 void update_gpu_estimate(struct task_struct *t, lt_t observed)
 {
 	feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);

-	WARN_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
+	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);

 	if(unlikely(fb->est.val == 0)) {
 		// kludge-- cap observed values to prevent whacky estimations.
@@ -40,18 +47,29 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed)
 		fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
 	}
 	else {
-		update_estimate(fb,
-				tsk_rt(t)->gpu_fb_param_a,
-				tsk_rt(t)->gpu_fb_param_b,
-				observed);
-
-		if(_fp_to_integer(fb->est) <= 0) {
-			// TODO: talk to Jonathan about how well this works.
-			// Maybe we should average the observed and est instead?
+		fp_t rel_err = update_estimate(fb,
+				tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
+				tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
+				observed);
+
+		if(unlikely(_fp_to_integer(fb->est) <= 0)) {
 			TRACE_TASK(t, "Invalid estimate. Patching.\n");
 			fb->est = _integer_to_fp(observed);
 			fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
		}
+		else {
+//			struct migration_info mig_info;
+
+			sched_trace_prediction_err(t,
+					&(tsk_rt(t)->gpu_migration),
+					&rel_err);
+
+//			mig_info.observed = observed;
+//			mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
+//			mig_info.distance = tsk_rt(t)->gpu_migration;
+//
+//			sched_trace_migration(t, &mig_info);
+		}
 	}

 	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n",
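
The two hunks above turn update_estimate() into a proportional-integral style feedback
filter whose gains (a, b) are now indexed by migration distance, and which reports its
relative error for tracing. Below is a minimal userspace sketch of the same law, with
double standing in for the kernel's fixed-point fp_t; the gains are the distance-0 pair
installed in litmus.c later in this patch, while the driver loop and starting values are
illustrative only:

#include <stdio.h>

struct feedback_est {
	double est;       /* current execution-time estimate */
	double accum_err; /* integrated error */
};

/* est' = a*err + b*accum_err; returns the error relative to the observation,
 * which the patched kernel hands to sched_trace_prediction_err(). */
static double update_estimate(struct feedback_est *fb, double a, double b,
                              double observed)
{
	double err = observed - fb->est;
	double rel_err = err / observed;

	fb->est = a * err + b * fb->accum_err;
	fb->accum_err += err;
	return rel_err;
}

int main(void)
{
	struct feedback_est fb = { .est = 100.0, .accum_err = 50.0 };
	const double obs[] = { 120.0, 118.0, 122.0, 119.0 };

	for (int i = 0; i < 4; i++) {
		/* a = _frac(7550, 10000), b = _frac(45800, 10000) */
		double rel = update_estimate(&fb, 0.755, 4.58, obs[i]);
		printf("obs=%.0f -> est=%.2f rel_err=%+.3f\n", obs[i], fb.est, rel);
	}
	return 0;
}
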
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 023443014d4b..83b708ab85cb 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1346,6 +1346,10 @@ int ikglp_unlock(struct litmus_lock* l)
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 	if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
 		fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
+		if(fq_of_new_on_fq->count == 0) {
+			// ignore it?
+//			fq_of_new_on_fq = fq;
+		}
 	}
 	else {
 		fq_of_new_on_fq = fq;
@@ -1383,6 +1387,10 @@ int ikglp_unlock(struct litmus_lock* l)
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 	if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
 		fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
+		if(fq_of_new_on_fq->count == 0) {
+			// ignore it?
+//			fq_of_new_on_fq = fq;
+		}
 	}
 	else {
 		fq_of_new_on_fq = fq;
@@ -1409,6 +1417,10 @@ int ikglp_unlock(struct litmus_lock* l)
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 	if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
 		fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
+		if(fq_of_new_on_fq->count == 0) {
+			// ignore it?
+//			fq_of_new_on_fq = fq;
+		}
 	}
 	else {
 		fq_of_new_on_fq = fq;
@@ -1569,7 +1581,7 @@ int ikglp_unlock(struct litmus_lock* l)
 		}
 	}

-
+wake_kludge:
 	if(waitqueue_active(&fq->wait)) {
 		wait_queue_t *wait = list_entry(fq->wait.task_list.next,
 				wait_queue_t, task_list);
@@ -1672,6 +1684,16 @@ int ikglp_unlock(struct litmus_lock* l)
 		// wake up the new resource holder!
 		wake_up_process(next);
 	}
+	if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) {
+		// The guy we promoted went to an empty FQ. (Why didn't stealing pick this up?)
+		// Wake up the new guy too.
+
+		BUG_ON(fq_of_new_on_fq->owner != NULL);
+
+		fq = fq_of_new_on_fq;
+		fq_of_new_on_fq = NULL;
+		goto wake_kludge;
+	}

 	unlock_fine_irqrestore(&sem->lock, flags);
 	unlock_global_irqrestore(dgl_lock, flags);
@@ -1917,7 +1939,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
 		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
 				NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
-		return(NULL);
+//		return(NULL);
 	}

 	ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
@@ -2600,8 +2622,8 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 	tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration

-	TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
-			t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
+	TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
+			t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration);

 	// count the number of resource holders
 	++(*(aff->q_info[replica].nr_cur_users));
@@ -2626,8 +2648,6 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
 	est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));

-	tsk_rt(t)->last_gpu = gpu;
-
 	// count the number of resource holders
 	--(*(aff->q_info[replica].nr_cur_users));

@@ -2636,12 +2656,15 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
 	// update estimates
 	update_gpu_estimate(t, get_gpu_time(t));

-	TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. "
+	TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. "
 			"estimated was %llu. diff is %d\n",
-			t->comm, t->pid, gpu,
+			t->comm, t->pid, gpu, tsk_rt(t)->last_gpu,
+			tsk_rt(t)->gpu_migration,
 			get_gpu_time(t),
 			est_time,
 			(long long)get_gpu_time(t) - (long long)est_time);
+
+	tsk_rt(t)->last_gpu = gpu;
 }

 struct ikglp_affinity_ops gpu_ikglp_affinity =
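
The last two hunks fix an ordering bug: gpu_ikglp_notify_freed() classifies the migration
of the use that just ended, so tsk_rt(t)->last_gpu may only advance to the new GPU after
update_gpu_estimate() and the trace message have consumed the old value. The distance
classifier itself is not part of this patch; the following is a purely hypothetical sketch
of what gpu_migration_distance() might compute, assuming GPUs are indexed so that pairs
share a PCIe switch and groups of four share a hub -- the enum values and topology rules
here are assumptions, not the kernel's definitions:

/* hypothetical sketch only */
typedef enum {
	MIG_LOCAL, /* same GPU as last time */
	MIG_NEAR,  /* different GPU behind the same PCIe switch */
	MIG_MED,   /* same hub, different switch */
	MIG_FAR,   /* anywhere else in the system */
	MIG_NONE,  /* no prior GPU on record */
} gpu_migration_dist_t;

static gpu_migration_dist_t gpu_migration_distance(int last_gpu, int gpu)
{
	if (last_gpu < 0)
		return MIG_NONE;              /* first acquisition: last_gpu == -1 */
	if (last_gpu == gpu)
		return MIG_LOCAL;
	if (last_gpu / 2 == gpu / 2)
		return MIG_NEAR;              /* GPUs {0,1}, {2,3}, ... share a switch */
	if (last_gpu / 4 == gpu / 4)
		return MIG_MED;               /* GPUs {0..3}, {4..7} share a hub */
	return MIG_FAR;
}
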
diff --git a/litmus/jobs.c b/litmus/jobs.c
index 36e314625d86..1d97462cc128 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t)
 {
 	BUG_ON(!t);
 	/* prepare next release */
-	t->rt_param.job_params.release = t->rt_param.job_params.deadline;
-	t->rt_param.job_params.deadline += get_rt_period(t);
+
+	if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) {
+		/* allow the release point to slip if we've passed our deadline. */
+		lt_t now = litmus_clock();
+		t->rt_param.job_params.release =
+			(t->rt_param.job_params.deadline < now) ?
+				now : t->rt_param.job_params.deadline;
+		t->rt_param.job_params.deadline =
+			t->rt_param.job_params.release + get_rt_period(t);
+	}
+	else {
+		t->rt_param.job_params.release = t->rt_param.job_params.deadline;
+		t->rt_param.job_params.deadline += get_rt_period(t);
+	}
+
 	t->rt_param.job_params.exec_time = 0;
 	/* update job sequence number */
 	t->rt_param.job_params.job_no++;
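
The new RT_CLASS_SOFT_W_SLIP branch changes how a tardy job's next release is computed: the
release slips to the current time instead of piling up releases in the past. A worked example
as a self-contained userspace model (not the kernel code), with lt_t modeled as nanosecond
integers -- a job with period 10 and deadline 10 that finishes at time 25 gets release 25 and
deadline 35 under the slipping policy, versus release 10 and deadline 20 (immediately tardy
again) under the classic policy:

#include <stdio.h>

typedef unsigned long long lt_t; /* nanoseconds, as in LITMUS^RT */

struct job { lt_t release, deadline; };

static void next_period(struct job *j, lt_t period, lt_t now, int slip)
{
	if (slip) {
		/* RT_CLASS_SOFT_W_SLIP: slip the release if the deadline passed */
		j->release = (j->deadline < now) ? now : j->deadline;
		j->deadline = j->release + period;
	} else {
		/* classic periodic release */
		j->release = j->deadline;
		j->deadline += period;
	}
}

int main(void)
{
	struct job slip = { 0, 10 }, strict = { 0, 10 };

	next_period(&slip, 10, 25, 1);   /* job completed late, at t = 25 */
	next_period(&strict, 10, 25, 0);

	printf("slip:   release=%llu deadline=%llu\n", slip.release, slip.deadline);
	printf("strict: release=%llu deadline=%llu\n", strict.release, strict.deadline);
	return 0;
}
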
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index 0b64977789a6..bff857ed8d4e 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -590,7 +590,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
 		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
 				NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
-		return(NULL);
+//		return(NULL);
 	}

 	kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);

diff --git a/litmus/litmus.c b/litmus/litmus.c
index 5b301c418b96..d1f836c8af6e 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -318,11 +318,21 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
 void init_gpu_affinity_state(struct task_struct* p)
 {
 	// under-damped
-	p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
-	p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
-	// critically-damped
-	// p->rt_param.gpu_fb_param_a = _frac(102, 1000);
-	// p->rt_param.gpu_fb_param_b = _frac(303, 1000);
+	//p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
+	//p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
+
+	// empirical:
+	p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
+	p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
+
+	p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
+	p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
+
+	p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
+	p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
+
+	p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
+	p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);

 	p->rt_param.gpu_migration = MIG_NONE;
 	p->rt_param.last_gpu = -1;

diff --git a/litmus/locking.c b/litmus/locking.c
index cb11c04ed0d4..718a5a3281d7 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -349,10 +349,10 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait)
 all_acquired:

 	// FOR SANITY CHECK FOR TESTING
-	for(i = 0; i < dgl_wait->size; ++i) {
-		struct litmus_lock *l = dgl_wait->locks[i];
-		BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
-	}
+//	for(i = 0; i < dgl_wait->size; ++i) {
+//		struct litmus_lock *l = dgl_wait->locks[i];
+//		BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
+//	}

 	TRACE_CUR("Acquired entire DGL\n");

diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
index 3d8c50882066..4b86a50d3bd1 100644
--- a/litmus/nvidia_info.c
+++ b/litmus/nvidia_info.c
@@ -265,6 +265,11 @@ int init_nvidia_info(void)
 	}
 }

+void shutdown_nvidia_info(void)
+{
+	nvidia_mod = NULL;
+	mb();
+}

 /* works with pointers to static data inside the module too. */
 int is_nvidia_func(void* func_addr)
@@ -319,14 +324,11 @@ u32 get_work_nv_device_num(const struct work_struct *t)
 }

-
-#define MAX_NR_OWNERS 3
-
 typedef struct {
 	raw_spinlock_t	lock;
 	int nr_owners;
 	struct task_struct* max_prio_owner;
-	struct task_struct* owners[MAX_NR_OWNERS];
+	struct task_struct* owners[NV_MAX_SIMULT_USERS];
 }nv_device_registry_t;

 static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
@@ -431,6 +433,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t)
 		return ret;  // assume already registered.
 	}

+
 	raw_spin_lock_irqsave(&reg->lock, flags);

 	if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
@@ -461,7 +464,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t)
 	else {
 		TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
-		ret = -EBUSY;
+		//ret = -EBUSY;
 	}

 	raw_spin_unlock_irqrestore(&reg->lock, flags);
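
The registry change above sizes owners[] with the same NV_MAX_SIMULT_USERS bound that the
admission checks use, instead of a separate MAX_NR_OWNERS constant that could silently drift
out of sync. A compressed sketch of the resulting registration path -- NV_MAX_SIMULT_USERS = 3
is an assumed value here, and the locking and priority bookkeeping are omitted:

struct task_struct; /* opaque in this sketch */

#define NV_MAX_SIMULT_USERS 3 /* assumed value for illustration */

typedef struct {
	int nr_owners;
	struct task_struct *owners[NV_MAX_SIMULT_USERS]; /* bound matches the check below */
} nv_device_registry_t;

static int reg_nv_device_sketch(nv_device_registry_t *reg, struct task_struct *t)
{
	if (reg->nr_owners < NV_MAX_SIMULT_USERS) {
		/* normal path: record one more simultaneous user */
		reg->owners[reg->nr_owners++] = t;
		return 0;
	}
	/* with ret = -EBUSY commented out above, over-subscription is
	 * only logged, not reported to the caller */
	return 0;
}
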
diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c
index 965164c43537..75ed87c5ed48 100644
--- a/litmus/rsm_lock.c
+++ b/litmus/rsm_lock.c
@@ -502,6 +502,13 @@ int rsm_mutex_unlock(struct litmus_lock* l)
 		tsk_rt(next)->blocked_lock = NULL;
 		mb();

+#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
+		// re-enable tracking
+		if(tsk_rt(next)->held_gpus) {
+			tsk_rt(next)->suspend_gpu_tracker_on_block = 0;
+		}
+#endif
+
 		wake_up_process(next);
 	}
 	else {

diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 99f7620925ba..be14dbec6ed2 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -733,11 +733,11 @@ static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio,
 	unsigned long flags;
 	cedf_domain_t *cluster;
 	struct task_struct *probe;
-
+
 	// identify the cluster by the assignment of these tasks. one should
 	// be non-NULL.
 	probe = (old_prio) ? old_prio : new_prio;
-
+
 	if(probe) {
 		cluster = task_cpu_cluster(probe);
@@ -838,8 +838,13 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
 #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
 	if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
 		if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
+			// don't track preemptions or locking protocol suspensions.
+			TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n");
 			stop_gpu_tracker(entry->scheduled);
 		}
+		else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
+			TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n");
+		}
 	}
 #endif
@@ -1596,6 +1601,10 @@ static void cleanup_cedf(void)
 {
 	int i;

+#ifdef CONFIG_LITMUS_NVIDIA
+	shutdown_nvidia_info();
+#endif
+
 	if (clusters_allocated) {
 		for (i = 0; i < num_clusters; i++) {
 			kfree(cedf[i].cpus);
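
The cedf_schedule() hunk makes the tracking policy explicit: GPU execution-time tracking
stops on preemption, and on self-suspension only when a locking protocol requested it;
an ordinary suspension while holding a GPU keeps the tracker running, since the GPU may
still be executing on the task's behalf (rsm_lock.c correspondingly clears the flag when
a blocked task is granted the lock). The decision reduces to a two-input predicate; a
standalone model of the condition, not the kernel code:

#include <stdbool.h>
#include <stdio.h>

/* blocks: the task is self-suspending (rather than being preempted);
 * suspend_on_block: a locking protocol asked to pause GPU tracking. */
static bool stop_tracker(bool blocks, bool suspend_on_block)
{
	return !blocks || suspend_on_block;
}

int main(void)
{
	printf("preempted:           %d\n", stop_tracker(false, false)); /* 1: stop */
	printf("protocol suspension: %d\n", stop_tracker(true, true));   /* 1: stop */
	printf("ordinary suspension: %d\n", stop_tracker(true, false));  /* 0: keep tracking */
	return 0;
}
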
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 2433297b7482..245e41c25a5d 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -13,6 +13,10 @@
 #include
 #include

+#ifdef CONFIG_LITMUS_NVIDIA
+#include
+#endif
+
 /*
  * Generic function to trigger preemption on either local or remote cpu
  * from scheduler plugins. The key feature is that this function is
@@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void)

 static long litmus_dummy_activate_plugin(void)
 {
+#ifdef CONFIG_LITMUS_NVIDIA
+	shutdown_nvidia_info();
+#endif
 	return 0;
 }

diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index 2bd3a787611b..f7f575346b54 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -247,6 +247,53 @@ feather_callback void do_sched_trace_action(unsigned long id,
 }

+
+
+feather_callback void do_sched_trace_prediction_err(unsigned long id,
+					unsigned long _task,
+					unsigned long _distance,
+					unsigned long _rel_err)
+{
+	struct task_struct *t = (struct task_struct*) _task;
+	struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t);
+
+	if (rec) {
+		gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance;
+		fp_t* rel_err = (fp_t*) _rel_err;
+
+		rec->data.prediction_err.distance = *distance;
+		rec->data.prediction_err.rel_err = rel_err->val;
+		put_record(rec);
+	}
+}
+
+
+feather_callback void do_sched_trace_migration(unsigned long id,
+					unsigned long _task,
+					unsigned long _mig_info)
+{
+	struct task_struct *t = (struct task_struct*) _task;
+	struct st_event_record *rec = get_record(ST_MIGRATION, t);
+
+	if (rec) {
+		struct migration_info* mig_info = (struct migration_info*) _mig_info;
+
+		rec->hdr.extra = mig_info->distance;
+		rec->data.migration.observed = mig_info->observed;
+		rec->data.migration.estimated = mig_info->estimated;
+
+		put_record(rec);
+	}
+}
+
+
+
+
+
+
+
+
+
 feather_callback void do_sched_trace_tasklet_release(unsigned long id,
 				unsigned long _owner)
 {
@@ -457,3 +504,6 @@ EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);
+
+
+
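
Note that do_sched_trace_prediction_err() stores the relative error as the raw fixed-point
.val, so trace consumers must rescale it. A sketch of how a userspace tool could decode the
field -- the record layout and FP_SHIFT = 10 are assumptions made for illustration; the
authoritative definitions live in litmus/sched_trace.h and the LITMUS^RT fixed-point header:

#include <stdio.h>
#include <stdint.h>

#define FP_SHIFT 10 /* assumed fixed-point shift; check the kernel headers */

/* hypothetical stand-in for the on-disk record payload */
struct st_prediction_err_data {
	uint32_t distance; /* gpu_migration_dist_t */
	int32_t  rel_err;  /* raw fp_t.val */
};

static double decode_rel_err(const struct st_prediction_err_data *d)
{
	/* undo the fixed-point scaling: value = val / 2^FP_SHIFT */
	return (double)d->rel_err / (double)(1 << FP_SHIFT);
}

int main(void)
{
	struct st_prediction_err_data d = { 1, -154 }; /* example values */
	printf("distance=%u rel_err=%.3f\n", d.distance, decode_rel_err(&d));
	return 0;
}
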