From e20223fcfd6ad9274e9e1aab11a73eaa72c7a4f5 Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Thu, 24 Jan 2013 17:34:45 -0500 Subject: Fix inheritance propagation for klmirqd&aux tasks. Bug: Inheritance not propagated to klmirqd and aux tasks when the task these threads inherit from has a change in its own priority. (Also removed per-task NV interrupt tracking since we cannot identify exact interrupt ownership under GPUSync.) --- include/litmus/rt_param.h | 2 - include/litmus/sched_trace.h | 3 +- litmus/aux_tasks.c | 14 +++++- litmus/litmus.c | 3 -- litmus/nvidia_info.c | 54 +++++++++++++++++------ litmus/sched_cedf.c | 100 ++++++++++++++++++++++++++++++++++--------- litmus/sched_gsn_edf.c | 4 -- litmus/sched_task_trace.c | 3 -- 8 files changed, 133 insertions(+), 50 deletions(-) diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index dd1ef076a4b2..49b2b45396e4 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -303,8 +303,6 @@ struct rt_param { #endif #ifdef CONFIG_LITMUS_NVIDIA - /* number of top-half interrupts handled on behalf of current job */ - atomic_t nv_int_count; long unsigned int held_gpus; // bitmap of held GPUs. struct binheap_node gpu_owner_node; // just one GPU for now... unsigned int hide_from_gpu:1; diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h index 5fbd7f37b26d..2598cdf6088e 100644 --- a/include/litmus/sched_trace.h +++ b/include/litmus/sched_trace.h @@ -56,8 +56,7 @@ struct st_completion_data { /* A job completed. */ * next task automatically; set to 0 otherwise. */ u8 __uflags:7; - u16 nv_int_count; - u8 __unused[5]; + u8 __unused[7]; } __attribute__((packed)); struct st_block_data { /* A task blocks. 
*/ diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c index 07907e22bc09..5aa9f7634fbf 100644 --- a/litmus/aux_tasks.c +++ b/litmus/aux_tasks.c @@ -133,6 +133,8 @@ int aux_task_owner_increase_priority(struct task_struct *t) struct task_struct *hp = NULL; struct task_struct *hp_eff = NULL; + int increase_aux = 0; + BUG_ON(!is_realtime(t)); BUG_ON(!tsk_rt(t)->has_aux_tasks); @@ -155,12 +157,19 @@ int aux_task_owner_increase_priority(struct task_struct *t) if (hp != t) { /* our position in the heap may have changed. hp is already at the root. */ binheap_decrease(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); } + else { + /* unconditionally propagate - t already has the updated eff and is at the root, + so we can't detect a change in inheritance, but we know that priority has + indeed increased/changed. */ + increase_aux = 1; + } hp = container_of( binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - if (effective_priority(hp) != hp_eff) { /* the eff. prio. of hp has changed */ + /* check if the eff. prio. of hp has changed */ + if (increase_aux || (effective_priority(hp) != hp_eff)) { hp_eff = effective_priority(hp); TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); retval = aux_tasks_increase_priority(leader, hp_eff); @@ -207,7 +216,8 @@ int aux_task_owner_decrease_priority(struct task_struct *t) container_of( binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), struct task_struct, rt_param); - if (effective_priority(new_hp) != hp_eff) { /* eff prio. 
of hp has changed */ + /* if the new_hp is still t, or if the effective priority has changed */ + if ((new_hp == t) || (effective_priority(new_hp) != hp_eff)) { hp_eff = effective_priority(new_hp); TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); retval = aux_tasks_decrease_priority(leader, hp_eff); diff --git a/litmus/litmus.c b/litmus/litmus.c index 2911e7ec7029..35bc70455425 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -493,9 +493,6 @@ long __litmus_admit_task(struct task_struct* tsk) bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); } -#ifdef CONFIG_LITMUS_NVIDIA - atomic_set(&tsk_rt(tsk)->nv_int_count, 0); -#endif #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) init_gpu_affinity_state(tsk); #endif diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index dda863009fee..ab62ca1b5b11 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c @@ -553,7 +553,10 @@ static int gpu_klmirqd_increase_priority(struct task_struct *klmirqd, struct tas { int retval = 0; - TRACE_CUR("Increasing priority of nv klmirqd: %s/%d.\n", klmirqd->comm, klmirqd->pid); + TRACE_CUR("Increasing priority of %s/%d to %s/%d.\n", + klmirqd->comm, klmirqd->pid, + (hp) ? hp->comm : "nil", + (hp) ? hp->pid : -1); /* the klmirqd thread should never attempt to hold a litmus-level real-time * so nested support is not required */ @@ -566,7 +569,10 @@ static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct tas { int retval = 0; - TRACE_CUR("Decreasing priority of nv klmirqd: %s/%d.\n", klmirqd->comm, klmirqd->pid); + TRACE_CUR("Decreasing priority of %s/%d to %s/%d.\n", + klmirqd->comm, klmirqd->pid, + (hp) ? hp->comm : "nil", + (hp) ? 
hp->pid : -1); /* the klmirqd thread should never attempt to hold a litmus-level real-time * so nested support is not required */ @@ -617,9 +623,12 @@ long enable_gpu_owner(struct task_struct *t) if (hp == t) { /* we're the new hp */ - TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu); + TRACE_CUR("%s/%d (eff_prio = %s/%d) is new hp on GPU %d.\n", + t->comm, t->pid, + effective_priority(t)->comm, effective_priority(t)->pid, + gpu); - retval = gpu_klmirqd_increase_priority(reg->thread, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); + retval = gpu_klmirqd_increase_priority(reg->thread, effective_priority(t)); } #endif @@ -671,13 +680,15 @@ long disable_gpu_owner(struct task_struct *t) } if (hp == t && new_hp != t) { - struct task_struct *to_inh = NULL; + struct task_struct *to_inh = (new_hp) ? effective_priority(new_hp) : NULL; - TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu); - - if (new_hp) { - to_inh = (tsk_rt(new_hp)->inh_task) ? tsk_rt(new_hp)->inh_task : new_hp; - } + TRACE_CUR("%s/%d is no longer hp on GPU %d; new hp = %s/%d (eff_prio = %s/%d).\n", + t->comm, t->pid, + gpu, + (new_hp) ? new_hp->comm : "nil", + (new_hp) ? new_hp->pid : -1, + (to_inh) ? to_inh->comm : "nil", + (to_inh) ? to_inh->pid : -1); retval = gpu_klmirqd_decrease_priority(reg->thread, to_inh); } @@ -707,6 +718,10 @@ int gpu_owner_increase_priority(struct task_struct *t) struct task_struct *hp = NULL; struct task_struct *hp_eff = NULL; +#ifdef CONFIG_LITMUS_SOFTIRQD + int increase_klmirqd = 0; +#endif + BUG_ON(!is_realtime(t)); BUG_ON(!tsk_rt(t)->held_gpus); @@ -728,14 +743,24 @@ int gpu_owner_increase_priority(struct task_struct *t) if (hp != t) { /* our position in the heap may have changed. hp is already at the root. 
*/ binheap_decrease(&tsk_rt(t)->gpu_owner_node, ®->owners); } - #ifdef CONFIG_LITMUS_SOFTIRQD + else { + /* unconditionally propagate - t already has the updated eff and is at the root, + so we can't detect a change in inheritance, but we know that priority has + indeed increased/changed. */ + increase_klmirqd = 1; + } + hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), struct task_struct, rt_param); - if (effective_priority(hp) != hp_eff) { /* the eff. prio. of hp has changed */ + /* check if the eff. prio. of hp has changed */ + if (increase_klmirqd || (effective_priority(hp) != hp_eff)) { hp_eff = effective_priority(hp); - TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu); + TRACE_CUR("%s/%d (eff_prio = %s/%d) is new hp on GPU %d.\n", + t->comm, t->pid, + hp_eff->comm, hp_eff->pid, + gpu); retval = gpu_klmirqd_increase_priority(reg->thread, hp_eff); } @@ -781,7 +806,8 @@ int gpu_owner_decrease_priority(struct task_struct *t) struct task_struct *new_hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), struct task_struct, rt_param); - if (effective_priority(new_hp) != hp_eff) { /* eff prio. 
of hp has changed */ + /* if the new_hp is still t, or if the effective priority has changed */ + if ((new_hp == t) || (effective_priority(new_hp) != hp_eff)) { hp_eff = effective_priority(new_hp); TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu); retval = gpu_klmirqd_decrease_priority(reg->thread, hp_eff); diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index dd64211a1402..b3281e40df52 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c @@ -415,12 +415,19 @@ static noinline void job_completion(struct task_struct *t, int forced) sched_trace_task_completion(t, forced); -#ifdef CONFIG_LITMUS_NVIDIA - atomic_set(&tsk_rt(t)->nv_int_count, 0); -#endif - TRACE_TASK(t, "job_completion().\n"); +#ifdef CONFIG_LITMUS_LOCKING + if (!is_persistent(t) && tsk_rt(t)->inh_task) { + /* job completing while inheriting a priority */ + TRACE_TASK(t, + "WARNING: Completing job while still inheriting a " + "priority (%s/%d)!\n", + tsk_rt(t)->inh_task->comm, + tsk_rt(t)->inh_task->pid); + } +#endif + /* set flags */ tsk_rt(t)->completed = 1; /* prepare for next period */ @@ -1027,7 +1034,7 @@ static void cedf_task_wake_up(struct task_struct *task) release_at(task, now); sched_trace_task_release(task); } - else if (task->rt.time_slice) { + else { /* periodic task model. don't force job to end. * rely on user to say when jobs complete or when budget expires. */ tsk_rt(task)->completed = 0; @@ -1156,6 +1163,27 @@ static int __increase_priority_inheritance(struct task_struct* t, int check_preempt = 0; cedf_domain_t* cluster; + if (prio_inh && (effective_priority(prio_inh) != prio_inh)) { + TRACE_TASK(t, "Inheriting from %s/%d instead of the eff_prio = %s/%d!\n", + prio_inh->comm, prio_inh->pid, + effective_priority(prio_inh)->comm, + effective_priority(prio_inh)->pid); +#ifndef CONFIG_LITMUS_NESTED_LOCKING + /* Tasks should only inherit the base priority of a task. 
+ If 't' inherits a priority, then tsk_rt(t)->inh_task should + be passed to this function instead. This includes transitive + inheritance relations (tsk_rt(tsk_rt(...)->inh_task)->inh_task). */ + BUG(); +#else + /* Not a bug with nested locking since inheritance propagation is + not atomic. */ + + /* TODO: Is the following 'helping' short-cut safe? + prio_inh = effective_priority(prio_inh); + */ +#endif + } + if (prio_inh && prio_inh == effective_priority(t)) { /* relationship already established. */ TRACE_TASK(t, "already has effective priority of %s/%d\n", @@ -1211,6 +1239,20 @@ static int __increase_priority_inheritance(struct task_struct* t, } raw_spin_unlock(&cluster->domain.release_lock); +#ifdef CONFIG_REALTIME_AUX_TASKS + /* propagate to aux tasks */ + if (tsk_rt(t)->has_aux_tasks) { + aux_task_owner_increase_priority(t); + } +#endif + +#ifdef CONFIG_LITMUS_NVIDIA + /* propagate to gpu klmirqd */ + if (tsk_rt(t)->held_gpus) { + gpu_owner_increase_priority(t); + } +#endif + /* If holder was enqueued in a release heap, then the following * preemption check is pointless, but we can't easily detect * that case. 
If you want to fix this, then consider that @@ -1225,20 +1267,6 @@ static int __increase_priority_inheritance(struct task_struct* t, &cluster->domain.ready_queue); check_for_preemptions(cluster); } - -#ifdef CONFIG_REALTIME_AUX_TASKS - /* propagate to aux tasks */ - if (tsk_rt(t)->has_aux_tasks) { - aux_task_owner_increase_priority(t); - } -#endif - -#ifdef CONFIG_LITMUS_NVIDIA - /* propagate to gpu klmirqd */ - if (tsk_rt(t)->held_gpus) { - gpu_owner_increase_priority(t); - } -#endif } #ifdef CONFIG_LITMUS_NESTED_LOCKING } @@ -1266,6 +1294,8 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str raw_spin_lock(&cluster->cluster_lock); + TRACE_TASK(t, "to inherit from %s/%d\n", prio_inh->comm, prio_inh->pid); + __increase_priority_inheritance(t, prio_inh); raw_spin_unlock(&cluster->cluster_lock); @@ -1288,11 +1318,32 @@ static int __decrease_priority_inheritance(struct task_struct* t, { int success = 1; + if (prio_inh && (effective_priority(prio_inh) != prio_inh)) { + TRACE_TASK(t, "Inheriting from %s/%d instead of the eff_prio = %s/%d!\n", + prio_inh->comm, prio_inh->pid, + effective_priority(prio_inh)->comm, + effective_priority(prio_inh)->pid); +#ifndef CONFIG_LITMUS_NESTED_LOCKING + /* Tasks should only inherit the base priority of a task. + If 't' inherits a priority, then tsk_rt(t)->inh_task should + be passed to this function instead. This includes transitive + inheritance relations (tsk_rt(tsk_rt(...)->inh_task)->inh_task). */ + BUG(); +#else + /* Not a bug with nested locking since inheritance propagation is + not atomic. */ + + /* TODO: Is the following 'helping' short-cut safe? + prio_inh = effective_priority(prio_inh); + */ +#endif + } + if (prio_inh == tsk_rt(t)->inh_task) { /* relationship already established. */ TRACE_TASK(t, "already inherits priority from %s/%d\n", (prio_inh) ? prio_inh->comm : "(nil)", - (prio_inh) ? prio_inh->pid : 0); + (prio_inh) ? 
prio_inh->pid : -1); goto out; } @@ -1372,6 +1423,11 @@ static void decrease_priority_inheritance(struct task_struct* t, cedf_domain_t* cluster = task_cpu_cluster(t); raw_spin_lock(&cluster->cluster_lock); + + TRACE_TASK(t, "to inherit from %s/%d (decrease)\n", + (prio_inh) ? prio_inh->comm : "nil", + (prio_inh) ? prio_inh->pid : -1); + __decrease_priority_inheritance(t, prio_inh); raw_spin_unlock(&cluster->cluster_lock); @@ -1407,6 +1463,10 @@ static void nested_increase_priority_inheritance(struct task_struct* t, increase_priority_inheritance(t, prio_inh); // increase our prio. } + /* note: cluster lock is not held continuously during propagation, so there + may be momentary inconsistencies while nested priority propagation 'chases' + other updates. */ + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index b3309ee2561e..8a8a6f1c306b 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -405,10 +405,6 @@ static noinline void job_completion(struct task_struct *t, int forced) sched_trace_task_completion(t, forced); -#ifdef CONFIG_LITMUS_NVIDIA - atomic_set(&tsk_rt(t)->nv_int_count, 0); -#endif - TRACE_TASK(t, "job_completion().\n"); /* set flags */ diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c index b14b0100e09c..1693d70d0911 100644 --- a/litmus/sched_task_trace.c +++ b/litmus/sched_task_trace.c @@ -191,9 +191,6 @@ feather_callback void do_sched_trace_task_completion(unsigned long id, if (rec) { rec->data.completion.when = now(); rec->data.completion.forced = forced; -#ifdef LITMUS_NVIDIA - rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count); -#endif put_record(rec); } } -- cgit v1.2.2