From 5bd89a34d89f252619d83fef3c9325e24311389e Mon Sep 17 00:00:00 2001
From: "Bjoern B. Brandenburg"
Date: Thu, 28 Jul 2011 01:15:58 -0400
Subject: Litmus core: simplify np-section protocol

Use a 32-bit word for all non-preemptive section flags.
Set the "please yield soon" flag atomically when
accessing it on remotely-scheduled tasks.
---
 include/litmus/litmus.h   | 47 ++++++++++++++++++++++++++++++++++++++---------
 include/litmus/rt_param.h | 16 +++++++++++-----
 litmus/litmus.c           |  2 ++
 litmus/sched_plugin.c     | 23 ++++++-----------------
 4 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index e7769ca36ec0..12af22266331 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -137,7 +137,7 @@ static inline int is_kernel_np(struct task_struct *t)
 
 static inline int is_user_np(struct task_struct *t)
 {
-	return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->np_flag : 0;
+	return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->sched.np.flag : 0;
 }
 
 static inline void request_exit_np(struct task_struct *t)
@@ -147,17 +147,11 @@ static inline void request_exit_np(struct task_struct *t)
 		 * into the kernel at the end of a critical section. */
 		if (likely(tsk_rt(t)->ctrl_page)) {
 			TRACE_TASK(t, "setting delayed_preemption flag\n");
-			tsk_rt(t)->ctrl_page->delayed_preemption = 1;
+			tsk_rt(t)->ctrl_page->sched.np.preempt = 1;
 		}
 	}
 }
 
-static inline void clear_exit_np(struct task_struct *t)
-{
-	if (likely(tsk_rt(t)->ctrl_page))
-		tsk_rt(t)->ctrl_page->delayed_preemption = 0;
-}
-
 static inline void make_np(struct task_struct *t)
 {
 	tsk_rt(t)->kernel_np++;
@@ -171,6 +165,34 @@ static inline int take_np(struct task_struct *t)
 	return --tsk_rt(t)->kernel_np;
 }
 
+/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */
+static inline int request_exit_np_atomic(struct task_struct *t)
+{
+	union np_flag old, new;
+
+	if (tsk_rt(t)->ctrl_page) {
+		old.raw = tsk_rt(t)->ctrl_page->sched.raw;
+		if (old.np.flag == 0) {
+			/* no longer non-preemptive */
+			return 0;
+		} else if (old.np.preempt) {
+			/* already set, nothing for us to do */
+			return 1;
+		} else {
+			/* non preemptive and flag not set */
+			new.raw = old.raw;
+			new.np.preempt = 1;
+			/* if we get old back, then we atomically set the flag */
+			return cmpxchg(&tsk_rt(t)->ctrl_page->sched.raw, old.raw, new.raw) == old.raw;
+			/* If we raced with a concurrent change, then so be
+			 * it. Deliver it by IPI. We don't want an unbounded
+			 * retry loop here since tasks might exploit that to
+			 * keep the kernel busy indefinitely. */
+		}
+	} else
+		return 0;
+}
+
 #else
 
 static inline int is_kernel_np(struct task_struct* t)
@@ -189,12 +211,19 @@ static inline void request_exit_np(struct task_struct *t)
 	BUG();
 }
 
-static inline void clear_exit_np(struct task_struct* t)
+static inline int request_exit_np_atomic(struct task_struct *t)
 {
+	return 0;
 }
 
 #endif
 
+static inline void clear_exit_np(struct task_struct *t)
+{
+	if (likely(tsk_rt(t)->ctrl_page))
+		tsk_rt(t)->ctrl_page->sched.np.preempt = 0;
+}
+
 static inline int is_np(struct task_struct *t)
 {
 #ifdef CONFIG_SCHED_DEBUG_TRACE
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 389be0775869..d6d799174160 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -42,6 +42,16 @@ struct rt_task {
 	budget_policy_t budget_policy; /* ignored by pfair */
 };
 
+union np_flag {
+	uint32_t raw;
+	struct {
+		/* Is the task currently in a non-preemptive section? */
+		uint32_t flag:31;
+		/* Should the task call into the scheduler? */
+		uint32_t preempt:1;
+	} np;
+};
+
 /* The definition of the data that is shared between the kernel and real-time
  * tasks via a shared page (see litmus/ctrldev.c).
  *
@@ -57,11 +67,7 @@ struct rt_task {
  * determining preemption/migration overheads).
  */
 struct control_page {
-	/* Is the task currently in a non-preemptive section? */
-	int np_flag;
-	/* Should the task call into the kernel when it leaves
-	 * its non-preemptive section? */
-	int delayed_preemption;
+	volatile union np_flag sched;
 
 	/* to be extended */
 };
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 73af6c3010d6..301390148d02 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -529,6 +529,8 @@ static int __init _init_litmus(void)
 	 */
 	printk("Starting LITMUS^RT kernel\n");
 
+	BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
+
 	register_sched_plugin(&linux_sched_plugin);
 
 	bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index d54886df1f57..00a1900d6457 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -35,29 +35,18 @@ void preempt_if_preemptable(struct task_struct* t, int cpu)
 			/* local CPU case */
 			/* check if we need to poke userspace */
 			if (is_user_np(t))
-				/* yes, poke it */
+				/* Yes, poke it. This doesn't have to be atomic since
+				 * the task is definitely not executing. */
 				request_exit_np(t);
 			else if (!is_kernel_np(t))
 				/* only if we are allowed to preempt the
 				 * currently-executing task */
 				reschedule = 1;
 		} else {
-			/* remote CPU case */
-			if (is_user_np(t)) {
-				/* need to notify user space of delayed
-				 * preemption */
-
-				/* to avoid a race, set the flag, then test
-				 * again */
-				request_exit_np(t);
-				/* make sure it got written */
-				mb();
-			}
-			/* Only send an ipi if remote task might have raced our
-			 * request, i.e., send an IPI to make sure in case it
-			 * exited its critical section.
-			 */
-			reschedule = !is_np(t) && !is_kernel_np(t);
+			/* Remote CPU case.  Only notify if it's not a kernel
+			 * NP section and if we didn't set the userspace
+			 * flag. */
+			reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
 		}
 	}
 	if (likely(reschedule))
-- 
cgit v1.2.2
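
For context, the userspace half of this protocol (the side that sets sched.np.flag
and honors sched.np.preempt) looks roughly like the sketch below. The types mirror
the union np_flag and struct control_page added above; ctrl_page, np_enter(),
np_exit(), and the use of sched_yield() are illustrative placeholders, not the
actual liblitmus API, and how the control page is mapped is left out.

/* Userspace-side sketch of the np-section protocol. Assumes the task has
 * already mapped its control page (see litmus/ctrldev.c); liblitmus wraps
 * the real entry points. */
#include <stdint.h>
#include <sched.h>

union np_flag {
	uint32_t raw;
	struct {
		uint32_t flag:31;	/* non-zero while in a non-preemptive section */
		uint32_t preempt:1;	/* set by the kernel: "please yield soon" */
	} np;
};

struct control_page {
	volatile union np_flag sched;
	/* to be extended */
};

/* Placeholder: assumed to point at the page shared with the kernel. */
extern struct control_page *ctrl_page;

static inline void np_enter(void)
{
	ctrl_page->sched.np.flag = 1;	/* announce the non-preemptive section */
	__sync_synchronize();		/* make it visible before entering the critical section */
}

static inline void np_exit(void)
{
	ctrl_page->sched.np.flag = 0;	/* leave the non-preemptive section */
	__sync_synchronize();
	if (ctrl_page->sched.np.preempt)
		/* The kernel set the "please yield soon" bit while we were
		 * non-preemptive; call into the scheduler now. The kernel
		 * clears the bit again via clear_exit_np(). */
		sched_yield();
}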