author		Eric W. Biederman <ebiederm@xmission.com>	2019-09-14 08:34:30 -0400
committer	Ingo Molnar <mingo@kernel.org>		2019-09-25 11:42:29 -0400
commit		154abafc68bfb7c2ef2ad5308a3b2de8968c3f61 (patch)
tree		764142945e91bb8943633d7e8eee33b6c13f7bbf
parent		0ff7b2cfbae36ebcd216c6a5ad7f8534eebeaee2 (diff)
tasks, sched/core: With a grace period after finish_task_switch(), remove unnecessary code
Remove workarounds that were written before there was a grace period
after tasks left the runqueue in finish_task_switch().

In particular, now that tasks exiting the runqueue experience an RCU
grace period, none of the work performed by task_rcu_dereference()
except the rcu_dereference() is necessary, so replace
task_rcu_dereference() with rcu_dereference().
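
The resulting caller pattern is a plain RCU dereference inside an
existing read-side critical section. A minimal sketch of the pattern
(with "rq" standing in for any runqueue pointer, as in the callers
changed below):

	struct task_struct *p;

	rcu_read_lock();
	p = rcu_dereference(rq->curr);	/* cannot be freed before rcu_read_unlock() */
	if (p && !(p->flags & PF_EXITING))
		;	/* inspect p under RCU */
	rcu_read_unlock();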
Remove the code in rcuwait_wait_event() that checks to ensure the
current task has not exited. It is no longer necessary, as it is
guaranteed that any running task will experience an RCU grace period
after it leaves the runqueue.
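
As an illustration (a sketch, not part of this patch; the names "w"
and "done" are hypothetical), a minimal rcuwait pairing now needs no
->exit_state consideration on either side:

	static struct rcuwait w;
	static bool done;

	/* waiter */
	rcuwait_wait_event(&w, READ_ONCE(done));

	/* waker: the rcu_dereference(w->task) inside rcuwait_wake_up()
	 * is now safe even against an exiting waiter */
	WRITE_ONCE(done, true);
	rcuwait_wake_up(&w);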
Remove the comment in rcuwait_wake_up() as it is no longer relevant.
Ref: 8f95c90ceb54 ("sched/wait, RCU: Introduce rcuwait machinery")
Ref: 150593bf8693 ("sched/api: Introduce task_rcu_dereference() and try_get_task_struct()")
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux admin <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/87lfurdpk9.fsf_-_@x220.int.ebiederm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--	include/linux/rcuwait.h		20
-rw-r--r--	include/linux/sched/task.h	 1
-rw-r--r--	kernel/exit.c			67
-rw-r--r--	kernel/sched/fair.c		 2
-rw-r--r--	kernel/sched/membarrier.c	 4
5 files changed, 7 insertions, 87 deletions
diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 563290fc194f..75c97e4bbc57 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -6,16 +6,11 @@
 
 /*
  * rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
  *
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition, and reset as soon as we
+ * know that the condition has succeeded and are awoken.
  */
 struct rcuwait {
 	struct task_struct __rcu *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
  */
 #define rcuwait_wait_event(w, condition)				\
 ({									\
-	/*								\
-	 * Complain if we are called after do_exit()/exit_notify(),	\
-	 * as we cannot rely on the rcu critical region for the		\
-	 * wakeup side.							\
-	 */								\
-	WARN_ON(current->exit_state);					\
-									\
 	rcu_assign_pointer((w)->task, current);				\
 	for (;;) {							\
 		/*							\
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 153a683646ac..4b1c3b664f51 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -119,7 +119,6 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
 void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
diff --git a/kernel/exit.c b/kernel/exit.c
index 3bcaec2ea3ba..a46a50d67002 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -234,69 +234,6 @@ repeat:
 		goto repeat;
 }
 
-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
-	struct sighand_struct *sighand;
-	struct task_struct *task;
-
-	/*
-	 * We need to verify that release_task() was not called and thus
-	 * delayed_put_task_struct() can't run and drop the last reference
-	 * before rcu_read_unlock(). We check task->sighand != NULL,
-	 * but we can read the already freed and reused memory.
-	 */
-retry:
-	task = rcu_dereference(*ptask);
-	if (!task)
-		return NULL;
-
-	probe_kernel_address(&task->sighand, sighand);
-
-	/*
-	 * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
-	 * was already freed we can not miss the preceding update of this
-	 * pointer.
-	 */
-	smp_rmb();
-	if (unlikely(task != READ_ONCE(*ptask)))
-		goto retry;
-
-	/*
-	 * We've re-checked that "task == *ptask", now we have two different
-	 * cases:
-	 *
-	 * 1. This is actually the same task/task_struct. In this case
-	 *    sighand != NULL tells us it is still alive.
-	 *
-	 * 2. This is another task which got the same memory for task_struct.
-	 *    We can't know this of course, and we can not trust
-	 *    sighand != NULL.
-	 *
-	 *    In this case we actually return a random value, but this is
-	 *    correct.
-	 *
-	 *    If we return NULL - we can pretend that we actually noticed that
-	 *    *ptask was updated when the previous task has exited. Or pretend
-	 *    that probe_slab_address(&sighand) reads NULL.
-	 *
-	 *    If we return the new task (because sighand is not NULL for any
-	 *    reason) - this is fine too. This (new) task can't go away before
-	 *    another gp pass.
-	 *
-	 *    And note: We could even eliminate the false positive if re-read
-	 *    task->sighand once again to avoid the falsely NULL. But this case
-	 *    is very unlikely so we don't care.
-	 */
-	if (!sighand)
-		return NULL;
-
-	return task;
-}
-
 void rcuwait_wake_up(struct rcuwait *w)
 {
 	struct task_struct *task;
@@ -316,10 +253,6 @@ void rcuwait_wake_up(struct rcuwait *w)
 	 */
 	smp_mb(); /* (B) */
 
-	/*
-	 * Avoid using task_rcu_dereference() magic as long as we are careful,
-	 * see comment in rcuwait_wait_event() regarding ->exit_state.
-	 */
 	task = rcu_dereference(w->task);
 	if (task)
 		wake_up_process(task);
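
Note: if a reference to the task is needed past rcu_read_unlock(), the
removed comment's invariant (task->usage remains > 0 for the duration
of the RCU critical section) still holds, so a caller can pin the task
while inside the read-side section. An illustrative sketch, not part
of this patch:

	rcu_read_lock();
	p = rcu_dereference(rq->curr);
	if (p)
		get_task_struct(p);	/* usage > 0 here, so this is safe */
	rcu_read_unlock();

	/* ... use p outside the RCU read-side section ... */

	if (p)
		put_task_struct(p);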
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3101c662426d..5bc23996ffae 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1602,7 +1602,7 @@ static void task_numa_compare(struct task_numa_env *env,
 		return;
 
 	rcu_read_lock();
-	cur = task_rcu_dereference(&dst_rq->curr);
+	cur = rcu_dereference(dst_rq->curr);
 	if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
 		cur = NULL;
 
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index aa8d75804108..b14250a11608 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -71,7 +71,7 @@ static int membarrier_global_expedited(void)
 			continue;
 
 		rcu_read_lock();
-		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		p = rcu_dereference(cpu_rq(cpu)->curr);
 		if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
 				   MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
 			if (!fallback)
@@ -150,7 +150,7 @@ static int membarrier_private_expedited(int flags)
 		if (cpu == raw_smp_processor_id())
 			continue;
 		rcu_read_lock();
-		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		p = rcu_dereference(cpu_rq(cpu)->curr);
 		if (p && p->mm == current->mm) {
 			if (!fallback)
 				__cpumask_set_cpu(cpu, tmpmask);