author    Eric W. Biederman <ebiederm@xmission.com>  2019-09-14 08:34:30 -0400
committer Ingo Molnar <mingo@kernel.org>  2019-09-25 11:42:29 -0400
commit    154abafc68bfb7c2ef2ad5308a3b2de8968c3f61 (patch)
tree      764142945e91bb8943633d7e8eee33b6c13f7bbf
parent    0ff7b2cfbae36ebcd216c6a5ad7f8534eebeaee2 (diff)
tasks, sched/core: With a grace period after finish_task_switch(), remove unnecessary code
Remove workarounds that were written before there was a grace period
after tasks left the runqueue in finish_task_switch().

In particular, now that tasks leaving the runqueue experience an RCU
grace period, none of the work performed by task_rcu_dereference()
except the rcu_dereference() is necessary, so replace
task_rcu_dereference() with rcu_dereference().

Remove the code in rcuwait_wait_event() that checks to ensure the
current task has not exited. It is no longer necessary, as it is
guaranteed that any running task will experience an RCU grace period
after it leaves the runqueue.

Remove the comment in rcuwait_wake_up() as it is no longer relevant.

Ref: 8f95c90ceb54 ("sched/wait, RCU: Introduce rcuwait machinery")
Ref: 150593bf8693 ("sched/api: Introduce task_rcu_dereference() and try_get_task_struct()")
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux admin <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/87lfurdpk9.fsf_-_@x220.int.ebiederm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
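Aside (illustrative sketch, not part of this commit): the read-side
pattern this change enables looks roughly like the helper below.
inspect_cpu_curr() is a hypothetical name, and the sketch assumes a
kernel/sched context where cpu_rq() is in scope; the point is that a
plain rcu_read_lock()/rcu_dereference() pair now suffices to safely
examine a remote rq->curr.

/*
 * Illustrative sketch only -- not in this commit. A task that leaves
 * the runqueue is now guaranteed a full RCU grace period before its
 * task_struct can be freed, so a plain rcu_dereference() is enough;
 * no probe_kernel_address()/re-check retry loop is required.
 */
static void inspect_cpu_curr(int cpu)	/* hypothetical helper */
{
	struct task_struct *p;

	rcu_read_lock();
	p = rcu_dereference(cpu_rq(cpu)->curr);
	if (p && !is_idle_task(p))
		pr_info("cpu %d: pid %d (%s)\n", cpu, p->pid, p->comm);
	rcu_read_unlock();
}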
-rw-r--r--include/linux/rcuwait.h20
-rw-r--r--include/linux/sched/task.h1
-rw-r--r--kernel/exit.c67
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/sched/membarrier.c4
5 files changed, 7 insertions, 87 deletions
diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 563290fc194f..75c97e4bbc57 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -6,16 +6,11 @@
 
 /*
  * rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
  *
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition, and reset as soon as we
+ * know that the condition has succeeded and are awoken.
  */
 struct rcuwait {
 	struct task_struct __rcu *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
  */
 #define rcuwait_wait_event(w, condition)				\
 ({									\
-	/*								\
-	 * Complain if we are called after do_exit()/exit_notify(),	\
-	 * as we cannot rely on the rcu critical region for the	\
-	 * wakeup side.							\
-	 */								\
-	WARN_ON(current->exit_state);					\
-									\
 	rcu_assign_pointer((w)->task, current);				\
 	for (;;) {							\
 		/*							\
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 153a683646ac..4b1c3b664f51 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -119,7 +119,6 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
 void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
diff --git a/kernel/exit.c b/kernel/exit.c
index 3bcaec2ea3ba..a46a50d67002 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -234,69 +234,6 @@ repeat:
 		goto repeat;
 }
 
-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
-	struct sighand_struct *sighand;
-	struct task_struct *task;
-
-	/*
-	 * We need to verify that release_task() was not called and thus
-	 * delayed_put_task_struct() can't run and drop the last reference
-	 * before rcu_read_unlock(). We check task->sighand != NULL,
-	 * but we can read the already freed and reused memory.
-	 */
-retry:
-	task = rcu_dereference(*ptask);
-	if (!task)
-		return NULL;
-
-	probe_kernel_address(&task->sighand, sighand);
-
-	/*
-	 * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
-	 * was already freed we can not miss the preceding update of this
-	 * pointer.
-	 */
-	smp_rmb();
-	if (unlikely(task != READ_ONCE(*ptask)))
-		goto retry;
-
-	/*
-	 * We've re-checked that "task == *ptask", now we have two different
-	 * cases:
-	 *
-	 * 1. This is actually the same task/task_struct. In this case
-	 *    sighand != NULL tells us it is still alive.
-	 *
-	 * 2. This is another task which got the same memory for task_struct.
-	 *    We can't know this of course, and we can not trust
-	 *    sighand != NULL.
-	 *
-	 *    In this case we actually return a random value, but this is
-	 *    correct.
-	 *
-	 *    If we return NULL - we can pretend that we actually noticed that
-	 *    *ptask was updated when the previous task has exited. Or pretend
-	 *    that probe_slab_address(&sighand) reads NULL.
-	 *
-	 *    If we return the new task (because sighand is not NULL for any
-	 *    reason) - this is fine too. This (new) task can't go away before
-	 *    another gp pass.
-	 *
-	 *    And note: We could even eliminate the false positive if re-read
-	 *    task->sighand once again to avoid the falsely NULL. But this case
-	 *    is very unlikely so we don't care.
-	 */
-	if (!sighand)
-		return NULL;
-
-	return task;
-}
-
 void rcuwait_wake_up(struct rcuwait *w)
 {
 	struct task_struct *task;
@@ -316,10 +253,6 @@ void rcuwait_wake_up(struct rcuwait *w)
 	 */
 	smp_mb(); /* (B) */
 
-	/*
-	 * Avoid using task_rcu_dereference() magic as long as we are careful,
-	 * see comment in rcuwait_wait_event() regarding ->exit_state.
-	 */
 	task = rcu_dereference(w->task);
 	if (task)
 		wake_up_process(task);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3101c662426d..5bc23996ffae 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1602,7 +1602,7 @@ static void task_numa_compare(struct task_numa_env *env,
 		return;
 
 	rcu_read_lock();
-	cur = task_rcu_dereference(&dst_rq->curr);
+	cur = rcu_dereference(dst_rq->curr);
 	if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
 		cur = NULL;
 
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index aa8d75804108..b14250a11608 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -71,7 +71,7 @@ static int membarrier_global_expedited(void)
 			continue;
 
 		rcu_read_lock();
-		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		p = rcu_dereference(cpu_rq(cpu)->curr);
 		if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
 				   MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
 			if (!fallback)
@@ -150,7 +150,7 @@ static int membarrier_private_expedited(int flags)
 		if (cpu == raw_smp_processor_id())
 			continue;
 		rcu_read_lock();
-		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		p = rcu_dereference(cpu_rq(cpu)->curr);
 		if (p && p->mm == current->mm) {
 			if (!fallback)
 				__cpumask_set_cpu(cpu, tmpmask);