author		Eric W. Biederman <ebiederm@xmission.com>	2019-09-14 08:34:30 -0400
committer	Ingo Molnar <mingo@kernel.org>		2019-09-25 11:42:29 -0400
commit		154abafc68bfb7c2ef2ad5308a3b2de8968c3f61 (patch)
tree		764142945e91bb8943633d7e8eee33b6c13f7bbf
parent		0ff7b2cfbae36ebcd216c6a5ad7f8534eebeaee2 (diff)
tasks, sched/core: With a grace period after finish_task_switch(), remove unnecessary code
Remove workarounds that were written before there was a grace period
after tasks left the runqueue in finish_task_switch().

In particular, now that tasks exiting the runqueue experience an RCU
grace period, none of the work performed by task_rcu_dereference()
except the rcu_dereference() is necessary, so replace
task_rcu_dereference() with rcu_dereference().
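
The resulting caller pattern is a plain RCU dereference inside an
existing read-side critical section. A minimal sketch of the pattern
(with "rq" standing in for any runqueue pointer, as in the callers
changed below):

	struct task_struct *p;

	rcu_read_lock();
	p = rcu_dereference(rq->curr);	/* cannot be freed before rcu_read_unlock() */
	if (p && !(p->flags & PF_EXITING))
		;	/* inspect p under RCU */
	rcu_read_unlock();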
Remove the code in rcuwait_wait_event() that checks to ensure the
current task has not exited. It is no longer necessary, as it is
guaranteed that any running task will experience an RCU grace period
after it leaves the runqueue.
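
As an illustration (a sketch, not part of this patch; the names "w"
and "done" are hypothetical), a minimal rcuwait pairing now needs no
->exit_state consideration on either side:

	static struct rcuwait w;
	static bool done;

	/* waiter */
	rcuwait_wait_event(&w, READ_ONCE(done));

	/* waker: the rcu_dereference(w->task) inside rcuwait_wake_up()
	 * is now safe even against an exiting waiter */
	WRITE_ONCE(done, true);
	rcuwait_wake_up(&w);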
Remove the comment in rcuwait_wake_up() as it is no longer relevant.
Ref: 8f95c90ceb54 ("sched/wait, RCU: Introduce rcuwait machinery")
Ref: 150593bf8693 ("sched/api: Introduce task_rcu_dereference() and try_get_task_struct()")
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux admin <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/87lfurdpk9.fsf_-_@x220.int.ebiederm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--	include/linux/rcuwait.h		20
-rw-r--r--	include/linux/sched/task.h	 1
-rw-r--r--	kernel/exit.c			67
-rw-r--r--	kernel/sched/fair.c		 2
-rw-r--r--	kernel/sched/membarrier.c	 4
5 files changed, 7 insertions, 87 deletions
diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 563290fc194f..75c97e4bbc57 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -6,16 +6,11 @@
 
 /*
  * rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
  *
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition, and reset as soon as we
+ * know that the condition has succeeded and are awoken.
  */
 struct rcuwait {
 	struct task_struct __rcu *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
  */
 #define rcuwait_wait_event(w, condition)				\
 ({									\
-	/*								\
-	 * Complain if we are called after do_exit()/exit_notify(),	\
-	 * as we cannot rely on the rcu critical region for the		\
-	 * wakeup side.							\
-	 */								\
-	WARN_ON(current->exit_state);					\
-									\
 	rcu_assign_pointer((w)->task, current);				\
 	for (;;) {							\
 		/*							\
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 153a683646ac..4b1c3b664f51 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -119,7 +119,6 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
 void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
diff --git a/kernel/exit.c b/kernel/exit.c
index 3bcaec2ea3ba..a46a50d67002 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -234,69 +234,6 @@ repeat:
 		goto repeat;
 }
 
-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
-	struct sighand_struct *sighand;
-	struct task_struct *task;
-
-	/*
-	 * We need to verify that release_task() was not called and thus
-	 * delayed_put_task_struct() can't run and drop the last reference
-	 * before rcu_read_unlock(). We check task->sighand != NULL,
-	 * but we can read the already freed and reused memory.
-	 */
-retry:
-	task = rcu_dereference(*ptask);
-	if (!task)
-		return NULL;
-
-	probe_kernel_address(&task->sighand, sighand);
-
-	/*
-	 * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
-	 * was already freed we can not miss the preceding update of this
-	 * pointer.
-	 */
-	smp_rmb();
-	if (unlikely(task != READ_ONCE(*ptask)))
-		goto retry;
-
-	/*
-	 * We've re-checked that "task == *ptask", now we have two different
-	 * cases:
-	 *
-	 * 1. This is actually the same task/task_struct. In this case
-	 *    sighand != NULL tells us it is still alive.
-	 *
-	 * 2. This is another task which got the same memory for task_struct.
-	 *    We can't know this of course, and we can not trust
-	 *    sighand != NULL.
-	 *
-	 *    In this case we actually return a random value, but this is
-	 *    correct.
-	 *
-	 *    If we return NULL - we can pretend that we actually noticed that
-	 *    *ptask was updated when the previous task has exited. Or pretend
-	 *    that probe_slab_address(&sighand) reads NULL.
-	 *
-	 *    If we return the new task (because sighand is not NULL for any
-	 *    reason) - this is fine too. This (new) task can't go away before
-	 *    another gp pass.
-	 *
-	 *    And note: We could even eliminate the false positive if re-read
-	 *    task->sighand once again to avoid the falsely NULL. But this case
-	 *    is very unlikely so we don't care.
-	 */
-	if (!sighand)
-		return NULL;
-
-	return task;
-}
-
 void rcuwait_wake_up(struct rcuwait *w)
 {
 	struct task_struct *task;
@@ -316,10 +253,6 @@ void rcuwait_wake_up(struct rcuwait *w)
 	 */
 	smp_mb(); /* (B) */
 
-	/*
-	 * Avoid using task_rcu_dereference() magic as long as we are careful,
-	 * see comment in rcuwait_wait_event() regarding ->exit_state.
-	 */
 	task = rcu_dereference(w->task);
 	if (task)
 		wake_up_process(task);
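
Note: if a reference to the task is needed past rcu_read_unlock(), the
removed comment's invariant (task->usage remains > 0 for the duration
of the RCU critical section) still holds, so a caller can pin the task
while inside the read-side section. An illustrative sketch, not part
of this patch:

	rcu_read_lock();
	p = rcu_dereference(rq->curr);
	if (p)
		get_task_struct(p);	/* usage > 0 here, so this is safe */
	rcu_read_unlock();

	/* ... use p outside the RCU read-side section ... */

	if (p)
		put_task_struct(p);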
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3101c662426d..5bc23996ffae 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1602,7 +1602,7 @@ static void task_numa_compare(struct task_numa_env *env,
 		return;
 
 	rcu_read_lock();
-	cur = task_rcu_dereference(&dst_rq->curr);
+	cur = rcu_dereference(dst_rq->curr);
 	if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
 		cur = NULL;
 
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index aa8d75804108..b14250a11608 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -71,7 +71,7 @@ static int membarrier_global_expedited(void)
 			continue;
 
 		rcu_read_lock();
-		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		p = rcu_dereference(cpu_rq(cpu)->curr);
 		if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
 				   MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
 			if (!fallback)
@@ -150,7 +150,7 @@ static int membarrier_private_expedited(int flags)
 		if (cpu == raw_smp_processor_id())
 			continue;
 		rcu_read_lock();
-		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		p = rcu_dereference(cpu_rq(cpu)->curr);
 		if (p && p->mm == current->mm) {
 			if (!fallback)
 				__cpumask_set_cpu(cpu, tmpmask);