diff options
| author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2014-08-04 09:10:23 -0400 |
|---|---|---|
| committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2014-09-07 19:27:22 -0400 |
| commit | 3f95aa81d265223fdb13ea2b59883766a05adbdf (patch) | |
| tree | 5b6d2c42aaf8b20397bd09c0ac31738618f57046 /kernel | |
| parent | 53c6d4edf874d3cbc031a53738c6cba9277faea5 (diff) | |
rcu: Make TASKS_RCU handle tasks that are almost done exiting

Once a task has passed exit_notify() in the do_exit() code path, it
is no longer on the task lists, and is therefore no longer visible
to rcu_tasks_kthread(). This means that an almost-exited task might
be preempted while within a trampoline, and this task won't be waited
on by rcu_tasks_kthread(). This commit fixes this bug by adding an
srcu_struct named tasks_rcu_exit_srcu. An exiting task does
srcu_read_lock() just before calling
exit_notify(), and does the corresponding srcu_read_unlock() after
doing the final preempt_disable(). This means that rcu_tasks_kthread()
can do synchronize_srcu() to wait for all mostly-exited tasks to reach
their final preempt_disable() region, and then use synchronize_sched()
to wait for those tasks to finish exiting.
Reported-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/exit.c | 3 | ||||
| -rw-r--r-- | kernel/rcu/update.c | 21 |
2 files changed, 24 insertions, 0 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index 32c58f7433a3..d13f2eec4bb8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -667,6 +667,7 @@ void do_exit(long code) | |||
| 667 | { | 667 | { |
| 668 | struct task_struct *tsk = current; | 668 | struct task_struct *tsk = current; |
| 669 | int group_dead; | 669 | int group_dead; |
| 670 | TASKS_RCU(int tasks_rcu_i); | ||
| 670 | 671 | ||
| 671 | profile_task_exit(tsk); | 672 | profile_task_exit(tsk); |
| 672 | 673 | ||
| @@ -775,6 +776,7 @@ void do_exit(long code) | |||
| 775 | */ | 776 | */ |
| 776 | flush_ptrace_hw_breakpoint(tsk); | 777 | flush_ptrace_hw_breakpoint(tsk); |
| 777 | 778 | ||
| 779 | TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu)); | ||
| 778 | exit_notify(tsk, group_dead); | 780 | exit_notify(tsk, group_dead); |
| 779 | proc_exit_connector(tsk); | 781 | proc_exit_connector(tsk); |
| 780 | #ifdef CONFIG_NUMA | 782 | #ifdef CONFIG_NUMA |
| @@ -814,6 +816,7 @@ void do_exit(long code) | |||
| 814 | if (tsk->nr_dirtied) | 816 | if (tsk->nr_dirtied) |
| 815 | __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); | 817 | __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); |
| 816 | exit_rcu(); | 818 | exit_rcu(); |
| 819 | TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i)); | ||
| 817 | 820 | ||
| 818 | /* | 821 | /* |
| 819 | * The setting of TASK_RUNNING by try_to_wake_up() may be delayed | 822 | * The setting of TASK_RUNNING by try_to_wake_up() may be delayed |
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 5fd1ddbfcc55..403fc4ae539e 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c | |||
| @@ -367,6 +367,13 @@ static struct rcu_head *rcu_tasks_cbs_head; | |||
| 367 | static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head; | 367 | static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head; |
| 368 | static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); | 368 | static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); |
| 369 | 369 | ||
| 370 | /* Track exiting tasks in order to allow them to be waited for. */ | ||
| 371 | DEFINE_SRCU(tasks_rcu_exit_srcu); | ||
| 372 | |||
| 373 | /* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */ | ||
| 374 | static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 3; | ||
| 375 | module_param(rcu_task_stall_timeout, int, 0644); | ||
| 376 | |||
| 370 | /* Post an RCU-tasks callback. */ | 377 | /* Post an RCU-tasks callback. */ |
| 371 | void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) | 378 | void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) |
| 372 | { | 379 | { |
| @@ -518,6 +525,15 @@ static int __noreturn rcu_tasks_kthread(void *arg) | |||
| 518 | rcu_read_unlock(); | 525 | rcu_read_unlock(); |
| 519 | 526 | ||
| 520 | /* | 527 | /* |
| 528 | * Wait for tasks that are in the process of exiting. | ||
| 529 | * This does only part of the job, ensuring that all | ||
| 530 | * tasks that were previously exiting reach the point | ||
| 531 | * where they have disabled preemption, allowing the | ||
| 532 | * later synchronize_sched() to finish the job. | ||
| 533 | */ | ||
| 534 | synchronize_srcu(&tasks_rcu_exit_srcu); | ||
| 535 | |||
| 536 | /* | ||
| 521 | * Each pass through the following loop scans the list | 537 | * Each pass through the following loop scans the list |
| 522 | * of holdout tasks, removing any that are no longer | 538 | * of holdout tasks, removing any that are no longer |
| 523 | * holdouts. When the list is empty, we are done. | 539 | * holdouts. When the list is empty, we are done. |
| @@ -546,6 +562,11 @@ static int __noreturn rcu_tasks_kthread(void *arg) | |||
| 546 | * ->rcu_tasks_holdout accesses to be within the grace | 562 | * ->rcu_tasks_holdout accesses to be within the grace |
| 547 | * period, avoiding the need for memory barriers for | 563 | * period, avoiding the need for memory barriers for |
| 548 | * ->rcu_tasks_holdout accesses. | 564 | * ->rcu_tasks_holdout accesses. |
| 565 | * | ||
| 566 | * In addition, this synchronize_sched() waits for exiting | ||
| 567 | * tasks to complete their final preempt_disable() region | ||
| 568 | * of execution, cleaning up after the synchronize_srcu() | ||
| 569 | * above. | ||
| 549 | */ | 570 | */ |
| 550 | synchronize_sched(); | 571 | synchronize_sched(); |
| 551 | 572 | ||
