aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2014-08-04 09:10:23 -0400
committer: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2014-09-07 19:27:22 -0400
commit: 3f95aa81d265223fdb13ea2b59883766a05adbdf (patch)
tree: 5b6d2c42aaf8b20397bd09c0ac31738618f57046 /kernel
parent: 53c6d4edf874d3cbc031a53738c6cba9277faea5 (diff)
rcu: Make TASKS_RCU handle tasks that are almost done exiting
Once a task has passed exit_notify() in the do_exit() code path, it is no longer on the task lists, and is therefore no longer visible to rcu_tasks_kthread(). This means that an almost-exited task might be preempted while within a trampoline, and this task won't be waited on by rcu_tasks_kthread().

This commit fixes this bug by adding an srcu_struct. An exiting task does srcu_read_lock() just before calling exit_notify(), and does the corresponding srcu_read_unlock() after doing the final preempt_disable(). This means that rcu_tasks_kthread() can do synchronize_srcu() to wait for all mostly-exited tasks to reach their final preempt_disable() region, and then use synchronize_sched() to wait for those tasks to finish exiting.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/exit.c 3
-rw-r--r-- kernel/rcu/update.c 21
2 files changed, 24 insertions, 0 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 32c58f7433a3..d13f2eec4bb8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -667,6 +667,7 @@ void do_exit(long code)
667{ 667{
668 struct task_struct *tsk = current; 668 struct task_struct *tsk = current;
669 int group_dead; 669 int group_dead;
670 TASKS_RCU(int tasks_rcu_i);
670 671
671 profile_task_exit(tsk); 672 profile_task_exit(tsk);
672 673
@@ -775,6 +776,7 @@ void do_exit(long code)
775 */ 776 */
776 flush_ptrace_hw_breakpoint(tsk); 777 flush_ptrace_hw_breakpoint(tsk);
777 778
779 TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
778 exit_notify(tsk, group_dead); 780 exit_notify(tsk, group_dead);
779 proc_exit_connector(tsk); 781 proc_exit_connector(tsk);
780#ifdef CONFIG_NUMA 782#ifdef CONFIG_NUMA
@@ -814,6 +816,7 @@ void do_exit(long code)
814 if (tsk->nr_dirtied) 816 if (tsk->nr_dirtied)
815 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); 817 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
816 exit_rcu(); 818 exit_rcu();
819 TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
817 820
818 /* 821 /*
819 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed 822 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 5fd1ddbfcc55..403fc4ae539e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -367,6 +367,13 @@ static struct rcu_head *rcu_tasks_cbs_head;
367static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head; 367static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
368static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); 368static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
369 369
370/* Track exiting tasks in order to allow them to be waited for. */
371DEFINE_SRCU(tasks_rcu_exit_srcu);
372
373/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
374static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 3;
375module_param(rcu_task_stall_timeout, int, 0644);
376
370/* Post an RCU-tasks callback. */ 377/* Post an RCU-tasks callback. */
371void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) 378void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
372{ 379{
@@ -518,6 +525,15 @@ static int __noreturn rcu_tasks_kthread(void *arg)
518 rcu_read_unlock(); 525 rcu_read_unlock();
519 526
520 /* 527 /*
528 * Wait for tasks that are in the process of exiting.
529 * This does only part of the job, ensuring that all
530 * tasks that were previously exiting reach the point
531 * where they have disabled preemption, allowing the
532 * later synchronize_sched() to finish the job.
533 */
534 synchronize_srcu(&tasks_rcu_exit_srcu);
535
536 /*
521 * Each pass through the following loop scans the list 537 * Each pass through the following loop scans the list
522 * of holdout tasks, removing any that are no longer 538 * of holdout tasks, removing any that are no longer
523 * holdouts. When the list is empty, we are done. 539 * holdouts. When the list is empty, we are done.
@@ -546,6 +562,11 @@ static int __noreturn rcu_tasks_kthread(void *arg)
546 * ->rcu_tasks_holdout accesses to be within the grace 562 * ->rcu_tasks_holdout accesses to be within the grace
547 * period, avoiding the need for memory barriers for 563 * period, avoiding the need for memory barriers for
548 * ->rcu_tasks_holdout accesses. 564 * ->rcu_tasks_holdout accesses.
565 *
566 * In addition, this synchronize_sched() waits for exiting
567 * tasks to complete their final preempt_disable() region
568 * of execution, cleaning up after the synchronize_srcu()
569 * above.
549 */ 570 */
550 synchronize_sched(); 571 synchronize_sched();
551 572