aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Oleg Nesterov <oleg@redhat.com> 2014-04-07 18:38:42 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-04-07 19:36:05 -0400
commit abd50b39e783e1b6c75c7534c37f1eb2d94a89cd (patch)
tree d7ff165c7bf97d54d3ff8bd1f925194a12784c46
parent dfccbb5e49a621c1b21a62527d61fc4305617aca (diff)
wait: introduce EXIT_TRACE to avoid the racy EXIT_DEAD->EXIT_ZOMBIE transition
wait_task_zombie() first does the EXIT_ZOMBIE->EXIT_DEAD transition and drops tasklist_lock. If this task is not the natural child and it is traced, we change its state back to EXIT_ZOMBIE for ->real_parent. The last transition is racy; this is even documented in 50b8d257486a "ptrace: partially fix the do_wait(WEXITED) vs EXIT_DEAD->EXIT_ZOMBIE race". wait_consider_task() tries to detect this transition and clear ->notask_error, but we can't rely on ptrace_reparented(): the debugger can exit and do ptrace_unlink() before its sub-thread sets EXIT_ZOMBIE. And there is another problem which was missed before: this transition can also race with reparent_leader(), which doesn't reset ->exit_signal if EXIT_DEAD, assuming that this task must be reaped by someone else. So the tracee can be re-parented with ->exit_signal != SIGCHLD, and if /sbin/init doesn't use __WALL it becomes unreapable. This was fixed by the previous commit, but it was a temporary hack. 1. Add the new exit_state, EXIT_TRACE. It means that the task is a traced zombie and the debugger is going to detach and notify its natural parent. This new state is actually EXIT_ZOMBIE | EXIT_DEAD. This way we can avoid the changes in proc/kgdb code; get_task_state() still reports "X (dead)" in this case. Note: with or without this change userspace can see the Z -> X -> Z transition. Not really bad, but it probably makes sense to fix. 2. Change wait_task_zombie() to use EXIT_TRACE instead of EXIT_DEAD if we need to notify the ->real_parent. 3. Revert the previous hack in reparent_leader(); now that EXIT_DEAD is always the final state we can safely ignore such a task. 4. Change wait_consider_task() to check EXIT_TRACE separately and kill the racy and no longer needed ptrace_reparented() case. If ptrace == T an EXIT_TRACE thread should simply be ignored; the owner of this state is going to ptrace_unlink() this task. We can pretend that it was already removed from the ->ptraced list. 
Otherwise we should skip this thread too but clear ->notask_error, because we must be the natural parent and the debugger is going to untrace and notify us. IOW, this doesn't differ from "EXIT_ZOMBIE && p->ptrace" even if the task was already untraced. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Reported-by: Jan Kratochvil <jan.kratochvil@redhat.com> Reported-by: Michal Schmidt <mschmidt@redhat.com> Tested-by: Michal Schmidt <mschmidt@redhat.com> Cc: Al Viro <viro@ZenIV.linux.org.uk> Cc: Lennart Poettering <lpoetter@redhat.com> Cc: Roland McGrath <roland@hack.frob.com> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/sched.h 1
-rw-r--r-- kernel/exit.c 50
2 files changed, 22 insertions, 29 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f8497059f88c..7781de5e5e7b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -212,6 +212,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
212/* in tsk->exit_state */ 212/* in tsk->exit_state */
213#define EXIT_ZOMBIE 16 213#define EXIT_ZOMBIE 16
214#define EXIT_DEAD 32 214#define EXIT_DEAD 32
215#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
215/* in tsk->state again */ 216/* in tsk->state again */
216#define TASK_DEAD 64 217#define TASK_DEAD 64
217#define TASK_WAKEKILL 128 218#define TASK_WAKEKILL 128
diff --git a/kernel/exit.c b/kernel/exit.c
index e354cbb13a9b..022a0ff17318 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -560,6 +560,9 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
560 struct list_head *dead) 560 struct list_head *dead)
561{ 561{
562 list_move_tail(&p->sibling, &p->real_parent->children); 562 list_move_tail(&p->sibling, &p->real_parent->children);
563
564 if (p->exit_state == EXIT_DEAD)
565 return;
563 /* 566 /*
564 * If this is a threaded reparent there is no need to 567 * If this is a threaded reparent there is no need to
565 * notify anyone anything has happened. 568 * notify anyone anything has happened.
@@ -567,19 +570,9 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
567 if (same_thread_group(p->real_parent, father)) 570 if (same_thread_group(p->real_parent, father))
568 return; 571 return;
569 572
570 /* 573 /* We don't want people slaying init. */
571 * We don't want people slaying init.
572 *
573 * Note: we do this even if it is EXIT_DEAD, wait_task_zombie()
574 * can change ->exit_state to EXIT_ZOMBIE. If this is the final
575 * state, do_notify_parent() was already called and ->exit_signal
576 * doesn't matter.
577 */
578 p->exit_signal = SIGCHLD; 574 p->exit_signal = SIGCHLD;
579 575
580 if (p->exit_state == EXIT_DEAD)
581 return;
582
583 /* If it has exited notify the new parent about this child's death. */ 576 /* If it has exited notify the new parent about this child's death. */
584 if (!p->ptrace && 577 if (!p->ptrace &&
585 p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { 578 p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
@@ -1043,17 +1036,13 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1043 return wait_noreap_copyout(wo, p, pid, uid, why, status); 1036 return wait_noreap_copyout(wo, p, pid, uid, why, status);
1044 } 1037 }
1045 1038
1039 traced = ptrace_reparented(p);
1046 /* 1040 /*
1047 * Try to move the task's state to DEAD 1041 * Move the task's state to DEAD/TRACE, only one thread can do this.
1048 * only one thread is allowed to do this:
1049 */ 1042 */
1050 state = xchg(&p->exit_state, EXIT_DEAD); 1043 state = traced ? EXIT_TRACE : EXIT_DEAD;
1051 if (state != EXIT_ZOMBIE) { 1044 if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
1052 BUG_ON(state != EXIT_DEAD);
1053 return 0; 1045 return 0;
1054 }
1055
1056 traced = ptrace_reparented(p);
1057 /* 1046 /*
1058 * It can be ptraced but not reparented, check 1047 * It can be ptraced but not reparented, check
1059 * thread_group_leader() to filter out sub-threads. 1048 * thread_group_leader() to filter out sub-threads.
@@ -1114,7 +1103,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1114 1103
1115 /* 1104 /*
1116 * Now we are sure this task is interesting, and no other 1105 * Now we are sure this task is interesting, and no other
1117 * thread can reap it because we set its state to EXIT_DEAD. 1106 * thread can reap it because its state == DEAD/TRACE.
1118 */ 1107 */
1119 read_unlock(&tasklist_lock); 1108 read_unlock(&tasklist_lock);
1120 1109
@@ -1159,14 +1148,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1159 * If this is not a sub-thread, notify the parent. 1148 * If this is not a sub-thread, notify the parent.
1160 * If parent wants a zombie, don't release it now. 1149 * If parent wants a zombie, don't release it now.
1161 */ 1150 */
1151 state = EXIT_DEAD;
1162 if (thread_group_leader(p) && 1152 if (thread_group_leader(p) &&
1163 !do_notify_parent(p, p->exit_signal)) { 1153 !do_notify_parent(p, p->exit_signal))
1164 p->exit_state = EXIT_ZOMBIE; 1154 state = EXIT_ZOMBIE;
1165 p = NULL; 1155 p->exit_state = state;
1166 }
1167 write_unlock_irq(&tasklist_lock); 1156 write_unlock_irq(&tasklist_lock);
1168 } 1157 }
1169 if (p != NULL) 1158 if (state == EXIT_DEAD)
1170 release_task(p); 1159 release_task(p);
1171 1160
1172 return retval; 1161 return retval;
@@ -1362,12 +1351,15 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
1362 } 1351 }
1363 1352
1364 /* dead body doesn't have much to contribute */ 1353 /* dead body doesn't have much to contribute */
1365 if (unlikely(p->exit_state == EXIT_DEAD)) { 1354 if (unlikely(p->exit_state == EXIT_DEAD))
1355 return 0;
1356
1357 if (unlikely(p->exit_state == EXIT_TRACE)) {
1366 /* 1358 /*
1367 * But do not ignore this task until the tracer does 1359 * ptrace == 0 means we are the natural parent. In this case
1368 * wait_task_zombie()->do_notify_parent(). 1360 * we should clear notask_error, debugger will notify us.
1369 */ 1361 */
1370 if (likely(!ptrace) && unlikely(ptrace_reparented(p))) 1362 if (likely(!ptrace))
1371 wo->notask_error = 0; 1363 wo->notask_error = 0;
1372 return 0; 1364 return 0;
1373 } 1365 }