diff options
author | Oleg Nesterov <oleg@redhat.com> | 2014-04-07 18:38:42 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-07 19:36:05 -0400 |
commit | abd50b39e783e1b6c75c7534c37f1eb2d94a89cd (patch) | |
tree | d7ff165c7bf97d54d3ff8bd1f925194a12784c46 | |
parent | dfccbb5e49a621c1b21a62527d61fc4305617aca (diff) |
wait: introduce EXIT_TRACE to avoid the racy EXIT_DEAD->EXIT_ZOMBIE transition
wait_task_zombie() first does EXIT_ZOMBIE->EXIT_DEAD transition and
drops tasklist_lock. If this task is not the natural child and it is
traced, we change its state back to EXIT_ZOMBIE for ->real_parent.
The last transition is racy; this is even documented in 50b8d257486a
"ptrace: partially fix the do_wait(WEXITED) vs EXIT_DEAD->EXIT_ZOMBIE
race". wait_consider_task() tries to detect this transition and clear
->notask_error but we can't rely on ptrace_reparented(), debugger can
exit and do ptrace_unlink() before its sub-thread sets EXIT_ZOMBIE.
And there is another problem which was missed before: this transition
can also race with reparent_leader() which doesn't reset ->exit_signal if
EXIT_DEAD, assuming that this task must be reaped by someone else. So
the tracee can be re-parented with ->exit_signal != SIGCHLD, and if
/sbin/init doesn't use __WALL it becomes unreapable. This was fixed by
the previous commit, but that was only a temporary hack.
1. Add the new exit_state, EXIT_TRACE. It means that the task is the
traced zombie, debugger is going to detach and notify its natural
parent.
This new state is actually EXIT_ZOMBIE | EXIT_DEAD. This way we
can avoid the changes in proc/kgdb code, get_task_state() still
reports "X (dead)" in this case.
Note: with or without this change userspace can see a Z -> X -> Z
transition. Not really bad, but probably makes sense to fix.
2. Change wait_task_zombie() to use EXIT_TRACE instead of EXIT_DEAD
if we need to notify the ->real_parent.
3. Revert the previous hack in reparent_leader(), now that EXIT_DEAD
is always the final state we can safely ignore such a task.
4. Change wait_consider_task() to check EXIT_TRACE separately and kill
the racy and no longer needed ptrace_reparented() case.
If ptrace == T an EXIT_TRACE thread should be simply ignored, the
owner of this state is going to ptrace_unlink() this task. We can
pretend that it was already removed from ->ptraced list.
Otherwise we should skip this thread too but clear ->notask_error,
we must be the natural parent and debugger is going to untrace and
notify us. IOW, this doesn't differ from "EXIT_ZOMBIE && p->ptrace"
even if the task was already untraced.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Jan Kratochvil <jan.kratochvil@redhat.com>
Reported-by: Michal Schmidt <mschmidt@redhat.com>
Tested-by: Michal Schmidt <mschmidt@redhat.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Lennart Poettering <lpoetter@redhat.com>
Cc: Roland McGrath <roland@hack.frob.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/sched.h | 1 | ||||
-rw-r--r-- | kernel/exit.c | 50 |
2 files changed, 22 insertions, 29 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index f8497059f88c..7781de5e5e7b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -212,6 +212,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); | |||
212 | /* in tsk->exit_state */ | 212 | /* in tsk->exit_state */ |
213 | #define EXIT_ZOMBIE 16 | 213 | #define EXIT_ZOMBIE 16 |
214 | #define EXIT_DEAD 32 | 214 | #define EXIT_DEAD 32 |
215 | #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) | ||
215 | /* in tsk->state again */ | 216 | /* in tsk->state again */ |
216 | #define TASK_DEAD 64 | 217 | #define TASK_DEAD 64 |
217 | #define TASK_WAKEKILL 128 | 218 | #define TASK_WAKEKILL 128 |
diff --git a/kernel/exit.c b/kernel/exit.c index e354cbb13a9b..022a0ff17318 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -560,6 +560,9 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, | |||
560 | struct list_head *dead) | 560 | struct list_head *dead) |
561 | { | 561 | { |
562 | list_move_tail(&p->sibling, &p->real_parent->children); | 562 | list_move_tail(&p->sibling, &p->real_parent->children); |
563 | |||
564 | if (p->exit_state == EXIT_DEAD) | ||
565 | return; | ||
563 | /* | 566 | /* |
564 | * If this is a threaded reparent there is no need to | 567 | * If this is a threaded reparent there is no need to |
565 | * notify anyone anything has happened. | 568 | * notify anyone anything has happened. |
@@ -567,19 +570,9 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, | |||
567 | if (same_thread_group(p->real_parent, father)) | 570 | if (same_thread_group(p->real_parent, father)) |
568 | return; | 571 | return; |
569 | 572 | ||
570 | /* | 573 | /* We don't want people slaying init. */ |
571 | * We don't want people slaying init. | ||
572 | * | ||
573 | * Note: we do this even if it is EXIT_DEAD, wait_task_zombie() | ||
574 | * can change ->exit_state to EXIT_ZOMBIE. If this is the final | ||
575 | * state, do_notify_parent() was already called and ->exit_signal | ||
576 | * doesn't matter. | ||
577 | */ | ||
578 | p->exit_signal = SIGCHLD; | 574 | p->exit_signal = SIGCHLD; |
579 | 575 | ||
580 | if (p->exit_state == EXIT_DEAD) | ||
581 | return; | ||
582 | |||
583 | /* If it has exited notify the new parent about this child's death. */ | 576 | /* If it has exited notify the new parent about this child's death. */ |
584 | if (!p->ptrace && | 577 | if (!p->ptrace && |
585 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { | 578 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { |
@@ -1043,17 +1036,13 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
1043 | return wait_noreap_copyout(wo, p, pid, uid, why, status); | 1036 | return wait_noreap_copyout(wo, p, pid, uid, why, status); |
1044 | } | 1037 | } |
1045 | 1038 | ||
1039 | traced = ptrace_reparented(p); | ||
1046 | /* | 1040 | /* |
1047 | * Try to move the task's state to DEAD | 1041 | * Move the task's state to DEAD/TRACE, only one thread can do this. |
1048 | * only one thread is allowed to do this: | ||
1049 | */ | 1042 | */ |
1050 | state = xchg(&p->exit_state, EXIT_DEAD); | 1043 | state = traced ? EXIT_TRACE : EXIT_DEAD; |
1051 | if (state != EXIT_ZOMBIE) { | 1044 | if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE) |
1052 | BUG_ON(state != EXIT_DEAD); | ||
1053 | return 0; | 1045 | return 0; |
1054 | } | ||
1055 | |||
1056 | traced = ptrace_reparented(p); | ||
1057 | /* | 1046 | /* |
1058 | * It can be ptraced but not reparented, check | 1047 | * It can be ptraced but not reparented, check |
1059 | * thread_group_leader() to filter out sub-threads. | 1048 | * thread_group_leader() to filter out sub-threads. |
@@ -1114,7 +1103,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
1114 | 1103 | ||
1115 | /* | 1104 | /* |
1116 | * Now we are sure this task is interesting, and no other | 1105 | * Now we are sure this task is interesting, and no other |
1117 | * thread can reap it because we set its state to EXIT_DEAD. | 1106 | * thread can reap it because its state == DEAD/TRACE. |
1118 | */ | 1107 | */ |
1119 | read_unlock(&tasklist_lock); | 1108 | read_unlock(&tasklist_lock); |
1120 | 1109 | ||
@@ -1159,14 +1148,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
1159 | * If this is not a sub-thread, notify the parent. | 1148 | * If this is not a sub-thread, notify the parent. |
1160 | * If parent wants a zombie, don't release it now. | 1149 | * If parent wants a zombie, don't release it now. |
1161 | */ | 1150 | */ |
1151 | state = EXIT_DEAD; | ||
1162 | if (thread_group_leader(p) && | 1152 | if (thread_group_leader(p) && |
1163 | !do_notify_parent(p, p->exit_signal)) { | 1153 | !do_notify_parent(p, p->exit_signal)) |
1164 | p->exit_state = EXIT_ZOMBIE; | 1154 | state = EXIT_ZOMBIE; |
1165 | p = NULL; | 1155 | p->exit_state = state; |
1166 | } | ||
1167 | write_unlock_irq(&tasklist_lock); | 1156 | write_unlock_irq(&tasklist_lock); |
1168 | } | 1157 | } |
1169 | if (p != NULL) | 1158 | if (state == EXIT_DEAD) |
1170 | release_task(p); | 1159 | release_task(p); |
1171 | 1160 | ||
1172 | return retval; | 1161 | return retval; |
@@ -1362,12 +1351,15 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, | |||
1362 | } | 1351 | } |
1363 | 1352 | ||
1364 | /* dead body doesn't have much to contribute */ | 1353 | /* dead body doesn't have much to contribute */ |
1365 | if (unlikely(p->exit_state == EXIT_DEAD)) { | 1354 | if (unlikely(p->exit_state == EXIT_DEAD)) |
1355 | return 0; | ||
1356 | |||
1357 | if (unlikely(p->exit_state == EXIT_TRACE)) { | ||
1366 | /* | 1358 | /* |
1367 | * But do not ignore this task until the tracer does | 1359 | * ptrace == 0 means we are the natural parent. In this case |
1368 | * wait_task_zombie()->do_notify_parent(). | 1360 | * we should clear notask_error, debugger will notify us. |
1369 | */ | 1361 | */ |
1370 | if (likely(!ptrace) && unlikely(ptrace_reparented(p))) | 1362 | if (likely(!ptrace)) |
1371 | wo->notask_error = 0; | 1363 | wo->notask_error = 0; |
1372 | return 0; | 1364 | return 0; |
1373 | } | 1365 | } |