aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Oleg Nesterov <oleg@redhat.com> 2014-12-10 18:55:28 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-12-10 20:41:18 -0500
commit: a53b831549141aa060a8b54b76e3a42870d74cc0 (patch)
tree: 51296a9922f1658a7f4a0a2498b1d9ce9d92bb2b
parent: 24c037ebf5723d4d9ab0996433cee4f96c292a4d (diff)
exit: pidns: fix/update the comments in zap_pid_ns_processes()
The comments in zap_pid_ns_processes() are not clear; we need to explain how this code actually works.

1. "Ignore SIGCHLD" looks like a pure optimization but it is not; we also need this for correctness.

2. The comment above sys_wait4() could tell more. An EXIT_ZOMBIE child is only possible if it has exited before we ignored SIGCHLD, or if it is traced from the parent namespace; in the latter case it will be reaped by the debugger after detach, and sys_wait4() acts as a synchronization point.

3. The comment about TASK_DEAD (EXIT_DEAD in fact) children is outdated. Contrary to what it says, we do not need to make sure they all go away after 0a01f2cc390e "pidns: Make the pidns proc mount/umount logic obvious". At the same time, we do need to wait for nr_hashed==init_pids, but the reasons are quite different and not obvious: setns().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
Cc: Sterling Alexander <stalexan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- kernel/pid_namespace.c 28
1 files changed, 24 insertions, 4 deletions
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index db95d8eb761b..bc6d6a89b6e6 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -190,7 +190,11 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
190 /* Don't allow any more processes into the pid namespace */ 190 /* Don't allow any more processes into the pid namespace */
191 disable_pid_allocation(pid_ns); 191 disable_pid_allocation(pid_ns);
192 192
193 /* Ignore SIGCHLD causing any terminated children to autoreap */ 193 /*
194 * Ignore SIGCHLD causing any terminated children to autoreap.
195 * This speeds up the namespace shutdown, plus see the comment
196 * below.
197 */
194 spin_lock_irq(&me->sighand->siglock); 198 spin_lock_irq(&me->sighand->siglock);
195 me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; 199 me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
196 spin_unlock_irq(&me->sighand->siglock); 200 spin_unlock_irq(&me->sighand->siglock);
@@ -223,15 +227,31 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
223 } 227 }
224 read_unlock(&tasklist_lock); 228 read_unlock(&tasklist_lock);
225 229
226 /* Firstly reap the EXIT_ZOMBIE children we may have. */ 230 /*
231 * Reap the EXIT_ZOMBIE children we had before we ignored SIGCHLD.
232 * sys_wait4() will also block until our children traced from the
233 * parent namespace are detached and become EXIT_DEAD.
234 */
227 do { 235 do {
228 clear_thread_flag(TIF_SIGPENDING); 236 clear_thread_flag(TIF_SIGPENDING);
229 rc = sys_wait4(-1, NULL, __WALL, NULL); 237 rc = sys_wait4(-1, NULL, __WALL, NULL);
230 } while (rc != -ECHILD); 238 } while (rc != -ECHILD);
231 239
232 /* 240 /*
233 * sys_wait4() above can't reap the TASK_DEAD children. 241 * sys_wait4() above can't reap the EXIT_DEAD children but we do not
234 * Make sure they all go away, see free_pid(). 242 * really care, we could reparent them to the global init. We could
243 * exit and reap ->child_reaper even if it is not the last thread in
244 * this pid_ns, free_pid(nr_hashed == 0) calls proc_cleanup_work(),
245 * pid_ns can not go away until proc_kill_sb() drops the reference.
246 *
247 * But this ns can also have other tasks injected by setns()+fork().
248 * Again, ignoring the user visible semantics we do not really need
249 * to wait until they are all reaped, but they can be reparented to
250 * us and thus we need to ensure that pid->child_reaper stays valid
251 * until they all go away. See free_pid()->wake_up_process().
252 *
253 * We rely on ignored SIGCHLD, an injected zombie must be autoreaped
254 * if reparented.
235 */ 255 */
236 for (;;) { 256 for (;;) {
237 set_current_state(TASK_UNINTERRUPTIBLE); 257 set_current_state(TASK_UNINTERRUPTIBLE);