aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorSukadev Bhattiprolu <sukadev@us.ibm.com>2007-10-19 02:40:13 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-19 14:53:40 -0400
commit3eb07c8c8adb6f0572baba844ba2d9e501654316 (patch)
tree5c3d527f6b003b316d41119320ebd5c589c8afd0 /kernel
parent0fbc26a6cfab9f377e82e28225f2c0c6b4661e5c (diff)
pid namespaces: destroy pid namespace on init's death
Terminate all processes in a namespace when the reaper of the namespace is exiting. We do this by walking the pidmap of the namespace and sending SIGKILL to all processes. Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com> Acked-by: Pavel Emelyanov <xemul@openvz.org> Cc: Oleg Nesterov <oleg@tv-sign.ru> Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com> Cc: Paul Menage <menage@google.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c27
-rw-r--r--kernel/pid.c38
2 files changed, 64 insertions, 1 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index d9e8e5ee9d7f..567909fd6be4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -879,7 +879,32 @@ static inline void exit_child_reaper(struct task_struct *tsk)
879 if (likely(tsk->group_leader != task_child_reaper(tsk))) 879 if (likely(tsk->group_leader != task_child_reaper(tsk)))
880 return; 880 return;
881 881
882 panic("Attempted to kill init!"); 882 if (tsk->nsproxy->pid_ns == &init_pid_ns)
883 panic("Attempted to kill init!");
884
885 /*
886 * @tsk is the last thread in the 'cgroup-init' and is exiting.
887 * Terminate all remaining processes in the namespace and reap them
888 * before exiting @tsk.
889 *
890 * Note that @tsk (last thread of cgroup-init) may not necessarily
891 * be the child-reaper (i.e main thread of cgroup-init) of the
892 * namespace i.e the child_reaper may have already exited.
893 *
894 * Even after a child_reaper exits, we let it inherit orphaned children,
895 * because, pid_ns->child_reaper remains valid as long as there is
896 * at least one living sub-thread in the cgroup init.
897 *
898 * This living sub-thread of the cgroup-init will be notified when
899 * a child inherited by the 'child-reaper' exits (do_notify_parent()
900 * uses __group_send_sig_info()). Further, when reaping child processes,
901 * do_wait() iterates over children of all living sub threads.
902 *
903 * i.e even though 'child_reaper' thread is listed as the parent of the
904 * orphaned children, any living sub-thread in the cgroup-init can
905 * perform the role of the child_reaper.
906 */
907 zap_pid_ns_processes(tsk->nsproxy->pid_ns);
883} 908}
884 909
885fastcall NORET_TYPE void do_exit(long code) 910fastcall NORET_TYPE void do_exit(long code)
diff --git a/kernel/pid.c b/kernel/pid.c
index d88b83eb703e..b3e6d7c41b97 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -34,6 +34,7 @@
34#include <linux/hash.h> 34#include <linux/hash.h>
35#include <linux/pid_namespace.h> 35#include <linux/pid_namespace.h>
36#include <linux/init_task.h> 36#include <linux/init_task.h>
37#include <linux/syscalls.h>
37 38
38#define pid_hashfn(nr, ns) \ 39#define pid_hashfn(nr, ns) \
39 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) 40 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -567,6 +568,43 @@ void free_pid_ns(struct kref *kref)
567 put_pid_ns(parent); 568 put_pid_ns(parent);
568} 569}
569 570
571void zap_pid_ns_processes(struct pid_namespace *pid_ns)
572{
573 int nr;
574 int rc;
575
576 /*
577 * The last thread in the cgroup-init thread group is terminating.
578 * Find remaining pids in the namespace, signal and wait for them
579 * to exit.
580 *
581 * Note: This signals each thread in the namespace - even those that
582 * belong to the same thread group. To avoid this, we would have
583 * to walk the entire tasklist looking for processes in this
584 * namespace, but that could be unnecessarily expensive if the
585 * pid namespace has just a few processes. Or we need to
586 * maintain a tasklist for each pid namespace.
587 *
588 */
589 read_lock(&tasklist_lock);
590 nr = next_pidmap(pid_ns, 1);
591 while (nr > 0) {
592 kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
593 nr = next_pidmap(pid_ns, nr);
594 }
595 read_unlock(&tasklist_lock);
596
597 do {
598 clear_thread_flag(TIF_SIGPENDING); /* NOTE(review): presumably so a pending signal cannot cut the wait short - confirm */
599 rc = sys_wait4(-1, NULL, __WALL, NULL);
600 } while (rc != -ECHILD); /* keep reaping until sys_wait4() returns -ECHILD */
601
602
603 /* Child reaper for the pid namespace is going away */
604 pid_ns->child_reaper = NULL;
605 return;
606}
607
570/* 608/*
571 * The pid hash table is scaled according to the amount of memory in the 609 * The pid hash table is scaled according to the amount of memory in the
572 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or 610 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or