diff options
author | Sukadev Bhattiprolu <sukadev@us.ibm.com> | 2007-10-19 02:40:13 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-19 14:53:40 -0400 |
commit | 3eb07c8c8adb6f0572baba844ba2d9e501654316 (patch) | |
tree | 5c3d527f6b003b316d41119320ebd5c589c8afd0 /kernel | |
parent | 0fbc26a6cfab9f377e82e28225f2c0c6b4661e5c (diff) |
pid namespaces: destroy pid namespace on init's death
Terminate all processes in a namespace when the reaper of the namespace is
exiting. We do this by walking the pidmap of the namespace and sending
SIGKILL to all processes.
Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/exit.c | 27 | ||||
-rw-r--r-- | kernel/pid.c | 38 |
2 files changed, 64 insertions, 1 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index d9e8e5ee9d7f..567909fd6be4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -879,7 +879,32 @@ static inline void exit_child_reaper(struct task_struct *tsk) | |||
879 | if (likely(tsk->group_leader != task_child_reaper(tsk))) | 879 | if (likely(tsk->group_leader != task_child_reaper(tsk))) |
880 | return; | 880 | return; |
881 | 881 | ||
882 | panic("Attempted to kill init!"); | 882 | if (tsk->nsproxy->pid_ns == &init_pid_ns) |
883 | panic("Attempted to kill init!"); | ||
884 | |||
885 | /* | ||
886 | * @tsk is the last thread in the 'cgroup-init' and is exiting. | ||
887 | * Terminate all remaining processes in the namespace and reap them | ||
888 | * before exiting @tsk. | ||
889 | * | ||
890 | * Note that @tsk (last thread of cgroup-init) may not necessarily | ||
891 | * be the child-reaper (i.e. main thread of cgroup-init) of the | ||
892 | * namespace, i.e. the child_reaper may have already exited. | ||
893 | * | ||
894 | * Even after a child_reaper exits, we let it inherit orphaned children, | ||
895 | * because, pid_ns->child_reaper remains valid as long as there is | ||
896 | * at least one living sub-thread in the cgroup init. | ||
897 | |||
898 | * This living sub-thread of the cgroup-init will be notified when | ||
899 | * a child inherited by the 'child-reaper' exits (do_notify_parent() | ||
900 | * uses __group_send_sig_info()). Further, when reaping child processes, | ||
901 | * do_wait() iterates over children of all living sub threads. | ||
902 | |||
903 | * i.e even though 'child_reaper' thread is listed as the parent of the | ||
904 | * orphaned children, any living sub-thread in the cgroup-init can | ||
905 | * perform the role of the child_reaper. | ||
906 | */ | ||
907 | zap_pid_ns_processes(tsk->nsproxy->pid_ns); | ||
883 | } | 908 | } |
884 | 909 | ||
885 | fastcall NORET_TYPE void do_exit(long code) | 910 | fastcall NORET_TYPE void do_exit(long code) |
diff --git a/kernel/pid.c b/kernel/pid.c index d88b83eb703e..b3e6d7c41b97 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/hash.h> | 34 | #include <linux/hash.h> |
35 | #include <linux/pid_namespace.h> | 35 | #include <linux/pid_namespace.h> |
36 | #include <linux/init_task.h> | 36 | #include <linux/init_task.h> |
37 | #include <linux/syscalls.h> | ||
37 | 38 | ||
38 | #define pid_hashfn(nr, ns) \ | 39 | #define pid_hashfn(nr, ns) \ |
39 | hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) | 40 | hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) |
@@ -567,6 +568,43 @@ void free_pid_ns(struct kref *kref) | |||
567 | put_pid_ns(parent); | 568 | put_pid_ns(parent); |
568 | } | 569 | } |
569 | 570 | ||
571 | void zap_pid_ns_processes(struct pid_namespace *pid_ns) | ||
572 | { | ||
573 | int nr; | ||
574 | int rc; | ||
575 | |||
576 | /* | ||
577 | * The last thread in the cgroup-init thread group is terminating. | ||
578 | * Find remaining pid_ts in the namespace, signal and wait for them | ||
579 | * to exit. | ||
580 | * | ||
581 | * Note: This signals each thread in the namespace - even those that | ||
582 | * belong to the same thread group. To avoid this, we would have | ||
583 | * to walk the entire tasklist looking for processes in this | ||
584 | * namespace, but that could be unnecessarily expensive if the | ||
585 | * pid namespace has just a few processes. Or we need to | ||
586 | * maintain a tasklist for each pid namespace. | ||
587 | * | ||
588 | */ | ||
589 | read_lock(&tasklist_lock); | ||
590 | nr = next_pidmap(pid_ns, 1); | ||
591 | while (nr > 0) { | ||
592 | kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr); | ||
593 | nr = next_pidmap(pid_ns, nr); | ||
594 | } | ||
595 | read_unlock(&tasklist_lock); | ||
596 | |||
597 | do { | ||
598 | clear_thread_flag(TIF_SIGPENDING); | ||
599 | rc = sys_wait4(-1, NULL, __WALL, NULL); | ||
600 | } while (rc != -ECHILD); | ||
601 | |||
602 | |||
603 | /* Child reaper for the pid namespace is going away */ | ||
604 | pid_ns->child_reaper = NULL; | ||
605 | return; | ||
606 | } | ||
607 | |||
570 | /* | 608 | /* |
571 | * The pid hash table is scaled according to the amount of memory in the | 609 | * The pid hash table is scaled according to the amount of memory in the |
572 | * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or | 610 | * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or |