aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/exit.c
diff options
context:
space:
mode:
authorOleg Nesterov <oleg@tv-sign.ru>2008-09-02 17:35:49 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-09-02 22:21:38 -0400
commit950bbabb5a804690a0201190de5c22837f72f83f (patch)
tree0d198ac02244138936acdf201c80aa4cd2da0bbc /kernel/exit.c
parentadd0d4dfd660e9e4fd0af3eac3cad23583c9558f (diff)
pid_ns: (BUG 11391) change ->child_reaper when init->group_leader exits
We don't change pid_ns->child_reaper when the main thread of the subnamespace init exits. As Robert Rex <robert.rex@exasol.com> pointed out this is wrong. Yes, the re-parenting itself works correctly, but if the reparented task exits it needs ->parent->nsproxy->pid_ns in do_notify_parent(), and if the main thread is zombie its ->nsproxy was already cleared by exit_task_namespaces(). Introduce the new function, find_new_reaper(), which finds the new ->parent for the re-parenting and changes ->child_reaper if needed. Kill the now unneeded exit_child_reaper(). Also move the changing of ->child_reaper from zap_pid_ns_processes() to find_new_reaper(), this consolidates the games with ->child_reaper and makes it stable under tasklist_lock. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=11391 Reported-by: Robert Rex <robert.rex@exasol.com> Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru> Acked-by: Serge Hallyn <serue@us.ibm.com> Acked-by: Pavel Emelyanov <xemul@openvz.org> Acked-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/exit.c')
-rw-r--r--kernel/exit.c78
1 files changed, 34 insertions, 44 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 75c647387639..25ed2ad986df 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -831,26 +831,50 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father)
831 * the child reaper process (ie "init") in our pid 831 * the child reaper process (ie "init") in our pid
832 * space. 832 * space.
833 */ 833 */
834static struct task_struct *find_new_reaper(struct task_struct *father)
835{
836 struct pid_namespace *pid_ns = task_active_pid_ns(father);
837 struct task_struct *thread;
838
839 thread = father;
840 while_each_thread(father, thread) {
841 if (thread->flags & PF_EXITING)
842 continue;
843 if (unlikely(pid_ns->child_reaper == father))
844 pid_ns->child_reaper = thread;
845 return thread;
846 }
847
848 if (unlikely(pid_ns->child_reaper == father)) {
849 write_unlock_irq(&tasklist_lock);
850 if (unlikely(pid_ns == &init_pid_ns))
851 panic("Attempted to kill init!");
852
853 zap_pid_ns_processes(pid_ns);
854 write_lock_irq(&tasklist_lock);
855 /*
856 * We can not clear ->child_reaper or leave it alone.
857 * There may by stealth EXIT_DEAD tasks on ->children,
858 * forget_original_parent() must move them somewhere.
859 */
860 pid_ns->child_reaper = init_pid_ns.child_reaper;
861 }
862
863 return pid_ns->child_reaper;
864}
865
834static void forget_original_parent(struct task_struct *father) 866static void forget_original_parent(struct task_struct *father)
835{ 867{
836 struct task_struct *p, *n, *reaper = father; 868 struct task_struct *p, *n, *reaper;
837 LIST_HEAD(ptrace_dead); 869 LIST_HEAD(ptrace_dead);
838 870
839 write_lock_irq(&tasklist_lock); 871 write_lock_irq(&tasklist_lock);
840 872 reaper = find_new_reaper(father);
841 /* 873 /*
842 * First clean up ptrace if we were using it. 874 * First clean up ptrace if we were using it.
843 */ 875 */
844 ptrace_exit(father, &ptrace_dead); 876 ptrace_exit(father, &ptrace_dead);
845 877
846 do {
847 reaper = next_thread(reaper);
848 if (reaper == father) {
849 reaper = task_child_reaper(father);
850 break;
851 }
852 } while (reaper->flags & PF_EXITING);
853
854 list_for_each_entry_safe(p, n, &father->children, sibling) { 878 list_for_each_entry_safe(p, n, &father->children, sibling) {
855 p->real_parent = reaper; 879 p->real_parent = reaper;
856 if (p->parent == father) { 880 if (p->parent == father) {
@@ -959,39 +983,6 @@ static void check_stack_usage(void)
959static inline void check_stack_usage(void) {} 983static inline void check_stack_usage(void) {}
960#endif 984#endif
961 985
962static inline void exit_child_reaper(struct task_struct *tsk)
963{
964 if (likely(tsk->group_leader != task_child_reaper(tsk)))
965 return;
966
967 if (tsk->nsproxy->pid_ns == &init_pid_ns)
968 panic("Attempted to kill init!");
969
970 /*
971 * @tsk is the last thread in the 'cgroup-init' and is exiting.
972 * Terminate all remaining processes in the namespace and reap them
973 * before exiting @tsk.
974 *
975 * Note that @tsk (last thread of cgroup-init) may not necessarily
976 * be the child-reaper (i.e main thread of cgroup-init) of the
977 * namespace i.e the child_reaper may have already exited.
978 *
979 * Even after a child_reaper exits, we let it inherit orphaned children,
980 * because, pid_ns->child_reaper remains valid as long as there is
981 * at least one living sub-thread in the cgroup init.
982
983 * This living sub-thread of the cgroup-init will be notified when
984 * a child inherited by the 'child-reaper' exits (do_notify_parent()
985 * uses __group_send_sig_info()). Further, when reaping child processes,
986 * do_wait() iterates over children of all living sub threads.
987
988 * i.e even though 'child_reaper' thread is listed as the parent of the
989 * orphaned children, any living sub-thread in the cgroup-init can
990 * perform the role of the child_reaper.
991 */
992 zap_pid_ns_processes(tsk->nsproxy->pid_ns);
993}
994
995NORET_TYPE void do_exit(long code) 986NORET_TYPE void do_exit(long code)
996{ 987{
997 struct task_struct *tsk = current; 988 struct task_struct *tsk = current;
@@ -1051,7 +1042,6 @@ NORET_TYPE void do_exit(long code)
1051 } 1042 }
1052 group_dead = atomic_dec_and_test(&tsk->signal->live); 1043 group_dead = atomic_dec_and_test(&tsk->signal->live);
1053 if (group_dead) { 1044 if (group_dead) {
1054 exit_child_reaper(tsk);
1055 hrtimer_cancel(&tsk->signal->real_timer); 1045 hrtimer_cancel(&tsk->signal->real_timer);
1056 exit_itimers(tsk->signal); 1046 exit_itimers(tsk->signal);
1057 } 1047 }