Diffstat (limited to 'kernel/exit.c')

 -rw-r--r--  kernel/exit.c  185
 1 file changed, 72 insertions, 113 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index ad933bb29ec7..85a83c831856 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -46,6 +46,7 @@
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -111,27 +112,16 @@ static void __exit_signal(struct task_struct *tsk)
		 * We won't ever get here for the group leader, since it
		 * will have been the last reference on the signal_struct.
		 */
-		sig->utime = cputime_add(sig->utime, tsk->utime);
-		sig->stime = cputime_add(sig->stime, tsk->stime);
-		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
+		sig->utime = cputime_add(sig->utime, task_utime(tsk));
+		sig->stime = cputime_add(sig->stime, task_stime(tsk));
+		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
		sig->min_flt += tsk->min_flt;
		sig->maj_flt += tsk->maj_flt;
		sig->nvcsw += tsk->nvcsw;
		sig->nivcsw += tsk->nivcsw;
		sig->inblock += task_io_get_inblock(tsk);
		sig->oublock += task_io_get_oublock(tsk);
-#ifdef CONFIG_TASK_XACCT
-		sig->rchar += tsk->rchar;
-		sig->wchar += tsk->wchar;
-		sig->syscr += tsk->syscr;
-		sig->syscw += tsk->syscw;
-#endif /* CONFIG_TASK_XACCT */
-#ifdef CONFIG_TASK_IO_ACCOUNTING
-		sig->ioac.read_bytes += tsk->ioac.read_bytes;
-		sig->ioac.write_bytes += tsk->ioac.write_bytes;
-		sig->ioac.cancelled_write_bytes +=
-				tsk->ioac.cancelled_write_bytes;
-#endif /* CONFIG_TASK_IO_ACCOUNTING */
+		task_io_accounting_add(&sig->ioac, &tsk->ioac);
		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
		sig = NULL; /* Marker for below. */
	}
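
The two #ifdef blocks above collapse into one task_io_accounting_add() call because the CONFIG_TASK_XACCT character-I/O counters (rchar, wchar, syscr, syscw) and the CONFIG_TASK_IO_ACCOUNTING block-I/O counters now live side by side in struct task_io_accounting. A minimal sketch of the helper, assuming it simply folds the fields the deleted lines used to sum (the in-tree version sits in include/linux/task_io_accounting_ops.h and may be factored differently):

/*
 * Sketch only: add one task's I/O counters into an accumulator.
 * Field set taken from the code deleted above.
 */
static inline void task_io_accounting_add(struct task_io_accounting *dst,
					  struct task_io_accounting *src)
{
#ifdef CONFIG_TASK_XACCT
	dst->rchar += src->rchar;	/* bytes read */
	dst->wchar += src->wchar;	/* bytes written */
	dst->syscr += src->syscr;	/* read syscalls */
	dst->syscw += src->syscw;	/* write syscalls */
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
	dst->read_bytes += src->read_bytes;
	dst->write_bytes += src->write_bytes;
	dst->cancelled_write_bytes += src->cancelled_write_bytes;
#endif
}

The cputime lines change too: the raw tsk->utime/stime/gtime reads become task_utime()/task_stime()/task_gtime(), so the totals folded into the signal struct match the adjusted values a live task reports, rather than the unscaled raw counters.
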
@@ -162,27 +152,17 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 	put_task_struct(container_of(rhp, struct task_struct, rcu));
 }
 
-/*
- * Do final ptrace-related cleanup of a zombie being reaped.
- *
- * Called with write_lock(&tasklist_lock) held.
- */
-static void ptrace_release_task(struct task_struct *p)
-{
-	BUG_ON(!list_empty(&p->ptraced));
-	ptrace_unlink(p);
-	BUG_ON(!list_empty(&p->ptrace_entry));
-}
 
 void release_task(struct task_struct * p)
 {
 	struct task_struct *leader;
 	int zap_leader;
 repeat:
+	tracehook_prepare_release_task(p);
 	atomic_dec(&p->user->processes);
 	proc_flush_task(p);
 	write_lock_irq(&tasklist_lock);
-	ptrace_release_task(p);
+	tracehook_finish_release_task(p);
 	__exit_signal(p);
 
 	/*
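
The ptrace cleanup does not disappear here; it moves behind the new tracehook layer. Assuming the shape these hooks have in <linux/tracehook.h> in this series, the two calls bracket the reaping roughly like this (a sketch, not the verbatim header):

/* Runs before the reaping work starts, outside tasklist_lock. */
static inline void tracehook_prepare_release_task(struct task_struct *task)
{
	/* empty for plain ptrace; a hook point for other tracing engines */
}

/* Runs under write_lock_irq(&tasklist_lock), like the old helper did. */
static inline void tracehook_finish_release_task(struct task_struct *task)
{
	/* the old ptrace_release_task() body: sanity BUG_ONs + ptrace_unlink() */
	ptrace_release_task(task);
}
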
@@ -204,6 +184,13 @@ repeat:
 		 * that case.
 		 */
 		zap_leader = task_detached(leader);
+
+		/*
+		 * This maintains the invariant that release_task()
+		 * only runs on a task in EXIT_DEAD, just for sanity.
+		 */
+		if (zap_leader)
+			leader->exit_state = EXIT_DEAD;
 	}
 
 	write_unlock_irq(&tasklist_lock);
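
task_detached() above is shorthand for "nobody will ever wait() on this task". For reference, the predicate amounts to (a sketch matching how it is used in this file):

/* Sketch: a detached task self-reaps; exit_signal == -1 marks it. */
static inline int task_detached(struct task_struct *p)
{
	return p->exit_signal == -1;
}

Forcing the leader into EXIT_DEAD before the recursive release_task() call is what makes the stated invariant hold: release_task() only ever operates on EXIT_DEAD tasks.
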
@@ -567,8 +554,6 @@ void put_fs_struct(struct fs_struct *fs)
 	if (atomic_dec_and_test(&fs->count)) {
 		path_put(&fs->root);
 		path_put(&fs->pwd);
-		if (fs->altroot.dentry)
-			path_put(&fs->altroot);
 		kmem_cache_free(fs_cachep, fs);
 	}
 }
@@ -598,8 +583,6 @@ mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
 	 * If there are other users of the mm and the owner (us) is exiting
 	 * we need to find a new owner to take on the responsibility.
 	 */
-	if (!mm)
-		return 0;
 	if (atomic_read(&mm->mm_users) <= 1)
 		return 0;
 	if (mm->owner != p)
@@ -642,6 +625,16 @@ retry:
 	} while_each_thread(g, c);
 
 	read_unlock(&tasklist_lock);
+	/*
+	 * We found no owner yet mm_users > 1: this implies that we are
+	 * most likely racing with swapoff (try_to_unuse()) or /proc or
+	 * ptrace or page migration (get_task_mm()). Mark owner as NULL,
+	 * so that subsystems can understand the callback and take action.
+	 */
+	down_write(&mm->mmap_sem);
+	cgroup_mm_owner_callbacks(mm->owner, NULL);
+	mm->owner = NULL;
+	up_write(&mm->mmap_sem);
 	return;
 
 assign_new_owner:
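
After this hunk, mm->owner may legitimately be NULL while mm_users > 1, so anything that walks from an mm back to a task must tolerate that. A hypothetical consumer pattern (illustrative only; the function name is invented here, and the locking mirrors the writer side above):

/*
 * Illustrative sketch: safely fetch the owning task of an mm now that
 * the exit path may publish mm->owner == NULL under mmap_sem.
 */
static struct task_struct *sketch_get_mm_owner(struct mm_struct *mm)
{
	struct task_struct *owner = NULL;

	down_read(&mm->mmap_sem);	/* the writer updates owner under mmap_sem */
	if (mm->owner) {
		owner = mm->owner;
		get_task_struct(owner);	/* pin it before dropping the lock */
	}
	up_read(&mm->mmap_sem);
	return owner;			/* caller: put_task_struct() when done */
}
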
@@ -846,26 +839,50 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father)
  * the child reaper process (ie "init") in our pid
  * space.
  */
+static struct task_struct *find_new_reaper(struct task_struct *father)
+{
+	struct pid_namespace *pid_ns = task_active_pid_ns(father);
+	struct task_struct *thread;
+
+	thread = father;
+	while_each_thread(father, thread) {
+		if (thread->flags & PF_EXITING)
+			continue;
+		if (unlikely(pid_ns->child_reaper == father))
+			pid_ns->child_reaper = thread;
+		return thread;
+	}
+
+	if (unlikely(pid_ns->child_reaper == father)) {
+		write_unlock_irq(&tasklist_lock);
+		if (unlikely(pid_ns == &init_pid_ns))
+			panic("Attempted to kill init!");
+
+		zap_pid_ns_processes(pid_ns);
+		write_lock_irq(&tasklist_lock);
+		/*
+		 * We can not clear ->child_reaper or leave it alone.
+		 * There may by stealth EXIT_DEAD tasks on ->children,
+		 * forget_original_parent() must move them somewhere.
+		 */
+		pid_ns->child_reaper = init_pid_ns.child_reaper;
+	}
+
+	return pid_ns->child_reaper;
+}
+
 static void forget_original_parent(struct task_struct *father)
 {
-	struct task_struct *p, *n, *reaper = father;
+	struct task_struct *p, *n, *reaper;
 	LIST_HEAD(ptrace_dead);
 
 	write_lock_irq(&tasklist_lock);
-
+	reaper = find_new_reaper(father);
 	/*
 	 * First clean up ptrace if we were using it.
 	 */
 	ptrace_exit(father, &ptrace_dead);
 
-	do {
-		reaper = next_thread(reaper);
-		if (reaper == father) {
-			reaper = task_child_reaper(father);
-			break;
-		}
-	} while (reaper->flags & PF_EXITING);
-
 	list_for_each_entry_safe(p, n, &father->children, sibling) {
 		p->real_parent = reaper;
 		if (p->parent == father) {
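
find_new_reaper() leans on while_each_thread(), which walks the circular thread list and visits every thread in father's group except father itself; that is why thread is seeded with father before the loop. The iterator is essentially (as defined in <linux/sched.h>):

/* Advance t around the circular thread list until it wraps back to g. */
#define while_each_thread(g, t) \
	while ((t = next_thread(t)) != g)

So the function returns the first live (non-PF_EXITING) sibling, promoting it to pid_ns->child_reaper when father held that role; only when no live sibling exists does the namespace-zapping fallback run. This replaces both the old inline do/while search and the separate exit_child_reaper() path removed further down.
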
@@ -887,7 +904,8 @@ static void forget_original_parent(struct task_struct *father)
  */
 static void exit_notify(struct task_struct *tsk, int group_dead)
 {
-	int state;
+	int signal;
+	void *cookie;
 
 	/*
 	 * This does two things:
@@ -924,33 +942,24 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	    !capable(CAP_KILL))
 		tsk->exit_signal = SIGCHLD;
 
-	/* If something other than our normal parent is ptracing us, then
-	 * send it a SIGCHLD instead of honoring exit_signal.  exit_signal
-	 * only has special meaning to our real parent.
-	 */
-	if (!task_detached(tsk) && thread_group_empty(tsk)) {
-		int signal = ptrace_reparented(tsk) ?
-				SIGCHLD : tsk->exit_signal;
-		do_notify_parent(tsk, signal);
-	} else if (tsk->ptrace) {
-		do_notify_parent(tsk, SIGCHLD);
-	}
+	signal = tracehook_notify_death(tsk, &cookie, group_dead);
+	if (signal >= 0)
+		signal = do_notify_parent(tsk, signal);
 
-	state = EXIT_ZOMBIE;
-	if (task_detached(tsk) && likely(!tsk->ptrace))
-		state = EXIT_DEAD;
-	tsk->exit_state = state;
+	tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE;
 
 	/* mt-exec, de_thread() is waiting for us */
 	if (thread_group_leader(tsk) &&
-	    tsk->signal->notify_count < 0 &&
-	    tsk->signal->group_exit_task)
+	    tsk->signal->group_exit_task &&
+	    tsk->signal->notify_count < 0)
 		wake_up_process(tsk->signal->group_exit_task);
 
 	write_unlock_irq(&tasklist_lock);
 
+	tracehook_report_death(tsk, signal, cookie, group_dead);
+
 	/* If the process is dead, release it - nobody will wait for it */
-	if (state == EXIT_DEAD)
+	if (signal == DEATH_REAP)
 		release_task(tsk);
 }
 
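
The deleted branches are not lost: tracehook_notify_death() centralizes the same decision. Reconstructing it from the code removed above (a sketch; the in-tree hook body may differ in detail), a return value >= 0 is the signal to send the parent, while the negative DEATH_REAP / DEATH_DELAYED_GROUP_LEADER codes mean no notification:

/*
 * Sketch reconstructed from the branches deleted above, not the
 * verbatim <linux/tracehook.h> implementation.
 */
static inline int tracehook_notify_death(struct task_struct *task,
					 void **death_cookie, int group_dead)
{
	*death_cookie = NULL;
	if (task_detached(task))
		return task->ptrace ? SIGCHLD : DEATH_REAP;
	if (thread_group_empty(task))
		return ptrace_reparented(task) ? SIGCHLD : task->exit_signal;
	return task->ptrace ? SIGCHLD : DEATH_DELAYED_GROUP_LEADER;
}

Note that do_notify_parent() now returns a signal code as well, so a parent that ignores SIGCHLD can convert the death to DEATH_REAP on the spot, and the task is then released immediately below.
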
@@ -982,39 +991,6 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
-static inline void exit_child_reaper(struct task_struct *tsk)
-{
-	if (likely(tsk->group_leader != task_child_reaper(tsk)))
-		return;
-
-	if (tsk->nsproxy->pid_ns == &init_pid_ns)
-		panic("Attempted to kill init!");
-
-	/*
-	 * @tsk is the last thread in the 'cgroup-init' and is exiting.
-	 * Terminate all remaining processes in the namespace and reap them
-	 * before exiting @tsk.
-	 *
-	 * Note that @tsk (last thread of cgroup-init) may not necessarily
-	 * be the child-reaper (i.e main thread of cgroup-init) of the
-	 * namespace i.e the child_reaper may have already exited.
-	 *
-	 * Even after a child_reaper exits, we let it inherit orphaned children,
-	 * because, pid_ns->child_reaper remains valid as long as there is
-	 * at least one living sub-thread in the cgroup init.
-
-	 * This living sub-thread of the cgroup-init will be notified when
-	 * a child inherited by the 'child-reaper' exits (do_notify_parent()
-	 * uses __group_send_sig_info()). Further, when reaping child processes,
-	 * do_wait() iterates over children of all living sub threads.
-
-	 * i.e even though 'child_reaper' thread is listed as the parent of the
-	 * orphaned children, any living sub-thread in the cgroup-init can
-	 * perform the role of the child_reaper.
-	 */
-	zap_pid_ns_processes(tsk->nsproxy->pid_ns);
-}
-
 NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
@@ -1029,10 +1005,7 @@ NORET_TYPE void do_exit(long code)
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
 
-	if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
-		current->ptrace_message = code;
-		ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
-	}
+	tracehook_report_exit(&code);
 
 	/*
 	 * We're taking recursive faults here in do_exit. Safest is to just
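
tracehook_report_exit() folds the deleted PT_TRACE_EXIT test behind one call. A sketch equivalent to the removed lines (the real header may route this through a generic ptrace-event helper; taking the exit code by reference lets a tracer rewrite it):

/* Sketch: equivalent to the lines deleted above. */
static inline void tracehook_report_exit(long *exit_code)
{
	if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
		current->ptrace_message = *exit_code;
		ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
	}
}
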
@@ -1077,7 +1050,6 @@ NORET_TYPE void do_exit(long code)
 	}
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
-		exit_child_reaper(tsk);
 		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
 	}
@@ -1378,21 +1350,8 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		psig->coublock +=
 			task_io_get_oublock(p) +
 			sig->oublock + sig->coublock;
-#ifdef CONFIG_TASK_XACCT
-		psig->rchar += p->rchar + sig->rchar;
-		psig->wchar += p->wchar + sig->wchar;
-		psig->syscr += p->syscr + sig->syscr;
-		psig->syscw += p->syscw + sig->syscw;
-#endif /* CONFIG_TASK_XACCT */
-#ifdef CONFIG_TASK_IO_ACCOUNTING
-		psig->ioac.read_bytes +=
-			p->ioac.read_bytes + sig->ioac.read_bytes;
-		psig->ioac.write_bytes +=
-			p->ioac.write_bytes + sig->ioac.write_bytes;
-		psig->ioac.cancelled_write_bytes +=
-			p->ioac.cancelled_write_bytes +
-			sig->ioac.cancelled_write_bytes;
-#endif /* CONFIG_TASK_IO_ACCOUNTING */
+		task_io_accounting_add(&psig->ioac, &p->ioac);
+		task_io_accounting_add(&psig->ioac, &sig->ioac);
 		spin_unlock_irq(&p->parent->sighand->siglock);
 	}
 