Diffstat (limited to 'kernel/exit.c')
 -rw-r--r--   kernel/exit.c   124
 1 file changed, 61 insertions(+), 63 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index 38ec40630149..059b38cae384 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -112,9 +112,7 @@ static void __exit_signal(struct task_struct *tsk)
		 * We won't ever get here for the group leader, since it
		 * will have been the last reference on the signal_struct.
		 */
-		sig->utime = cputime_add(sig->utime, tsk->utime);
-		sig->stime = cputime_add(sig->stime, tsk->stime);
-		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
+		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
		sig->min_flt += tsk->min_flt;
		sig->maj_flt += tsk->maj_flt;
		sig->nvcsw += tsk->nvcsw;
@@ -122,7 +120,6 @@ static void __exit_signal(struct task_struct *tsk)
		sig->inblock += task_io_get_inblock(tsk);
		sig->oublock += task_io_get_oublock(tsk);
		task_io_accounting_add(&sig->ioac, &tsk->ioac);
-		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
		sig = NULL; /* Marker for below. */
	}

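Reviewer note: the two hunks above stop folding per-thread utime/stime (and sum_exec_runtime) into the shared signal_struct at each thread's exit. With this series, group times are instead summed on demand by thread_group_cputime(), as the wait_task_zombie() hunks below spell out. A minimal user-space sketch of that consolidation, with illustrative names only:

/*
 * Reviewer sketch, NOT kernel code: sum per-thread times over the
 * whole group on demand instead of accumulating at each thread exit.
 * All names below are illustrative.
 */
#include <stdio.h>

typedef unsigned long long cputime_t;

struct task_cputime_sketch {
        cputime_t utime;        /* user time */
        cputime_t stime;        /* system time */
};

struct thread_sketch {
        cputime_t utime;
        cputime_t stime;
};

/* One pass over the group replaces the old per-exit accumulation. */
static void group_cputime(const struct thread_sketch *t, int n,
                          struct task_cputime_sketch *sum)
{
        sum->utime = 0;
        sum->stime = 0;
        for (int i = 0; i < n; i++) {
                sum->utime += t[i].utime;
                sum->stime += t[i].stime;
        }
}

int main(void)
{
        struct thread_sketch group[3] = { {10, 2}, {7, 1}, {3, 4} };
        struct task_cputime_sketch total;

        group_cputime(group, 3, &total);
        printf("utime=%llu stime=%llu\n", total.utime, total.stime);
        return 0;
}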
@@ -583,8 +580,6 @@ mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
	 * If there are other users of the mm and the owner (us) is exiting
	 * we need to find a new owner to take on the responsibility.
	 */
-	if (!mm)
-		return 0;
	if (atomic_read(&mm->mm_users) <= 1)
		return 0;
	if (mm->owner != p)
@@ -627,29 +622,38 @@ retry:
	} while_each_thread(g, c);

	read_unlock(&tasklist_lock);
+	/*
+	 * We found no owner yet mm_users > 1: this implies that we are
+	 * most likely racing with swapoff (try_to_unuse()) or /proc or
+	 * ptrace or page migration (get_task_mm()). Mark owner as NULL,
+	 * so that subsystems can understand the callback and take action.
+	 */
+	down_write(&mm->mmap_sem);
+	cgroup_mm_owner_callbacks(mm->owner, NULL);
+	mm->owner = NULL;
+	up_write(&mm->mmap_sem);
	return;

assign_new_owner:
	BUG_ON(c == p);
	get_task_struct(c);
+	read_unlock(&tasklist_lock);
+	down_write(&mm->mmap_sem);
	/*
	 * The task_lock protects c->mm from changing.
	 * We always want mm->owner->mm == mm
	 */
	task_lock(c);
-	/*
-	 * Delay read_unlock() till we have the task_lock()
-	 * to ensure that c does not slip away underneath us
-	 */
-	read_unlock(&tasklist_lock);
	if (c->mm != mm) {
		task_unlock(c);
+		up_write(&mm->mmap_sem);
		put_task_struct(c);
		goto retry;
	}
	cgroup_mm_owner_callbacks(mm->owner, c);
	mm->owner = c;
	task_unlock(c);
+	up_write(&mm->mmap_sem);
	put_task_struct(c);
}
#endif /* CONFIG_MM_OWNER */
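Reviewer note: the locking protocol changes here. tasklist_lock is now dropped before mmap_sem is taken for writing, and mmap_sem is held across the mm->owner update (including the new NULL-owner fallback), with a full retry if the candidate's mm changed in the unlocked window. A user-space sketch of that drop-outer-lock, take-inner-lock, re-validate-and-retry pattern, with illustrative names:

/*
 * Reviewer sketch, NOT kernel code: the retry pattern the hunk above
 * adopts for tasklist_lock vs. mmap_sem. Illustrative names only.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t outer = PTHREAD_RWLOCK_INITIALIZER; /* plays tasklist_lock */
static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER;   /* plays mmap_sem */

static int owner;
static int candidate = 2;       /* picked under the outer lock */
static int candidate_valid = 1; /* plays the c->mm == mm re-check */

static void assign_new_owner(void)
{
retry:
        pthread_rwlock_rdlock(&outer);
        int c = candidate;
        /* Drop the outer lock BEFORE taking the inner one, keeping a
         * single lock order and avoiding lock inversion. */
        pthread_rwlock_unlock(&outer);

        pthread_mutex_lock(&inner);
        if (!candidate_valid) {
                /* Candidate changed in the unlocked window: back out
                 * completely and retry, as the kernel code does. */
                pthread_mutex_unlock(&inner);
                goto retry;
        }
        owner = c; /* the update is made while holding the inner lock */
        pthread_mutex_unlock(&inner);
}

int main(void)
{
        assign_new_owner();
        printf("owner=%d\n", owner);
        return 0;
}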
@@ -831,26 +835,50 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father)
 * the child reaper process (ie "init") in our pid
 * space.
 */
+static struct task_struct *find_new_reaper(struct task_struct *father)
+{
+	struct pid_namespace *pid_ns = task_active_pid_ns(father);
+	struct task_struct *thread;
+
+	thread = father;
+	while_each_thread(father, thread) {
+		if (thread->flags & PF_EXITING)
+			continue;
+		if (unlikely(pid_ns->child_reaper == father))
+			pid_ns->child_reaper = thread;
+		return thread;
+	}
+
+	if (unlikely(pid_ns->child_reaper == father)) {
+		write_unlock_irq(&tasklist_lock);
+		if (unlikely(pid_ns == &init_pid_ns))
+			panic("Attempted to kill init!");
+
+		zap_pid_ns_processes(pid_ns);
+		write_lock_irq(&tasklist_lock);
+		/*
+		 * We cannot clear ->child_reaper or leave it alone.
+		 * There may be stealth EXIT_DEAD tasks on ->children,
+		 * forget_original_parent() must move them somewhere.
+		 */
+		pid_ns->child_reaper = init_pid_ns.child_reaper;
+	}
+
+	return pid_ns->child_reaper;
+}
+
 static void forget_original_parent(struct task_struct *father)
 {
-	struct task_struct *p, *n, *reaper = father;
+	struct task_struct *p, *n, *reaper;
	LIST_HEAD(ptrace_dead);

	write_lock_irq(&tasklist_lock);
-
+	reaper = find_new_reaper(father);
	/*
	 * First clean up ptrace if we were using it.
	 */
	ptrace_exit(father, &ptrace_dead);

-	do {
-		reaper = next_thread(reaper);
-		if (reaper == father) {
-			reaper = task_child_reaper(father);
-			break;
-		}
-	} while (reaper->flags & PF_EXITING);
-
	list_for_each_entry_safe(p, n, &father->children, sibling) {
		p->real_parent = reaper;
		if (p->parent == father) {
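Reviewer note: find_new_reaper() centralizes what forget_original_parent() used to open-code: walk the father's thread group (while_each_thread() iterates the circular thread list), skip threads already marked PF_EXITING, and hand over the pid namespace's child_reaper role if the father held it; only when no live thread remains does it fall back to the namespace reaper, zapping the namespace first when the father was its reaper. A compact sketch of the selection walk, with illustrative names:

/*
 * Reviewer sketch, NOT kernel code: the selection walk over a
 * circular thread-group list (the shape while_each_thread() covers).
 */
#include <stdio.h>

#define PF_EXITING 0x00000004 /* value matches the kernel flag */

struct task_sketch {
        unsigned int flags;
        struct task_sketch *next; /* circular thread-group list */
};

/*
 * Return the first live sibling after 'father', or NULL when every
 * remaining thread is exiting (the caller then falls back to the
 * namespace's child_reaper, zapping the namespace if needed).
 */
static struct task_sketch *pick_reaper(struct task_sketch *father)
{
        for (struct task_sketch *t = father->next; t != father; t = t->next)
                if (!(t->flags & PF_EXITING))
                        return t;
        return NULL;
}

int main(void)
{
        struct task_sketch a = { PF_EXITING, NULL };
        struct task_sketch b = { 0, NULL };
        struct task_sketch father = { 0, NULL };

        father.next = &a;       /* father -> a -> b -> father */
        a.next = &b;
        b.next = &father;

        printf("reaper is %s\n", pick_reaper(&father) == &b ? "b" : "other");
        return 0;
}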
@@ -918,8 +946,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead)

	/* mt-exec, de_thread() is waiting for us */
	if (thread_group_leader(tsk) &&
-	    tsk->signal->notify_count < 0 &&
-	    tsk->signal->group_exit_task)
+	    tsk->signal->group_exit_task &&
+	    tsk->signal->notify_count < 0)
		wake_up_process(tsk->signal->group_exit_task);

	write_unlock_irq(&tasklist_lock);
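Reviewer note: this hunk only swaps the last two operands of the &&, so the overall result is unchanged, but with C's left-to-right short-circuit evaluation notify_count is now only examined once group_exit_task shows an exec is actually in progress. A trivial sketch of the guard-first idiom, with illustrative names:

/*
 * Reviewer sketch, NOT kernel code: guard-first ordering under
 * short-circuit && -- the count is only read when the pointer guard
 * says the value is meaningful.
 */
#include <stdio.h>
#include <stddef.h>

struct sig_sketch {
        int notify_count;
        void *group_exit_task;
};

static int should_wake(const struct sig_sketch *s)
{
        /* New order: presence check first, count second. */
        return s->group_exit_task != NULL && s->notify_count < 0;
}

int main(void)
{
        struct sig_sketch no_exec = { -1, NULL };
        int token;
        struct sig_sketch in_exec = { -1, &token };

        printf("%d %d\n", should_wake(&no_exec), should_wake(&in_exec));
        return 0;
}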
@@ -959,39 +987,6 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif

-static inline void exit_child_reaper(struct task_struct *tsk)
-{
-	if (likely(tsk->group_leader != task_child_reaper(tsk)))
-		return;
-
-	if (tsk->nsproxy->pid_ns == &init_pid_ns)
-		panic("Attempted to kill init!");
-
-	/*
-	 * @tsk is the last thread in the 'cgroup-init' and is exiting.
-	 * Terminate all remaining processes in the namespace and reap them
-	 * before exiting @tsk.
-	 *
-	 * Note that @tsk (last thread of cgroup-init) may not necessarily
-	 * be the child-reaper (i.e. main thread of cgroup-init) of the
-	 * namespace, i.e. the child_reaper may have already exited.
-	 *
-	 * Even after a child_reaper exits, we let it inherit orphaned
-	 * children, because pid_ns->child_reaper remains valid as long as
-	 * there is at least one living sub-thread in the cgroup init.
-	 *
-	 * This living sub-thread of the cgroup-init will be notified when
-	 * a child inherited by the 'child-reaper' exits (do_notify_parent()
-	 * uses __group_send_sig_info()). Further, when reaping child
-	 * processes, do_wait() iterates over children of all living
-	 * sub-threads.
-	 *
-	 * I.e. even though the 'child_reaper' thread is listed as the parent
-	 * of the orphaned children, any living sub-thread in the cgroup-init
-	 * can perform the role of the child_reaper.
-	 */
-	zap_pid_ns_processes(tsk->nsproxy->pid_ns);
-}
-
 NORET_TYPE void do_exit(long code)
 {
	struct task_struct *tsk = current;
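Reviewer note: the duty removed here does not vanish. zap_pid_ns_processes() is now called from find_new_reaper() (added earlier in this patch), so namespace teardown happens while reparenting in forget_original_parent() rather than from do_exit() via exit_child_reaper(); the corresponding do_exit() call site is removed in the next hunk.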
@@ -1051,7 +1046,6 @@ NORET_TYPE void do_exit(long code)
	}
	group_dead = atomic_dec_and_test(&tsk->signal->live);
	if (group_dead) {
-		exit_child_reaper(tsk);
		hrtimer_cancel(&tsk->signal->real_timer);
		exit_itimers(tsk->signal);
	}
@@ -1304,6 +1298,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
	if (likely(!traced)) {
		struct signal_struct *psig;
		struct signal_struct *sig;
+		struct task_cputime cputime;

		/*
		 * The resource counters for the group leader are in its
@@ -1319,20 +1314,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
		 * need to protect the access to p->parent->signal fields,
		 * as other threads in the parent group can be right
		 * here reaping other children at the same time.
+		 *
+		 * We use thread_group_cputime() to get times for the thread
+		 * group, which consolidates times for all threads in the
+		 * group including the group leader.
		 */
		spin_lock_irq(&p->parent->sighand->siglock);
		psig = p->parent->signal;
		sig = p->signal;
+		thread_group_cputime(p, &cputime);
		psig->cutime =
			cputime_add(psig->cutime,
-				    cputime_add(p->utime,
-				    cputime_add(sig->utime,
-						sig->cutime)));
+				    cputime_add(cputime.utime,
+						sig->cutime));
		psig->cstime =
			cputime_add(psig->cstime,
-				    cputime_add(p->stime,
-				    cputime_add(sig->stime,
-						sig->cstime)));
+				    cputime_add(cputime.stime,
+						sig->cstime));
		psig->cgtime =
			cputime_add(psig->cgtime,
				    cputime_add(p->gtime,
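Reviewer note: with struct task_cputime in hand, the parent's cutime/cstime accumulation collapses from a three-way cputime_add() chain (p->utime + sig->utime + sig->cutime) to the group total from thread_group_cputime() plus the child's own cutime, i.e. times already collected from the child's reaped children. A sketch of the arithmetic, with illustrative values:

/*
 * Reviewer sketch, NOT kernel code: the accumulation after the change.
 */
#include <stdio.h>

typedef unsigned long long cputime_t;

static cputime_t cputime_add(cputime_t a, cputime_t b)
{
        return a + b;
}

int main(void)
{
        cputime_t group_utime = 40;  /* cputime.utime for the whole group */
        cputime_t child_cutime = 5;  /* sig->cutime: child's reaped children */
        cputime_t parent_cutime = 100;

        parent_cutime = cputime_add(parent_cutime,
                                    cputime_add(group_utime, child_cutime));
        printf("parent cutime = %llu\n", parent_cutime); /* 145 */
        return 0;
}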