1 file changed, 38 insertions, 26 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 16395644a98f..2d8be7ebb0f7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -40,13 +40,13 @@
 #include <linux/cn_proc.h>
 #include <linux/mutex.h>
 #include <linux/futex.h>
-#include <linux/compat.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h> /* for audit_free() */
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
+#include <trace/sched.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -112,8 +112,6 @@ static void __exit_signal(struct task_struct *tsk)
                 * We won't ever get here for the group leader, since it
                 * will have been the last reference on the signal_struct.
                 */
-                sig->utime = cputime_add(sig->utime, task_utime(tsk));
-                sig->stime = cputime_add(sig->stime, task_stime(tsk));
                sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
                sig->min_flt += tsk->min_flt;
                sig->maj_flt += tsk->maj_flt;
@@ -122,7 +120,6 @@ static void __exit_signal(struct task_struct *tsk)
                sig->inblock += task_io_get_inblock(tsk);
                sig->oublock += task_io_get_oublock(tsk);
                task_io_accounting_add(&sig->ioac, &tsk->ioac);
-                sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
                sig = NULL; /* Marker for below. */
        }
@@ -143,13 +140,21 @@ static void __exit_signal(struct task_struct *tsk)
        if (sig) {
                flush_sigqueue(&sig->shared_pending);
                taskstats_tgid_free(sig);
+                /*
+                 * Make sure ->signal can't go away under rq->lock,
+                 * see account_group_exec_runtime().
+                 */
+                task_rq_unlock_wait(tsk);
                __cleanup_signal(sig);
        }
 }
 static void delayed_put_task_struct(struct rcu_head *rhp)
 {
-        put_task_struct(container_of(rhp, struct task_struct, rcu));
+        struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+        trace_sched_process_free(tsk);
+        put_task_struct(tsk);
 }
@@ -583,8 +588,6 @@ mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
         * If there are other users of the mm and the owner (us) is exiting
         * we need to find a new owner to take on the responsibility.
         */
-        if (!mm)
-                return 0;
        if (atomic_read(&mm->mm_users) <= 1)
                return 0;
        if (mm->owner != p)
@@ -627,29 +630,38 @@ retry:
        } while_each_thread(g, c);
        read_unlock(&tasklist_lock);
+        /*
+         * We found no owner yet mm_users > 1: this implies that we are
+         * most likely racing with swapoff (try_to_unuse()) or /proc or
+         * ptrace or page migration (get_task_mm()).  Mark owner as NULL,
+         * so that subsystems can understand the callback and take action.
+         */
+        down_write(&mm->mmap_sem);
+        cgroup_mm_owner_callbacks(mm->owner, NULL);
+        mm->owner = NULL;
+        up_write(&mm->mmap_sem);
        return;
 assign_new_owner:
        BUG_ON(c == p);
        get_task_struct(c);
+        read_unlock(&tasklist_lock);
+        down_write(&mm->mmap_sem);
        /*
         * The task_lock protects c->mm from changing.
         * We always want mm->owner->mm == mm
         */
        task_lock(c);
-        /*
-         * Delay read_unlock() till we have the task_lock()
-         * to ensure that c does not slip away underneath us
-         */
-        read_unlock(&tasklist_lock);
        if (c->mm != mm) {
                task_unlock(c);
+                up_write(&mm->mmap_sem);
                put_task_struct(c);
                goto retry;
        }
        cgroup_mm_owner_callbacks(mm->owner, c);
        mm->owner = c;
        task_unlock(c);
+        up_write(&mm->mmap_sem);
        put_task_struct(c);
 }
 #endif /* CONFIG_MM_OWNER */
@@ -1046,14 +1058,6 @@ NORET_TYPE void do_exit(long code)
                exit_itimers(tsk->signal);
        }
        acct_collect(code, group_dead);
-#ifdef CONFIG_FUTEX
-        if (unlikely(tsk->robust_list))
-                exit_robust_list(tsk);
-#ifdef CONFIG_COMPAT
-        if (unlikely(tsk->compat_robust_list))
-                compat_exit_robust_list(tsk);
-#endif
-#endif
        if (group_dead)
                tty_audit_exit();
        if (unlikely(tsk->audit_context))
@@ -1066,6 +1070,8 @@ NORET_TYPE void do_exit(long code)
        if (group_dead)
                acct_process();
+        trace_sched_process_exit(tsk);
        exit_sem(tsk);
        exit_files(tsk);
        exit_fs(tsk);
@@ -1294,6 +1300,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
        if (likely(!traced)) {
                struct signal_struct *psig;
                struct signal_struct *sig;
+                struct task_cputime cputime;
                /*
                 * The resource counters for the group leader are in its
@@ -1309,20 +1316,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
                 * need to protect the access to p->parent->signal fields,
                 * as other threads in the parent group can be right
                 * here reaping other children at the same time.
+                 *
+                 * We use thread_group_cputime() to get times for the thread
+                 * group, which consolidates times for all threads in the
+                 * group including the group leader.
                 */
                spin_lock_irq(&p->parent->sighand->siglock);
                psig = p->parent->signal;
                sig = p->signal;
+                thread_group_cputime(p, &cputime);
                psig->cutime =
                        cputime_add(psig->cutime,
-                        cputime_add(p->utime,
+                        cputime_add(cputime.utime,
-                        cputime_add(sig->utime,
+                                    sig->cutime));
-                                    sig->cutime)));
                psig->cstime =
                        cputime_add(psig->cstime,
-                        cputime_add(p->stime,
+                        cputime_add(cputime.stime,
-                        cputime_add(sig->stime,
+                                    sig->cstime));
-                                    sig->cstime)));
                psig->cgtime =
                        cputime_add(psig->cgtime,
                        cputime_add(p->gtime,
@@ -1667,6 +1677,8 @@ static long do_wait(enum pid_type type, struct pid *pid, int options,
        struct task_struct *tsk;
        int retval;
+        trace_sched_process_wait(pid);
        add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
        /*