37 files changed, 2255 insertions, 678 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 9df4501cb921..0a32cb21ec97 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3fb789f6df94..3737a682cdf5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -843,6 +843,11 @@ static int parse_cgroupfs_options(char *data,
                                     struct cgroup_sb_opts *opts)
 {
        char *token, *o = data ?: "all";
+        unsigned long mask = (unsigned long)-1;
+#ifdef CONFIG_CPUSETS
+        mask = ~(1UL << cpuset_subsys_id);
+#endif
        opts->subsys_bits = 0;
        opts->flags = 0;
@@ -887,6 +892,15 @@ static int parse_cgroupfs_options(char *data,
                }
        }
+        /*
+         * Option noprefix was introduced just for backward compatibility
+         * with the old cpuset, so we allow noprefix only if mounting just
+         * the cpuset subsystem.
+         */
+        if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
+            (opts->subsys_bits & mask))
+                return -EINVAL;
        /* We can't have an empty hierarchy */
        if (!opts->subsys_bits)
                return -EINVAL;
diff --git a/kernel/exit.c b/kernel/exit.c
index b6c90b5ef509..628d41f0dd54 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -375,9 +375,8 @@ static void set_special_pids(struct pid *pid)
 }
 /*
- * Let kernel threads use this to say that they
+ * Let kernel threads use this to say that they allow a certain signal.
- * allow a certain signal (since daemonize() will
+ * Must not be used if kthread was cloned with CLONE_SIGHAND.
- * have disabled all of them by default).
 */
 int allow_signal(int sig)
 {
@@ -385,14 +384,14 @@ int allow_signal(int sig)
                return -EINVAL;
        spin_lock_irq(&current->sighand->siglock);
+        /* This is only needed for daemonize()'ed kthreads */
        sigdelset(&current->blocked, sig);
-        if (!current->mm) {
+        /*
-                /* Kernel threads handle their own signals.
+         * Kernel threads handle their own signals. Let the signal code
-                   Let the signal code know it'll be handled, so
+         * know it'll be handled, so that they don't get converted to
-                   that they don't get converted to SIGKILL or
+         * SIGKILL or just silently dropped.
-                   just silently dropped */
+         */
-                current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
+        current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
-        }
        recalc_sigpending();
        spin_unlock_irq(&current->sighand->siglock);
        return 0;
@@ -591,7 +590,7 @@ retry:
        /*
         * Search in the siblings
         */
-        list_for_each_entry(c, &p->parent->children, sibling) {
+        list_for_each_entry(c, &p->real_parent->children, sibling) {
                if (c->mm == mm)
                        goto assign_new_owner;
        }
@@ -758,7 +757,7 @@ static void reparent_thread(struct task_struct *father, struct task_struct *p,
        p->exit_signal = SIGCHLD;
        /* If it has exited notify the new parent about this child's death. */
-        if (!p->ptrace &&
+        if (!task_ptrace(p) &&
            p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
                do_notify_parent(p, p->exit_signal);
                if (task_detached(p)) {
@@ -783,7 +782,7 @@ static void forget_original_parent(struct task_struct *father)
        list_for_each_entry_safe(p, n, &father->children, sibling) {
                p->real_parent = reaper;
                if (p->parent == father) {
-                        BUG_ON(p->ptrace);
+                        BUG_ON(task_ptrace(p));
                        p->parent = p->real_parent;
                }
                reparent_thread(father, p, &dead_children);
@@ -1081,6 +1080,18 @@ SYSCALL_DEFINE1(exit_group, int, error_code)
        return 0;
 }
+struct wait_opts {
+        enum pid_type           wo_type;       /* >= PIDTYPE_MAX: any child */
+        int                     wo_flags;      /* wait option bits (W*) */
+        struct pid              *wo_pid;       /* pid matched via wo_type */
+        struct siginfo __user   *wo_info;      /* siginfo to fill, or NULL */
+        int __user              *wo_stat;      /* status word out, or NULL */
+        struct rusage __user    *wo_rusage;    /* rusage to fill, or NULL */
+        int                     notask_error;  /* result if nothing reaped */
+};
 static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 {
        struct pid *pid = NULL;
@@ -1091,13 +1102,12 @@ static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
        return pid;
 }
-static int eligible_child(enum pid_type type, struct pid *pid, int options,
+static int eligible_child(struct wait_opts *wo, struct task_struct *p)
-                          struct task_struct *p)
 {
        int err;
-        if (type < PIDTYPE_MAX) {
+        if (wo->wo_type < PIDTYPE_MAX) {
-                if (task_pid_type(p, type) != pid)
+                if (task_pid_type(p, wo->wo_type) != wo->wo_pid)
                        return 0;
        }
@@ -1106,8 +1116,8 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
         * set; otherwise, wait for non-clone children *only*.  (Note:
         * A "clone" child here is one that reports to its parent
         * using a signal other than SIGCHLD.) */
-        if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
+        if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
-            && !(options & __WALL))
+            && !(wo->wo_flags & __WALL))
                return 0;
        err = security_task_wait(p);
@@ -1117,14 +1127,15 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
        return 1;
 }
-static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
+static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
-                               int why, int status,
+                                pid_t pid, uid_t uid, int why, int status)
-                               struct siginfo __user *infop,
-                               struct rusage __user *rusagep)
 {
-        int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;
+        struct siginfo __user *infop;
+        int retval = wo->wo_rusage
+                ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
        put_task_struct(p);
+        infop = wo->wo_info;
        if (!retval)
                retval = put_user(SIGCHLD, &infop->si_signo);
        if (!retval)
@@ -1148,19 +1159,18 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
 * the lock and this task is uninteresting.  If we return nonzero, we have
 * released the lock and the system call should return.
 */
-static int wait_task_zombie(struct task_struct *p, int options,
+static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
-                            struct siginfo __user *infop,
-                            int __user *stat_addr, struct rusage __user *ru)
 {
        unsigned long state;
        int retval, status, traced;
        pid_t pid = task_pid_vnr(p);
        uid_t uid = __task_cred(p)->uid;
+        struct siginfo __user *infop;
-        if (!likely(options & WEXITED))
+        if (!likely(wo->wo_flags & WEXITED))
                return 0;
-        if (unlikely(options & WNOWAIT)) {
+        if (unlikely(wo->wo_flags & WNOWAIT)) {
                int exit_code = p->exit_code;
                int why, status;
@@ -1173,8 +1183,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
                        why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
                        status = exit_code & 0x7f;
                }
-                return wait_noreap_copyout(p, pid, uid, why,
+                return wait_noreap_copyout(wo, p, pid, uid, why, status);
-                                           status, infop, ru);
        }
        /*
@@ -1188,11 +1197,13 @@ static int wait_task_zombie(struct task_struct *p, int options,
        }
        traced = ptrace_reparented(p);
+        /*
-        if (likely(!traced)) {
+         * It can be ptraced but not reparented, check
+         * !task_detached() to filter out sub-threads.
+         */
+        if (likely(!traced) && likely(!task_detached(p))) {
                struct signal_struct *psig;
                struct signal_struct *sig;
-                struct task_cputime cputime;
                /*
                 * The resource counters for the group leader are in its
@@ -1205,26 +1216,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
                 * p->signal fields, because they are only touched by
                 * __exit_signal, which runs with tasklist_lock
                 * write-locked anyway, and so is excluded here.  We do
-                 * need to protect the access to p->parent->signal fields,
+                 * need to protect the access to parent->signal fields,
                 * as other threads in the parent group can be right
                 * here reaping other children at the same time.
-                 *
-                 * We use thread_group_cputime() to get times for the thread
-                 * group, which consolidates times for all threads in the
-                 * group including the group leader.
                 */
-                thread_group_cputime(p, &cputime);
+                spin_lock_irq(&p->real_parent->sighand->siglock);
-                spin_lock_irq(&p->parent->sighand->siglock);
+                psig = p->real_parent->signal;
-                psig = p->parent->signal;
                sig = p->signal;
                psig->cutime =
                        cputime_add(psig->cutime,
-                        cputime_add(cputime.utime,
+                        cputime_add(p->utime,
-                                    sig->cutime));
+                        cputime_add(sig->utime,
+                                    sig->cutime)));
                psig->cstime =
                        cputime_add(psig->cstime,
-                        cputime_add(cputime.stime,
+                        cputime_add(p->stime,
-                                    sig->cstime));
+                        cputime_add(sig->stime,
+                                    sig->cstime)));
                psig->cgtime =
                        cputime_add(psig->cgtime,
                        cputime_add(p->gtime,
@@ -1246,7 +1254,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
                        sig->oublock + sig->coublock;
                task_io_accounting_add(&psig->ioac, &p->ioac);
                task_io_accounting_add(&psig->ioac, &sig->ioac);
-                spin_unlock_irq(&p->parent->sighand->siglock);
+                spin_unlock_irq(&p->real_parent->sighand->siglock);
        }
        /*
@@ -1255,11 +1263,14 @@ static int wait_task_zombie(struct task_struct *p, int options,
         */
        read_unlock(&tasklist_lock);
-        retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
+        retval = wo->wo_rusage
+                ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
        status = (p->signal->flags & SIGNAL_GROUP_EXIT)
                ? p->signal->group_exit_code : p->exit_code;
-        if (!retval && stat_addr)
+        if (!retval && wo->wo_stat)
-                retval = put_user(status, stat_addr);
+                retval = put_user(status, wo->wo_stat);
+        infop = wo->wo_info;
        if (!retval && infop)
                retval = put_user(SIGCHLD, &infop->si_signo);
        if (!retval && infop)
@@ -1327,15 +1338,18 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace)
 * the lock and this task is uninteresting.  If we return nonzero, we have
 * released the lock and the system call should return.
 */
-static int wait_task_stopped(int ptrace, struct task_struct *p,
+static int wait_task_stopped(struct wait_opts *wo,
-                             int options, struct siginfo __user *infop,
+                                int ptrace, struct task_struct *p)
-                             int __user *stat_addr, struct rusage __user *ru)
 {
+        struct siginfo __user *infop;
        int retval, exit_code, *p_code, why;
        uid_t uid = 0; /* unneeded, required by compiler */
        pid_t pid;
-        if (!(options & WUNTRACED))
+        /*
+         * Traditionally we see ptrace'd stopped tasks regardless of options.
+         */
+        if (!ptrace && !(wo->wo_flags & WUNTRACED))
                return 0;
        exit_code = 0;
@@ -1349,7 +1363,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p,
        if (!exit_code)
                goto unlock_sig;
-        if (!unlikely(options & WNOWAIT))
+        if (!unlikely(wo->wo_flags & WNOWAIT))
                *p_code = 0;
        /* don't need the RCU readlock here as we're holding a spinlock */
@@ -1371,14 +1385,15 @@ unlock_sig:
        why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
        read_unlock(&tasklist_lock);
-        if (unlikely(options & WNOWAIT))
+        if (unlikely(wo->wo_flags & WNOWAIT))
-                return wait_noreap_copyout(p, pid, uid,
+                return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
-                                           why, exit_code,
-                                           infop, ru);
-        retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
+        retval = wo->wo_rusage
-        if (!retval && stat_addr)
+                ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
-                retval = put_user((exit_code << 8) | 0x7f, stat_addr);
+        if (!retval && wo->wo_stat)
+                retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);
+        infop = wo->wo_info;
        if (!retval && infop)
                retval = put_user(SIGCHLD, &infop->si_signo);
        if (!retval && infop)
@@ -1405,15 +1420,13 @@ unlock_sig:
 * the lock and this task is uninteresting.  If we return nonzero, we have
 * released the lock and the system call should return.
 */
-static int wait_task_continued(struct task_struct *p, int options,
+static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
-                               struct siginfo __user *infop,
-                               int __user *stat_addr, struct rusage __user *ru)
 {
        int retval;
        pid_t pid;
        uid_t uid;
-        if (!unlikely(options & WCONTINUED))
+        if (!unlikely(wo->wo_flags & WCONTINUED))
                return 0;
        if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
@@ -1425,7 +1438,7 @@ static int wait_task_continued(struct task_struct *p, int options,
                spin_unlock_irq(&p->sighand->siglock);
                return 0;
        }
-        if (!unlikely(options & WNOWAIT))
+        if (!unlikely(wo->wo_flags & WNOWAIT))
                p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
        uid = __task_cred(p)->uid;
        spin_unlock_irq(&p->sighand->siglock);
@@ -1434,17 +1447,17 @@ static int wait_task_continued(struct task_struct *p, int options,
        get_task_struct(p);
        read_unlock(&tasklist_lock);
-        if (!infop) {
+        if (!wo->wo_info) {
-                retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
+                retval = wo->wo_rusage
+                        ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
                put_task_struct(p);
-                if (!retval && stat_addr)
+                if (!retval && wo->wo_stat)
-                        retval = put_user(0xffff, stat_addr);
+                        retval = put_user(0xffff, wo->wo_stat);
                if (!retval)
                        retval = pid;
        } else {
-                retval = wait_noreap_copyout(p, pid, uid,
+                retval = wait_noreap_copyout(wo, p, pid, uid,
-                                             CLD_CONTINUED, SIGCONT,
+                                             CLD_CONTINUED, SIGCONT);
-                                             infop, ru);
                BUG_ON(retval == 0);
        }
@@ -1454,19 +1467,16 @@ static int wait_task_continued(struct task_struct *p, int options,
 /*
 * Consider @p for a wait by @parent.
 *
- * -ECHILD should be in *@notask_error before the first call.
+ * -ECHILD should be in ->notask_error before the first call.
 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
 * Returns zero if the search for a child should continue;
- * then *@notask_error is 0 if @p is an eligible child,
+ * then ->notask_error is 0 if @p is an eligible child,
 * or another error from security_task_wait(), or still -ECHILD.
 */
-static int wait_consider_task(struct task_struct *parent, int ptrace,
+static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent,
-                              struct task_struct *p, int *notask_error,
+                                int ptrace, struct task_struct *p)
-                              enum pid_type type, struct pid *pid, int options,
-                              struct siginfo __user *infop,
-                              int __user *stat_addr, struct rusage __user *ru)
 {
-        int ret = eligible_child(type, pid, options, p);
+        int ret = eligible_child(wo, p);
        if (!ret)
                return ret;
@@ -1478,17 +1488,17 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
                 * to look for security policy problems, rather
                 * than for mysterious wait bugs.
                 */
-                if (*notask_error)
+                if (wo->notask_error)
-                        *notask_error = ret;
+                        wo->notask_error = ret;
                return 0;
        }
-        if (likely(!ptrace) && unlikely(p->ptrace)) {
+        if (likely(!ptrace) && unlikely(task_ptrace(p))) {
                /*
                 * This child is hidden by ptrace.
                 * We aren't allowed to see it now, but eventually we will.
                 */
-                *notask_error = 0;
+                wo->notask_error = 0;
                return 0;
        }
@@ -1499,34 +1509,30 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
         * We don't reap group leaders with subthreads.
         */
        if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
-                return wait_task_zombie(p, options, infop, stat_addr, ru);
+                return wait_task_zombie(wo, p);
        /*
         * It's stopped or running now, so it might
         * later continue, exit, or stop again.
         */
-        *notask_error = 0;
+        wo->notask_error = 0;
        if (task_stopped_code(p, ptrace))
-                return wait_task_stopped(ptrace, p, options,
+                return wait_task_stopped(wo, ptrace, p);
-                                         infop, stat_addr, ru);
-        return wait_task_continued(p, options, infop, stat_addr, ru);
+        return wait_task_continued(wo, p);
 }
 /*
 * Do the work of do_wait() for one thread in the group, @tsk.
 *
- * -ECHILD should be in *@notask_error before the first call.
+ * -ECHILD should be in ->notask_error before the first call.
 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
 * Returns zero if the search for a child should continue; then
- * *@notask_error is 0 if there were any eligible children,
+ * ->notask_error is 0 if there were any eligible children,
 * or another error from security_task_wait(), or still -ECHILD.
 */
-static int do_wait_thread(struct task_struct *tsk, int *notask_error,
+static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
-                          enum pid_type type, struct pid *pid, int options,
-                          struct siginfo __user *infop, int __user *stat_addr,
-                          struct rusage __user *ru)
 {
        struct task_struct *p;
@@ -1535,9 +1541,7 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error,
                 * Do not consider detached threads.
                 */
                if (!task_detached(p)) {
-                        int ret = wait_consider_task(tsk, 0, p, notask_error,
+                        int ret = wait_consider_task(wo, tsk, 0, p);
-                                                     type, pid, options,
-                                                     infop, stat_addr, ru);
                        if (ret)
                                return ret;
                }
@@ -1546,22 +1550,12 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error,
        return 0;
 }
-static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
+static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
-                          enum pid_type type, struct pid *pid, int options,
-                          struct siginfo __user *infop, int __user *stat_addr,
-                          struct rusage __user *ru)
 {
        struct task_struct *p;
-        /*
-         * Traditionally we see ptrace'd stopped tasks regardless of options.
-         */
-        options |= WUNTRACED;
        list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
-                int ret = wait_consider_task(tsk, 1, p, notask_error,
+                int ret = wait_consider_task(wo, tsk, 1, p);
-                                             type, pid, options,
-                                             infop, stat_addr, ru);
                if (ret)
                        return ret;
        }
@@ -1569,65 +1563,59 @@ static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
        return 0;
 }
-static long do_wait(enum pid_type type, struct pid *pid, int options,
+static long do_wait(struct wait_opts *wo)
-                    struct siginfo __user *infop, int __user *stat_addr,
-                    struct rusage __user *ru)
 {
        DECLARE_WAITQUEUE(wait, current);
        struct task_struct *tsk;
        int retval;
-        trace_sched_process_wait(pid);
+        trace_sched_process_wait(wo->wo_pid);
        add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
        /*
         * If there is nothing that can match our critiera just get out.
-         * We will clear @retval to zero if we see any child that might later
+         * We will clear ->notask_error to zero if we see any child that
-         * match our criteria, even if we are not able to reap it yet.
+         * might later match our criteria, even if we are not able to reap
+         * it yet.
         */
-        retval = -ECHILD;
+        wo->notask_error = -ECHILD;
-        if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type])))
+        if ((wo->wo_type < PIDTYPE_MAX) &&
-                goto end;
+           (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
+                goto notask;
-        current->state = TASK_INTERRUPTIBLE;
+        set_current_state(TASK_INTERRUPTIBLE);
        read_lock(&tasklist_lock);
        tsk = current;
        do {
-                int tsk_result = do_wait_thread(tsk, &retval,
+                retval = do_wait_thread(wo, tsk);
-                                                type, pid, options,
+                if (retval)
-                                                infop, stat_addr, ru);
+                        goto end;
-                if (!tsk_result)
-                        tsk_result = ptrace_do_wait(tsk, &retval,
+                retval = ptrace_do_wait(wo, tsk);
-                                                    type, pid, options,
+                if (retval)
-                                                    infop, stat_addr, ru);
-                if (tsk_result) {
-                        /*
-                         * tasklist_lock is unlocked and we have a final result.
-                         */
-                        retval = tsk_result;
                        goto end;
-                }
-                if (options & __WNOTHREAD)
+                if (wo->wo_flags & __WNOTHREAD)
                        break;
-                tsk = next_thread(tsk);
+        } while_each_thread(current, tsk);
-                BUG_ON(tsk->signal != current->signal);
-        } while (tsk != current);
        read_unlock(&tasklist_lock);
-        if (!retval && !(options & WNOHANG)) {
+notask:
+        retval = wo->notask_error;
+        if (!retval && !(wo->wo_flags & WNOHANG)) {
                retval = -ERESTARTSYS;
                if (!signal_pending(current)) {
                        schedule();
                        goto repeat;
                }
        }
 end:
-        current->state = TASK_RUNNING;
+        __set_current_state(TASK_RUNNING);
        remove_wait_queue(&current->signal->wait_chldexit,&wait);
-        if (infop) {
+        if (wo->wo_info) {
+                struct siginfo __user *infop = wo->wo_info;
                if (retval > 0)
                        retval = 0;
                else {
@@ -1656,6 +1644,7 @@ end:
 SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
                infop, int, options, struct rusage __user *, ru)
 {
+        struct wait_opts wo;
        struct pid *pid = NULL;
        enum pid_type type;
        long ret;
@@ -1685,7 +1674,14 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
        if (type < PIDTYPE_MAX)
                pid = find_get_pid(upid);
-        ret = do_wait(type, pid, options, infop, NULL, ru);
+        wo.wo_type      = type;
+        wo.wo_pid       = pid;
+        wo.wo_flags     = options;
+        wo.wo_info      = infop;
+        wo.wo_stat      = NULL;
+        wo.wo_rusage    = ru;
+        ret = do_wait(&wo);
        put_pid(pid);
        /* avoid REGPARM breakage on x86: */
@@ -1696,6 +1692,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
                int, options, struct rusage __user *, ru)
 {
+        struct wait_opts wo;
        struct pid *pid = NULL;
        enum pid_type type;
        long ret;
@@ -1717,7 +1714,13 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
                pid = find_get_pid(upid);
        }
-        ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru);
+        wo.wo_type      = type;
+        wo.wo_pid       = pid;
+        wo.wo_flags     = options | WEXITED;
+        wo.wo_info      = NULL;
+        wo.wo_stat      = stat_addr;
+        wo.wo_rusage    = ru;
+        ret = do_wait(&wo);
        put_pid(pid);
        /* avoid REGPARM breakage on x86: */
diff --git a/kernel/fork.c b/kernel/fork.c
index be022c200da6..467746b3f0aa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1029,7 +1029,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->vfork_done = NULL;
        spin_lock_init(&p->alloc_lock);
-        clear_tsk_thread_flag(p, TIF_SIGPENDING);
        init_sigpending(&p->pending);
        p->utime = cputime_zero;
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
new file mode 100644
index 000000000000..22e9dcfaa3d3
--- /dev/null
+++ b/kernel/gcov/Kconfig
@@ -0,0 +1,48 @@
+menu "GCOV-based kernel profiling"
+config GCOV_KERNEL
+        bool "Enable gcov-based kernel profiling"
+        depends on DEBUG_FS && CONSTRUCTORS
+        default n
+        ---help---
+        This option enables gcov-based code profiling (e.g. for code coverage
+        measurements).
+        If unsure, say N.
+        Additionally specify CONFIG_GCOV_PROFILE_ALL=y to get profiling data
+        for the entire kernel. To enable profiling for specific files or
+        directories, add a line similar to the following to the respective
+        Makefile:
+        For a single file (e.g. main.o):
+                GCOV_PROFILE_main.o := y
+        For all files in one directory:
+                GCOV_PROFILE := y
+        To exclude files from being profiled even when CONFIG_GCOV_PROFILE_ALL
+        is specified, use:
+                GCOV_PROFILE_main.o := n
+        and:
+                GCOV_PROFILE := n
+        Note that the debugfs filesystem has to be mounted to access
+        profiling data.
+config GCOV_PROFILE_ALL
+        bool "Profile entire Kernel"
+        depends on GCOV_KERNEL
+        depends on S390 || X86
+        default n
+        ---help---
+        This option activates profiling for the entire kernel.
+        If unsure, say N.
+        Note that a kernel compiled with profiling flags will be significantly
+        larger and run slower. Also be sure to exclude files from profiling
+        which are not linked to the kernel image to prevent linker errors.
+endmenu
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
new file mode 100644
index 000000000000..3f761001d517
--- /dev/null
+++ b/kernel/gcov/Makefile
@@ -0,0 +1,3 @@
+EXTRA_CFLAGS := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
+obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o
diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
new file mode 100644
index 000000000000..9b22d03cc581
--- /dev/null
+++ b/kernel/gcov/base.c
@@ -0,0 +1,148 @@
+/*
+ *  This code maintains a list of active profiling data structures.
+ *
+ *    Copyright IBM Corp. 2009
+ *    Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ *
+ *    Uses gcc-internal data definitions.
+ *    Based on the gcov-kernel patch by:
+ *               Hubertus Franke <frankeh@us.ibm.com>
+ *               Nigel Hinds <nhinds@us.ibm.com>
+ *               Rajan Ravindran <rajancr@us.ibm.com>
+ *               Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ *               Paul Larson
+ */
+#define pr_fmt(fmt)     "gcov: " fmt
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include "gcov.h"
+static struct gcov_info *gcov_info_head; /* list of registered profiling data */
+static int gcov_events_enabled; /* set to 1 by gcov_enable_events() */
+static DEFINE_MUTEX(gcov_lock); /* held around list and flag accesses below */
+/*
+ * __gcov_init is called by gcc-generated constructor code for each object
+ * file compiled with -fprofile-arcs. It links @info at the head of the
+ * gcov_info_head list while holding gcov_lock.
+ */
+void __gcov_init(struct gcov_info *info)
+{
+        static unsigned int gcov_version; /* first info->version seen */
+        mutex_lock(&gcov_lock);
+        if (gcov_version == 0) {
+                gcov_version = info->version;
+                /*
+                 * Printing gcc's version magic may prove useful for debugging
+                 * incompatibility reports. It is printed only while
+                 * gcov_version is still 0.
+                 */
+                pr_info("version magic: 0x%x\n", gcov_version);
+        }
+        /*
+         * Add new profiling data structure to the head of the list and,
+         * if event reporting has been enabled, inform the event listener.
+         */
+        info->next = gcov_info_head;
+        gcov_info_head = info;
+        if (gcov_events_enabled)
+                gcov_event(GCOV_ADD, info);
+        mutex_unlock(&gcov_lock);
+}
+EXPORT_SYMBOL(__gcov_init);
+/*
+ * These functions may be referenced by gcc-generated profiling code but serve
+ * no function for kernel profiling, so they are exported as empty stubs.
+ */
+void __gcov_flush(void)
+{
+        /* Unused. */
+}
+EXPORT_SYMBOL(__gcov_flush);
+void __gcov_merge_add(gcov_type *counters, unsigned int n_counters)
+{
+        /* Unused. */
+}
+EXPORT_SYMBOL(__gcov_merge_add);
+void __gcov_merge_single(gcov_type *counters, unsigned int n_counters)
+{
+        /* Unused. */
+}
+EXPORT_SYMBOL(__gcov_merge_single);
+void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters)
+{
+        /* Unused. */
+}
+EXPORT_SYMBOL(__gcov_merge_delta);
+/**
+ * gcov_enable_events - enable event reporting through gcov_event()
+ *
+ * Turn on reporting of profiling data load/unload-events through the
+ * gcov_event() callback. Also replay all previous events once, i.e. issue
+ * GCOV_ADD for every entry already on the list, while holding gcov_lock. This
+ * function is needed because some events are potentially generated too early
+ * for the callback implementation to handle them initially.
+ */
+void gcov_enable_events(void)
+{
+        struct gcov_info *info;
+        mutex_lock(&gcov_lock);
+        gcov_events_enabled = 1;
+        /* Perform event callback for previously registered entries. */
+        for (info = gcov_info_head; info; info = info->next)
+                gcov_event(GCOV_ADD, info);
+        mutex_unlock(&gcov_lock);
+}
+#ifdef CONFIG_MODULES
+static inline int within(void *addr, void *start, unsigned long size)
+{
+        return ((addr >= start) && (addr < start + size));
+}
+/*
+ * Update list and generate events when modules are unloaded.
+ *
+ * within() above tests start <= addr < start + size. On MODULE_STATE_GOING,
+ * every gcov_info located in the mod->core_size bytes starting at
+ * mod->module_core is unlinked under gcov_lock and, if events are enabled,
+ * reported through gcov_event(GCOV_REMOVE). Any other event is ignored.
+ * Always returns NOTIFY_OK.
+ */
+static int gcov_module_notifier(struct notifier_block *nb, unsigned long event,
+                                void *data)
+{
+        struct module *mod = data;
+        struct gcov_info *info;
+        struct gcov_info *prev;
+        if (event != MODULE_STATE_GOING)
+                return NOTIFY_OK;
+        mutex_lock(&gcov_lock);
+        prev = NULL;
+        /*
+         * Remove entries located in module from linked list. prev tracks the
+         * last entry kept so far (NULL while no entry has been kept yet).
+         */
+        for (info = gcov_info_head; info; info = info->next) {
+                if (within(info, mod->module_core, mod->core_size)) {
+                        if (prev)
+                                prev->next = info->next;
+                        else
+                                gcov_info_head = info->next;
+                        if (gcov_events_enabled)
+                                gcov_event(GCOV_REMOVE, info);
+                } else
+                        prev = info;
+        }
+        mutex_unlock(&gcov_lock);
+        return NOTIFY_OK;
+}
+static struct notifier_block gcov_nb = {
+        .notifier_call  = gcov_module_notifier,
+};
+static int __init gcov_init(void)
+{
+        return register_module_notifier(&gcov_nb);
+}
+device_initcall(gcov_init);
+#endif /* CONFIG_MODULES */
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
new file mode 100644
index 000000000000..ef3c3f88a7a3
--- /dev/null
+++ b/kernel/gcov/fs.c
@@ -0,0 +1,673 @@
+/*
+ *  This code exports profiling data as debugfs files to userspace.
+ *
+ *    Copyright IBM Corp. 2009
+ *    Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ *
+ *    Uses gcc-internal data definitions.
+ *    Based on the gcov-kernel patch by:
+ *               Hubertus Franke <frankeh@us.ibm.com>
+ *               Nigel Hinds <nhinds@us.ibm.com>
+ *               Rajan Ravindran <rajancr@us.ibm.com>
+ *               Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ *               Paul Larson
+ *               Yi CDL Yang
+ */
+#define pr_fmt(fmt)     "gcov: " fmt
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/seq_file.h>
+#include "gcov.h"
+/**
+ * struct gcov_node - represents a debugfs entry
+ * @list: entry in the parent node's @children list
+ * @children: list of child nodes (linked through their @list member)
+ * @all: entry in the global list of all nodes (all_head)
+ * @parent: parent node, %NULL for the root node
+ * @info: associated profiling data structure if not a directory
+ * @ghost: when an object file containing profiling data is unloaded we keep a
+ *         copy of the profiling data here to allow collecting coverage data
+ *         for cleanup code. Such a node is called a "ghost".
+ * @dentry: main debugfs entry, either a directory or data file
+ * @links: array of symbolic link dentries, one per gcov_link[] entry, or
+ *         %NULL if no links were created
+ * @name: basename of the data file or directory; stored inline after the
+ *        structure, so nodes are allocated with room for the string
+ *
+ * struct gcov_node represents an entity within the gcov/ subdirectory
+ * of debugfs. There are directory and data file nodes. The latter represent
+ * the actual synthesized data file plus any associated symbolic links which
+ * are needed by the gcov tool to work correctly.
+ */
+struct gcov_node {
+        struct list_head list;
+        struct list_head children;
+        struct list_head all;
+        struct gcov_node *parent;
+        struct gcov_info *info;
+        struct gcov_info *ghost;
+        struct dentry *dentry;
+        struct dentry **links;
+        char name[0];
+};
+/* Path prefixes used when building symbolic link targets. */
+static const char objtree[] = OBJTREE;
+static const char srctree[] = SRCTREE;
+/* Root of the node hierarchy; its dentry is the debugfs "gcov" directory. */
+static struct gcov_node root_node;
+/* Dentry of the "reset" control file below the root directory. */
+static struct dentry *reset_dentry;
+/* List of every node created by new_node(), linked through gcov_node.all. */
+static LIST_HEAD(all_head);
+/* Serializes access to the node hierarchy and the lists above. */
+static DEFINE_MUTEX(node_lock);
+/* If non-zero, keep copies of profiling data for unloaded modules. */
+static int gcov_persist = 1;
+/*
+ * Handle the "gcov_persist=" boot parameter. Returns 1 once the value has
+ * been stored in gcov_persist and 0 if the string is not a valid number.
+ */
+static int __init gcov_persist_setup(char *str)
+{
+        unsigned long parsed;
+        int failed = strict_strtoul(str, 0, &parsed);
+        if (!failed) {
+                gcov_persist = parsed;
+                pr_info("setting gcov_persist to %d\n", gcov_persist);
+                return 1;
+        }
+        pr_warning("invalid gcov_persist parameter '%s'\n", str);
+        return 0;
+}
+__setup("gcov_persist=", gcov_persist_setup);
+/*
+ * seq_file.start() for gcov data files. The gcov iterator cannot jump to an
+ * arbitrary position, so it is rewound and then stepped forward *pos times.
+ * Returns the iterator, or NULL if the data ends before *pos is reached.
+ */
+static void *gcov_seq_start(struct seq_file *seq, loff_t *pos)
+{
+        struct gcov_iterator *iter = seq->private;
+        loff_t remaining = *pos;
+        gcov_iter_start(iter);
+        while (remaining-- > 0) {
+                if (gcov_iter_next(iter))
+                        return NULL;
+        }
+        return iter;
+}
+/*
+ * seq_file.next() for gcov data files: step the iterator by one record and
+ * bump *pos, or return NULL (leaving *pos alone) once the end is reached.
+ */
+static void *gcov_seq_next(struct seq_file *seq, void *data, loff_t *pos)
+{
+        if (gcov_iter_next(data) == 0) {
+                ++*pos;
+                return data;
+        }
+        return NULL;
+}
+/*
+ * seq_file.show() for gcov data files: emit the record at the iterator's
+ * current position. Any write failure is reported as -EINVAL.
+ */
+static int gcov_seq_show(struct seq_file *seq, void *data)
+{
+        return gcov_iter_write(data, seq) ? -EINVAL : 0;
+}
+/* seq_file.stop() implementation for gcov data files. */
+static void gcov_seq_stop(struct seq_file *seq, void *data)
+{
+        /* Intentionally empty: nothing is done when iteration stops. */
+}
+/* seq_file callbacks for gcov data files; installed by gcov_seq_open(). */
+static const struct seq_operations gcov_seq_ops = {
+        .start  = gcov_seq_start,
+        .next   = gcov_seq_next,
+        .show   = gcov_seq_show,
+        .stop   = gcov_seq_stop,
+};
+/*
+ * Return the profiling data set backing a node: the live data structure if
+ * there is one, otherwise the saved duplicate ("ghost") kept after the
+ * associated object file was unloaded. May be NULL if the node has neither.
+ */
+static struct gcov_info *get_node_info(struct gcov_node *node)
+{
+        return node->info ? node->info : node->ghost;
+}
+/*
+ * open() implementation for gcov data files. Create a copy of the profiling
+ * data set and initialize the iterator and seq_file interface.
+ *
+ * Returns 0 on success, -ENOMEM if the copy or the iterator cannot be
+ * allocated, or the error returned by seq_open(). On failure everything
+ * allocated here is released again.
+ */
+static int gcov_seq_open(struct inode *inode, struct file *file)
+{
+        struct gcov_node *node = inode->i_private;
+        struct gcov_iterator *iter;
+        struct seq_file *seq;
+        struct gcov_info *info;
+        int rc = -ENOMEM;
+        mutex_lock(&node_lock);
+        /*
+         * Read from a profiling data copy to minimize reference tracking
+         * complexity and concurrent access.
+         */
+        info = gcov_info_dup(get_node_info(node));
+        if (!info)
+                goto out_unlock;
+        iter = gcov_iter_new(info);
+        if (!iter)
+                goto err_free_info;
+        rc = seq_open(file, &gcov_seq_ops);
+        if (rc)
+                goto err_free_iter_info;
+        /* Hand the iterator to the seq_file callbacks via seq->private. */
+        seq = file->private_data;
+        seq->private = iter;
+out_unlock:
+        mutex_unlock(&node_lock);
+        return rc;
+        /* Error unwinding: free in reverse order, then leave via out_unlock. */
+err_free_iter_info:
+        gcov_iter_free(iter);
+err_free_info:
+        gcov_info_free(info);
+        goto out_unlock;
+}
+/*
+ * release() implementation for gcov data files. Frees the iterator and the
+ * private profiling data copy created by open(), then releases the seq_file.
+ */
+static int gcov_seq_release(struct inode *inode, struct file *file)
+{
+        struct seq_file *seq = file->private_data;
+        struct gcov_iterator *iter = seq->private;
+        /* Fetch the copy before the iterator that points to it is freed. */
+        struct gcov_info *copy = gcov_iter_get_info(iter);
+        gcov_iter_free(iter);
+        gcov_info_free(copy);
+        seq_release(inode, file);
+        return 0;
+}
+/*
+ * Look up a node by the file name recorded in its profiling data (live or
+ * ghost). Returns NULL if no node matches. Needs to be called with
+ * node_lock held.
+ */
+static struct gcov_node *get_node_by_name(const char *name)
+{
+        struct gcov_node *candidate;
+        list_for_each_entry(candidate, &all_head, all) {
+                struct gcov_info *data = get_node_info(candidate);
+                /* Nodes without profiling data carry no file name. */
+                if (!data)
+                        continue;
+                if (!strcmp(data->filename, name))
+                        return candidate;
+        }
+        return NULL;
+}
+static void remove_node(struct gcov_node *node);
+/*
+ * write() implementation for gcov data files. Any write resets the profiling
+ * data of the associated file; the written bytes themselves are ignored.
+ * If the object file has been unloaded (i.e. this is a "ghost" node), the
+ * debugfs node is removed instead. Always reports the full length as written.
+ */
+static ssize_t gcov_seq_write(struct file *file, const char __user *addr,
+                              size_t len, loff_t *pos)
+{
+        struct seq_file *seq = file->private_data;
+        struct gcov_info *copy = gcov_iter_get_info(seq->private);
+        struct gcov_node *node;
+        mutex_lock(&node_lock);
+        node = get_node_by_name(copy->filename);
+        if (node && node->ghost) {
+                /* Unloaded module: drop the node altogether. */
+                remove_node(node);
+        } else if (node) {
+                /* Live data: zero the real counters. */
+                gcov_info_reset(node->info);
+        }
+        /* The private copy used by this open file is reset as well. */
+        gcov_info_reset(copy);
+        mutex_unlock(&node_lock);
+        return len;
+}
+/*
+ * Build a link target string from a data file path of the form
+ *   path/to/file.gcda
+ * by cutting the text at the last '.' and appending the new extension:
+ *   <dir/>path/to/file.<ext>
+ * The "<dir>/" prefix is omitted when @dir is NULL. Returns a newly allocated
+ * string that the caller must kfree(), or NULL if allocation fails.
+ */
+static char *link_target(const char *dir, const char *path, const char *ext)
+{
+        char *stem = kstrdup(path, GFP_KERNEL);
+        char *dot;
+        char *result;
+        if (!stem)
+                return NULL;
+        /* Strip the old extension, if any, from the working copy. */
+        dot = strrchr(stem, '.');
+        if (dot)
+                *dot = '\0';
+        result = dir ? kasprintf(GFP_KERNEL, "%s/%s.%s", dir, stem, ext) :
+                       kasprintf(GFP_KERNEL, "%s.%s", stem, ext);
+        kfree(stem);
+        return result;
+}
+/*
+ * Construct a string representing the symbolic link target for the given
+ * gcov data file name and link type. Depending on the link type and the
+ * location of the data file, the link target can either point to a
+ * subdirectory of srctree, objtree or in an external location.
+ *
+ * @filename: name of the gcov data file
+ * @ext: link description (target tree and file extension)
+ *
+ * Returns a newly allocated string which the caller must kfree(), or NULL
+ * if allocation fails.
+ */
+static char *get_link_target(const char *filename, const struct gcov_link *ext)
+{
+        size_t objtree_len = strlen(objtree);
+        const char *rel;
+        /*
+         * The file only lives below objtree if the prefix ends on a path
+         * component boundary. Without the '/' check a sibling directory such
+         * as "<objtree>-old/" would match and a file name equal to objtree
+         * would make the relative path point past the string terminator.
+         * filename[objtree_len] is safe to read once the prefix has matched.
+         */
+        if (strncmp(filename, objtree, objtree_len) != 0 ||
+            filename[objtree_len] != '/') {
+                /* External compilation. */
+                return link_target(NULL, filename, ext->ext);
+        }
+        /* Skip the objtree prefix and the separating '/'. */
+        rel = filename + objtree_len + 1;
+        if (ext->dir == SRC_TREE)
+                return link_target(srctree, rel, ext->ext);
+        return link_target(objtree, rel, ext->ext);
+}
+#define SKEW_PREFIX     ".tmp_"
+/*
+ * Undo the filename skewing caused by the mod-versioning mechanism: a name
+ * of the form .tmp_filename.ext is returned as filename.ext, any other name
+ * is returned unchanged. The result points into @basename.
+ */
+static const char *deskew(const char *basename)
+{
+        const size_t skew_len = sizeof(SKEW_PREFIX) - 1;
+        if (strncmp(basename, SKEW_PREFIX, skew_len) != 0)
+                return basename;
+        return basename + skew_len;
+}
+/*
+ * Create links to additional files (usually .c and .gcno files) which the
+ * gcov tool expects to find in the same directory as the gcov data file.
+ *
+ * One symbolic link is created in @parent for each gcov_link[] entry and
+ * stored in node->links. This is all-or-nothing: if any link cannot be
+ * created, the links made so far are removed again and node->links is left
+ * NULL. No error is reported to the caller.
+ */
+static void add_links(struct gcov_node *node, struct dentry *parent)
+{
+        char *basename;
+        char *target;
+        int num;
+        int i;
+        /* Count the entries before the terminating NULL extension. */
+        for (num = 0; gcov_link[num].ext; num++)
+                /* Nothing. */;
+        node->links = kcalloc(num, sizeof(struct dentry *), GFP_KERNEL);
+        if (!node->links)
+                return;
+        for (i = 0; i < num; i++) {
+                target = get_link_target(get_node_info(node)->filename,
+                                         &gcov_link[i]);
+                if (!target)
+                        goto out_err;
+                /* The link is named after the last path component of target. */
+                basename = strrchr(target, '/');
+                if (!basename)
+                        goto out_err;
+                basename++;
+                node->links[i] = debugfs_create_symlink(deskew(basename),
+                                                        parent, target);
+                if (!node->links[i])
+                        goto out_err;
+                kfree(target);
+        }
+        return;
+out_err:
+        /* target may be NULL here; kfree() is still called on it. */
+        kfree(target);
+        /* Undo the links created in earlier iterations. */
+        while (i-- > 0)
+                debugfs_remove(node->links[i]);
+        kfree(node->links);
+        node->links = NULL;
+}
+/*
+ * File operations for gcov data files. Reading goes through the seq_file
+ * helpers; writing resets the profiling data (see gcov_seq_write()).
+ */
+static const struct file_operations gcov_data_fops = {
+        .open           = gcov_seq_open,
+        .release        = gcov_seq_release,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .write          = gcov_seq_write,
+};
+/*
+ * Basic initialization of a new node: set up the list heads and record the
+ * profiling data and parent. @name is copied into the node unless it is
+ * NULL; the node must have been allocated with room for the string.
+ */
+static void init_node(struct gcov_node *node, struct gcov_info *info,
+                      const char *name, struct gcov_node *parent)
+{
+        node->parent = parent;
+        node->info = info;
+        INIT_LIST_HEAD(&node->all);
+        INIT_LIST_HEAD(&node->children);
+        INIT_LIST_HEAD(&node->list);
+        if (!name)
+                return;
+        strcpy(node->name, name);
+}
+/*
+ * Allocate a node named @name below @parent and create its debugfs entry:
+ * a data file (plus symbolic links) if @info is given, a directory
+ * otherwise. The node is added to the parent's child list and to the list
+ * of all nodes. Returns NULL on failure. Needs to be called with node_lock
+ * held.
+ */
+static struct gcov_node *new_node(struct gcov_node *parent,
+                                  struct gcov_info *info, const char *name)
+{
+        size_t bytes = sizeof(struct gcov_node) + strlen(name) + 1;
+        struct gcov_node *node = kzalloc(bytes, GFP_KERNEL);
+        if (!node) {
+                pr_warning("out of memory\n");
+                return NULL;
+        }
+        init_node(node, info, name, parent);
+        /* A node with profiling data is a file, anything else a directory. */
+        node->dentry = info ?
+                debugfs_create_file(deskew(node->name), 0600, parent->dentry,
+                                    node, &gcov_data_fops) :
+                debugfs_create_dir(node->name, parent->dentry);
+        if (!node->dentry) {
+                pr_warning("could not create file\n");
+                kfree(node);
+                return NULL;
+        }
+        /* Link creation failures are not treated as fatal here. */
+        if (info)
+                add_links(node, parent->dentry);
+        list_add(&node->list, &parent->children);
+        list_add(&node->all, &all_head);
+        return node;
+}
+/*
+ * Remove the symbolic links associated with node and free the link array.
+ * Does nothing if the node has no links.
+ */
+static void remove_links(struct gcov_node *node)
+{
+        const struct gcov_link *link;
+        struct dentry **slot = node->links;
+        if (!slot)
+                return;
+        /* One slot exists per gcov_link[] entry before the terminator. */
+        for (link = gcov_link; link->ext; link++)
+                debugfs_remove(*slot++);
+        kfree(node->links);
+        node->links = NULL;
+}
+/*
+ * Unlink a node from the child list and the list of all nodes, remove its
+ * debugfs entry and links, and free the node along with any ghost data.
+ * Needs to be called with node_lock held.
+ */
+static void release_node(struct gcov_node *node)
+{
+        list_del(&node->all);
+        list_del(&node->list);
+        debugfs_remove(node->dentry);
+        remove_links(node);
+        /* gcov_info_free() dereferences its argument, so guard against NULL. */
+        if (node->ghost != NULL)
+                gcov_info_free(node->ghost);
+        kfree(node);
+}
+/*
+ * Release node and then walk up the hierarchy, releasing every ancestor
+ * that has become empty. Stops at the root node or at the first node that
+ * still has children. Needs to be called with node_lock held.
+ */
+static void remove_node(struct gcov_node *node)
+{
+        struct gcov_node *cur = node;
+        for (;;) {
+                struct gcov_node *up;
+                if (cur == &root_node || !list_empty(&cur->children))
+                        break;
+                /* Remember the parent before cur is freed. */
+                up = cur->parent;
+                release_node(cur);
+                cur = up;
+        }
+}
+/*
+ * Search the direct children of @parent for a node whose name equals @name.
+ * Returns the node or NULL. Needs to be called with node_lock held.
+ */
+static struct gcov_node *get_child_by_name(struct gcov_node *parent,
+                                           const char *name)
+{
+        struct gcov_node *child;
+        struct gcov_node *match = NULL;
+        list_for_each_entry(child, &parent->children, list) {
+                if (!strcmp(child->name, name)) {
+                        match = child;
+                        break;
+                }
+        }
+        return match;
+}
+/*
+ * write() implementation for reset file. Reset all profiling data to zero
+ * and remove ghost nodes. The written data is ignored and the full length
+ * is always reported as consumed.
+ */
+static ssize_t reset_write(struct file *file, const char __user *addr,
+                           size_t len, loff_t *pos)
+{
+        struct gcov_node *node;
+        mutex_lock(&node_lock);
+restart:
+        list_for_each_entry(node, &all_head, all) {
+                /* Live data file: zero its counters in place. */
+                if (node->info)
+                        gcov_info_reset(node->info);
+                /* No live data and no children: remove (ghost nodes). */
+                else if (list_empty(&node->children)) {
+                        remove_node(node);
+                        /* Several nodes may have gone - restart loop. */
+                        goto restart;
+                }
+        }
+        mutex_unlock(&node_lock);
+        return len;
+}
+/*
+ * read() implementation for reset file. The file has no content: every read
+ * returns 0 bytes.
+ */
+static ssize_t reset_read(struct file *file, char __user *addr, size_t len,
+                          loff_t *pos)
+{
+        /* Reads are permitted only so that a recursive copy won't fail. */
+        return 0;
+}
+/* File operations for the "reset" control file created by gcov_fs_init(). */
+static const struct file_operations gcov_reset_fops = {
+        .write  = reset_write,
+        .read   = reset_read,
+};
+/*
+ * Create a node for a given profiling data set and add it to all lists and
+ * debugfs. Needs to be called with node_lock held.
+ *
+ * info->filename is split at '/' and a directory node is looked up or
+ * created for each component, starting at the root node; the last component
+ * becomes the data file node. Failures are not reported to the caller.
+ */
+static void add_node(struct gcov_info *info)
+{
+        char *filename;
+        char *curr;
+        char *next;
+        struct gcov_node *parent;
+        struct gcov_node *node;
+        /* Work on a private copy because the path is tokenized in place. */
+        filename = kstrdup(info->filename, GFP_KERNEL);
+        if (!filename)
+                return;
+        parent = &root_node;
+        /* Create directory nodes along the path. */
+        for (curr = filename; (next = strchr(curr, '/')); curr = next + 1) {
+                /* Empty component (leading or doubled '/'): skip. */
+                if (curr == next)
+                        continue;
+                *next = 0;
+                if (strcmp(curr, ".") == 0)
+                        continue;
+                if (strcmp(curr, "..") == 0) {
+                        /* Refuse to climb above the root node. */
+                        if (!parent->parent)
+                                goto err_remove;
+                        parent = parent->parent;
+                        continue;
+                }
+                node = get_child_by_name(parent, curr);
+                if (!node) {
+                        node = new_node(parent, NULL, curr);
+                        if (!node)
+                                goto err_remove;
+                }
+                parent = node;
+        }
+        /* Create file node. */
+        node = new_node(parent, info, curr);
+        if (!node)
+                goto err_remove;
+out:
+        kfree(filename);
+        return;
+err_remove:
+        /* Drop directories left empty by the failed attempt. */
+        remove_node(parent);
+        goto out;
+}
+/*
+ * The profiling data set associated with this node is being unloaded. Save
+ * a duplicate in node->ghost and clear node->info, turning the node into a
+ * "ghost". Returns 0 on success; on allocation failure returns -ENOMEM and
+ * leaves node->info untouched.
+ */
+static int ghost_node(struct gcov_node *node)
+{
+        struct gcov_info *copy = gcov_info_dup(node->info);
+        node->ghost = copy;
+        if (copy) {
+                node->info = NULL;
+                return 0;
+        }
+        pr_warning("could not save data for '%s' (out of memory)\n",
+                   node->info->filename);
+        return -ENOMEM;
+}
+/*
+ * Profiling data for this node has been loaded again. If the saved ghost
+ * data is compatible, add it to the freshly loaded data; otherwise discard
+ * it with a warning. Either way the ghost copy is freed and the node becomes
+ * a regular node backed by @info. node->ghost must not be NULL.
+ */
+static void revive_node(struct gcov_node *node, struct gcov_info *info)
+{
+        if (!gcov_info_is_compatible(node->ghost, info)) {
+                pr_warning("discarding saved data for '%s' (version changed)\n",
+                           info->filename);
+        } else
+                gcov_info_add(info, node->ghost);
+        gcov_info_free(node->ghost);
+        node->ghost = NULL;
+        node->info = info;
+}
+/*
+ * Callback to create/remove profiling files when code compiled with
+ * -fprofile-arcs is loaded/unloaded.
+ *
+ * @action: GCOV_ADD or GCOV_REMOVE
+ * @info: profiling data set the event refers to
+ *
+ * Nodes are looked up by info->filename. A second data set with the same
+ * file name as a live node cannot be represented: adding it only produces
+ * a warning, and removing it leaves the existing node untouched.
+ */
+void gcov_event(enum gcov_action action, struct gcov_info *info)
+{
+        struct gcov_node *node;
+        mutex_lock(&node_lock);
+        node = get_node_by_name(info->filename);
+        switch (action) {
+        case GCOV_ADD:
+                /* Add new node or revive ghost. */
+                if (!node) {
+                        add_node(info);
+                        break;
+                }
+                /*
+                 * Only a ghost can be revived: revive_node() dereferences
+                 * node->ghost, which is NULL while the node is still live.
+                 */
+                if (gcov_persist && node->ghost)
+                        revive_node(node, info);
+                else {
+                        pr_warning("could not add '%s' (already exists)\n",
+                                   info->filename);
+                }
+                break;
+        case GCOV_REMOVE:
+                /*
+                 * Remove node or turn into ghost. The node must be backed by
+                 * this very data set; a node that merely shares the file
+                 * name (or is already a ghost) must not be touched.
+                 */
+                if (!node || node->info != info) {
+                        pr_warning("could not remove '%s' (not found)\n",
+                                   info->filename);
+                        break;
+                }
+                if (gcov_persist) {
+                        /* Keep the node if its data could be saved. */
+                        if (!ghost_node(node))
+                                break;
+                }
+                remove_node(node);
+                break;
+        }
+        mutex_unlock(&node_lock);
+}
+/*
+ * Create the debugfs entries: the gcov directory, which is the parent for
+ * the reset control file and all profiling files, and the reset file itself,
+ * which resets all profiling counts when written to. Afterwards previous
+ * events are replayed to get the fs hierarchy up-to-date. Returns 0 on
+ * success and -EIO if either debugfs entry could not be created.
+ */
+static __init int gcov_fs_init(void)
+{
+        init_node(&root_node, NULL, NULL, NULL);
+        root_node.dentry = debugfs_create_dir("gcov", NULL);
+        if (root_node.dentry) {
+                reset_dentry = debugfs_create_file("reset", 0600,
+                                                   root_node.dentry, NULL,
+                                                   &gcov_reset_fops);
+                if (reset_dentry) {
+                        gcov_enable_events();
+                        return 0;
+                }
+        }
+        pr_err("init failed\n");
+        /* Take the directory down again if only the reset file failed. */
+        if (root_node.dentry)
+                debugfs_remove(root_node.dentry);
+        return -EIO;
+}
+device_initcall(gcov_fs_init);
diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c
new file mode 100644
index 000000000000..ae5bb4260033
--- /dev/null
+++ b/kernel/gcov/gcc_3_4.c
@@ -0,0 +1,447 @@
+/*
+ *  This code provides functions to handle gcc's profiling data format
+ *  introduced with gcc 3.4. Future versions of gcc may change the gcov
+ *  format (as happened before), so all format-specific information needs
+ *  to be kept modular and easily exchangeable.
+ *
+ *  This file is based on gcc-internal definitions. Functions and data
+ *  structures are defined to be compatible with gcc counterparts.
+ *  For a better understanding, refer to gcc source: gcc/gcov-io.h.
+ *
+ *    Copyright IBM Corp. 2009
+ *    Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ *
+ *    Uses gcc-internal data definitions.
+ */
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
+#include "gcov.h"
+/*
+ * Symbolic links to be created for each profiling data file. The table is
+ * terminated by an entry whose extension is NULL.
+ */
+const struct gcov_link gcov_link[] = {
+        { OBJ_TREE, "gcno" },   /* Link to .gcno file in $(objtree). */
+        { 0, NULL},
+};
+/*
+ * Test whether counter @type is active, i.e. whether its bit is set in the
+ * data set's counter mask. Based on gcc magic. Doesn't change at run-time.
+ * Returns non-zero if active.
+ */
+static int counter_active(struct gcov_info *info, unsigned int type)
+{
+        int bit = 1 << type;
+        return info->ctr_mask & bit;
+}
+/*
+ * Count how many of the GCOV_COUNTERS counter types are active for this
+ * data set. Based on gcc magic.
+ */
+static unsigned int num_counter_active(struct gcov_info *info)
+{
+        unsigned int type;
+        unsigned int active = 0;
+        for (type = 0; type < GCOV_COUNTERS; type++)
+                active += counter_active(info, type) ? 1 : 0;
+        return active;
+}
+/**
+ * gcov_info_reset - reset profiling data to zero
+ * @info: profiling data set
+ *
+ * Zeroes the value array of every active counter type.
+ */
+void gcov_info_reset(struct gcov_info *info)
+{
+        unsigned int remaining = num_counter_active(info);
+        struct gcov_ctr_info *ctr = info->counts;
+        for (; remaining > 0; remaining--, ctr++)
+                memset(ctr->values, 0, ctr->num * sizeof(gcov_type));
+}
+/**
+ * gcov_info_is_compatible - check if profiling data can be added
+ * @info1: first profiling data set
+ * @info2: second profiling data set
+ *
+ * Two data sets are considered compatible when their stamps match.
+ *
+ * Returns non-zero if profiling data can be added, zero otherwise.
+ */
+int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2)
+{
+        if (info1->stamp != info2->stamp)
+                return 0;
+        return 1;
+}
+/**
+ * gcov_info_add - add up profiling data
+ * @dest: profiling data set to which data is added
+ * @source: profiling data set which is added
+ *
+ * Adds profiling counts of @source to @dest. The counter layout of @dest is
+ * used for both sets, so callers should check gcov_info_is_compatible()
+ * first.
+ */
+void gcov_info_add(struct gcov_info *dest, struct gcov_info *source)
+{
+        /* The set of active counters is fixed; compute it only once. */
+        unsigned int active = num_counter_active(dest);
+        unsigned int i;
+        unsigned int j;
+        for (i = 0; i < active; i++) {
+                for (j = 0; j < dest->counts[i].num; j++) {
+                        dest->counts[i].values[j] +=
+                                source->counts[i].values[j];
+                }
+        }
+}
+/*
+ * Compute the size of one function info entry: the fixed header followed by
+ * one unsigned int per active counter type, rounded up to the structure's
+ * alignment where that is stricter than unsigned int. Based on gcc magic.
+ */
+static size_t get_fn_size(struct gcov_info *info)
+{
+        size_t size = sizeof(struct gcov_fn_info);
+        size += num_counter_active(info) * sizeof(unsigned int);
+        if (__alignof__(struct gcov_fn_info) > sizeof(unsigned int))
+                size = ALIGN(size, __alignof__(struct gcov_fn_info));
+        return size;
+}
+/*
+ * Return the address of function info entry number @fn. Entries have a
+ * run-time size, so the table is addressed by byte offset. Based on gcc
+ * magic.
+ */
+static struct gcov_fn_info *get_fn_info(struct gcov_info *info, unsigned int fn)
+{
+        char *base = (char *) info->functions;
+        size_t offset = fn * get_fn_size(info);
+        return (struct gcov_fn_info *) (base + offset);
+}
+/**
+ * gcov_info_dup - duplicate profiling data set
+ * @info: profiling data set to duplicate
+ *
+ * Copies the header fields, the filename, the function table and the value
+ * array of every active counter type. The duplicate must be released with
+ * gcov_info_free().
+ *
+ * Return newly allocated duplicate on success, %NULL on error.
+ */
+struct gcov_info *gcov_info_dup(struct gcov_info *info)
+{
+        struct gcov_info *dup;
+        unsigned int i;
+        unsigned int active;
+        /* Duplicate gcov_info. */
+        active = num_counter_active(info);
+        /* Zeroed allocation lets gcov_info_free() handle a partial duplicate. */
+        dup = kzalloc(sizeof(struct gcov_info) +
+                      sizeof(struct gcov_ctr_info) * active, GFP_KERNEL);
+        if (!dup)
+                return NULL;
+        dup->version            = info->version;
+        dup->stamp              = info->stamp;
+        dup->n_functions        = info->n_functions;
+        dup->ctr_mask           = info->ctr_mask;
+        /* Duplicate filename. */
+        dup->filename           = kstrdup(info->filename, GFP_KERNEL);
+        if (!dup->filename)
+                goto err_free;
+        /* Duplicate table of functions. */
+        dup->functions = kmemdup(info->functions, info->n_functions *
+                                 get_fn_size(info), GFP_KERNEL);
+        if (!dup->functions)
+                goto err_free;
+        /* Duplicate counter arrays. */
+        for (i = 0; i < active ; i++) {
+                struct gcov_ctr_info *ctr = &info->counts[i];
+                size_t size = ctr->num * sizeof(gcov_type);
+                dup->counts[i].num = ctr->num;
+                dup->counts[i].merge = ctr->merge;
+                dup->counts[i].values = vmalloc(size);
+                if (!dup->counts[i].values)
+                        goto err_free;
+                memcpy(dup->counts[i].values, ctr->values, size);
+        }
+        return dup;
+err_free:
+        /* Members not yet set are still NULL from kzalloc(). */
+        gcov_info_free(dup);
+        return NULL;
+}
+/**
+ * gcov_info_free - release memory for profiling data set duplicate
+ * @info: profiling data set duplicate to free
+ *
+ * Frees the value arrays of all active counter types, the function table,
+ * the filename and the structure itself. @info must not be %NULL.
+ */
+void gcov_info_free(struct gcov_info *info)
+{
+        unsigned int i = num_counter_active(info);
+        while (i-- > 0)
+                vfree(info->counts[i].values);
+        kfree(info->functions);
+        kfree(info->filename);
+        kfree(info);
+}
+/**
+ * struct type_info - iterator helper array
+ * @ctr_type: counter type (index of the counter's bit in the counter mask)
+ * @offset: index of the first value of the current function for this type;
+ *          advanced by the number of values consumed each time the iterator
+ *          finishes this type for a function
+ *
+ * This array is needed to convert the in-memory data format into the in-file
+ * data format:
+ *
+ * In-memory:
+ *   for each counter type
+ *     for each function
+ *       values
+ *
+ * In-file:
+ *   for each function
+ *     for each counter type
+ *       values
+ *
+ * See gcc source gcc/gcov-io.h for more information on data organization.
+ */
+struct type_info {
+        int ctr_type;
+        unsigned int offset;
+};
+/**
+ * struct gcov_iterator - specifies current file position in logical records
+ * @info: associated profiling data
+ * @record: logical record number (one of the RECORD_* values), -1 once the
+ *          end of the data has been reached
+ * @function: function number
+ * @type: index of the current counter type within @type_info
+ * @count: index into values array, relative to the current function
+ * @num_types: number of active counter types, set by gcov_iter_start()
+ * @type_info: helper array to get values-array offset for current function;
+ *             allocated inline with one entry per active counter type
+ */
+struct gcov_iterator {
+        struct gcov_info *info;
+        int record;
+        unsigned int function;
+        unsigned int type;
+        unsigned int count;
+        int num_types;
+        struct type_info type_info[0];
+};
+/* Return the function info entry the iterator currently points at. */
+static struct gcov_fn_info *get_func(struct gcov_iterator *iter)
+{
+        struct gcov_info *info = iter->info;
+        return get_fn_info(info, iter->function);
+}
+/* Return the helper entry for the iterator's current counter type. */
+static struct type_info *get_type(struct gcov_iterator *iter)
+{
+        return iter->type_info + iter->type;
+}
+/**
+ * gcov_iter_new - allocate and initialize profiling data iterator
+ * @info: profiling data set to be iterated
+ *
+ * The iterator is allocated zeroed, with one helper slot per active counter
+ * type, and only references @info; it does not copy it.
+ *
+ * Return file iterator on success, %NULL otherwise.
+ */
+struct gcov_iterator *gcov_iter_new(struct gcov_info *info)
+{
+        size_t bytes = sizeof(struct gcov_iterator) +
+                       num_counter_active(info) * sizeof(struct type_info);
+        struct gcov_iterator *iter = kzalloc(bytes, GFP_KERNEL);
+        if (!iter)
+                return NULL;
+        iter->info = info;
+        return iter;
+}
+/**
+ * gcov_iter_free - release memory for iterator
+ * @iter: file iterator to free
+ *
+ * Only the iterator itself is freed; the profiling data set it refers to
+ * is left untouched.
+ */
+void gcov_iter_free(struct gcov_iterator *iter)
+{
+        kfree(iter);
+}
+/**
+ * gcov_iter_get_info - return profiling data set for given file iterator
+ * @iter: file iterator
+ *
+ * Returns the pointer that was passed to gcov_iter_new().
+ */
+struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter)
+{
+        return iter->info;
+}
+/**
+ * gcov_iter_start - reset file iterator to starting position
+ * @iter: file iterator
+ *
+ * Rewinds to the first record and rebuilds the helper array with one entry,
+ * at offset zero, for each active counter type.
+ */
+void gcov_iter_start(struct gcov_iterator *iter)
+{
+        int i;
+        iter->record = 0;
+        iter->function = 0;
+        iter->type = 0;
+        iter->count = 0;
+        iter->num_types = 0;
+        for (i = 0; i < GCOV_COUNTERS; i++) {
+                struct type_info *slot;
+                if (!counter_active(iter->info, i))
+                        continue;
+                slot = &iter->type_info[iter->num_types++];
+                slot->ctr_type = i;
+                slot->offset = 0;
+        }
+}
+/*
+ * Mapping of logical record number to actual file content. Records 0-2 are
+ * visited once per file, 3-6 once per function, 7-8 once per counter type
+ * of a function and 9 once per counter value (see gcov_iter_next()).
+ * NOTE(review): RECORD_FUNCTON_TAG_LEN is misspelled ("FUNCTON"); renaming
+ * it requires updating every user in this file at the same time.
+ */
+#define RECORD_FILE_MAGIC       0
+#define RECORD_GCOV_VERSION     1
+#define RECORD_TIME_STAMP       2
+#define RECORD_FUNCTION_TAG     3
+#define RECORD_FUNCTON_TAG_LEN  4
+#define RECORD_FUNCTION_IDENT   5
+#define RECORD_FUNCTION_CHECK   6
+#define RECORD_COUNT_TAG        7
+#define RECORD_COUNT_LEN        8
+#define RECORD_COUNT            9
+/**
+ * gcov_iter_next - advance file iterator to next logical record
+ * @iter: file iterator
+ *
+ * Records that are always followed by their numeric successor simply
+ * increment the record number. The remaining cases fall through from the
+ * innermost loop level (counter values) to the outermost (functions), each
+ * level either restarting at its first record or handing over to the next.
+ *
+ * Return zero if new position is valid, non-zero if iterator has reached end.
+ */
+int gcov_iter_next(struct gcov_iterator *iter)
+{
+        switch (iter->record) {
+        case RECORD_FILE_MAGIC:
+        case RECORD_GCOV_VERSION:
+        case RECORD_FUNCTION_TAG:
+        case RECORD_FUNCTON_TAG_LEN:
+        case RECORD_FUNCTION_IDENT:
+        case RECORD_COUNT_TAG:
+                /* Advance to next record */
+                iter->record++;
+                break;
+        case RECORD_COUNT:
+                /* Advance to next count */
+                iter->count++;
+                /* fall through */
+        case RECORD_COUNT_LEN:
+                if (iter->count < get_func(iter)->n_ctrs[iter->type]) {
+                        iter->record = RECORD_COUNT;
+                        break;
+                }
+                /*
+                 * Advance to next counter type. The values consumed for this
+                 * function are added to the type's offset so that the next
+                 * function starts reading behind them.
+                 */
+                get_type(iter)->offset += iter->count;
+                iter->count = 0;
+                iter->type++;
+                /* fall through */
+        case RECORD_FUNCTION_CHECK:
+                if (iter->type < iter->num_types) {
+                        iter->record = RECORD_COUNT_TAG;
+                        break;
+                }
+                /* Advance to next function */
+                iter->type = 0;
+                iter->function++;
+                /* fall through */
+        case RECORD_TIME_STAMP:
+                if (iter->function < iter->info->n_functions)
+                        iter->record = RECORD_FUNCTION_TAG;
+                else
+                        iter->record = -1;
+                break;
+        }
+        /* Check for EOF. */
+        if (iter->record == -1)
+                return -EINVAL;
+        else
+                return 0;
+}
+/**
+ * seq_write_gcov_u32 - write 32 bit number in gcov format to seq_file
+ * @seq: seq_file handle
+ * @v: value to be stored
+ *
+ * Number format defined by gcc: numbers are recorded in the 32 bit
+ * unsigned binary form of the endianness of the machine generating the
+ * file. Returns the result of seq_write().
+ */
+static int seq_write_gcov_u32(struct seq_file *seq, u32 v)
+{
+        u32 word = v;
+        return seq_write(seq, &word, sizeof(word));
+}
+/**
+ * seq_write_gcov_u64 - write 64 bit number in gcov format to seq_file
+ * @seq: seq_file handle
+ * @v: value to be stored
+ *
+ * Number format defined by gcc: numbers are recorded in the 32 bit
+ * unsigned binary form of the endianness of the machine generating the
+ * file. 64 bit numbers are stored as two 32 bit numbers, the low part
+ * first. Returns the result of seq_write().
+ */
+static int seq_write_gcov_u64(struct seq_file *seq, u64 v)
+{
+        u32 words[2];
+        words[1] = (v >> 32);
+        words[0] = (v & 0xffffffffUL);
+        return seq_write(seq, words, sizeof(words));
+}
+/**
+ * gcov_iter_write - write data for current pos to seq_file
+ * @iter: file iterator
+ * @seq: seq_file handle
+ *
+ * Emits the single record selected by the iterator's record number. A
+ * record number without a matching case yields -EINVAL.
+ *
+ * Return zero on success, non-zero otherwise.
+ */
+int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq)
+{
+        switch (iter->record) {
+        case RECORD_FILE_MAGIC:
+                return seq_write_gcov_u32(seq, GCOV_DATA_MAGIC);
+        case RECORD_GCOV_VERSION:
+                return seq_write_gcov_u32(seq, iter->info->version);
+        case RECORD_TIME_STAMP:
+                return seq_write_gcov_u32(seq, iter->info->stamp);
+        case RECORD_FUNCTION_TAG:
+                return seq_write_gcov_u32(seq, GCOV_TAG_FUNCTION);
+        case RECORD_FUNCTON_TAG_LEN:
+                return seq_write_gcov_u32(seq, 2);
+        case RECORD_FUNCTION_IDENT:
+                return seq_write_gcov_u32(seq, get_func(iter)->ident);
+        case RECORD_FUNCTION_CHECK:
+                return seq_write_gcov_u32(seq, get_func(iter)->checksum);
+        case RECORD_COUNT_TAG:
+                return seq_write_gcov_u32(seq,
+                        GCOV_TAG_FOR_COUNTER(get_type(iter)->ctr_type));
+        case RECORD_COUNT_LEN:
+                /* Length is twice the number of values for this type. */
+                return seq_write_gcov_u32(seq,
+                                get_func(iter)->n_ctrs[iter->type] * 2);
+        case RECORD_COUNT:
+                return seq_write_gcov_u64(seq,
+                        iter->info->counts[iter->type].
+                                values[iter->count + get_type(iter)->offset]);
+        }
+        return -EINVAL;
+}
diff --git a/kernel/gcov/gcov.h b/kernel/gcov/gcov.h
new file mode 100644
index 000000000000..060073ebf7a6
--- /dev/null
+++ b/kernel/gcov/gcov.h
@@ -0,0 +1,128 @@
+/*
+ *  Profiling infrastructure declarations.
+ *
+ *  This file is based on gcc-internal definitions. Data structures are
+ *  defined to be compatible with gcc counterparts. For a better
+ *  understanding, refer to gcc source: gcc/gcov-io.h.
+ *
+ *    Copyright IBM Corp. 2009
+ *    Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ *
+ *    Uses gcc-internal data definitions.
+ */
+#ifndef GCOV_H
+#define GCOV_H GCOV_H
+#include <linux/types.h>
+/*
+ * Profiling data types used for gcc 3.4 and above - these are defined by
+ * gcc and need to be kept as close to the original definition as possible to
+ * remain compatible.
+ */
+#define GCOV_COUNTERS           5
+#define GCOV_DATA_MAGIC         ((unsigned int) 0x67636461)
+#define GCOV_TAG_FUNCTION       ((unsigned int) 0x01000000)
+#define GCOV_TAG_COUNTER_BASE   ((unsigned int) 0x01a10000)
+#define GCOV_TAG_FOR_COUNTER(count)                                     \
+        (GCOV_TAG_COUNTER_BASE + ((unsigned int) (count) << 17))
+#if BITS_PER_LONG >= 64
+typedef long gcov_type;
+#else
+typedef long long gcov_type;
+#endif
+/**
+ * struct gcov_fn_info - profiling meta data per function
+ * @ident: object file-unique function identifier
+ * @checksum: function checksum
+ * @n_ctrs: number of values per counter type belonging to this function
+ *
+ * This data is generated by gcc during compilation and doesn't change
+ * at run-time.
+ */
+struct gcov_fn_info {
+        unsigned int ident;
+        unsigned int checksum;
+        unsigned int n_ctrs[0];
+};
+/**
+ * struct gcov_ctr_info - profiling data per counter type
+ * @num: number of counter values for this type
+ * @values: array of counter values for this type
+ * @merge: merge function for counter values of this type (unused)
+ *
+ * This data is generated by gcc during compilation and doesn't change
+ * at run-time with the exception of the values array.
+ */
+struct gcov_ctr_info {
+        unsigned int    num;
+        gcov_type       *values;
+        void            (*merge)(gcov_type *, unsigned int);
+};
+/**
+ * struct gcov_info - profiling data per object file
+ * @version: gcov version magic indicating the gcc version used for compilation
+ * @next: list head for a singly-linked list
+ * @stamp: time stamp
+ * @filename: name of the associated gcov data file
+ * @n_functions: number of instrumented functions
+ * @functions: function data
+ * @ctr_mask: mask specifying which counter types are active
+ * @counts: counter data per counter type
+ *
+ * This data is generated by gcc during compilation and doesn't change
+ * at run-time with the exception of the next pointer.
+ */
+struct gcov_info {
+        unsigned int                    version;
+        struct gcov_info                *next;
+        unsigned int                    stamp;
+        const char                      *filename;
+        unsigned int                    n_functions;
+        const struct gcov_fn_info       *functions;
+        unsigned int                    ctr_mask;
+        struct gcov_ctr_info            counts[0];
+};
+/* Base interface. */
+enum gcov_action {
+        GCOV_ADD,
+        GCOV_REMOVE,
+};
+void gcov_event(enum gcov_action action, struct gcov_info *info);
+void gcov_enable_events(void);
+/* Iterator control. */
+struct seq_file;
+struct gcov_iterator;
+struct gcov_iterator *gcov_iter_new(struct gcov_info *info);
+void gcov_iter_free(struct gcov_iterator *iter);
+void gcov_iter_start(struct gcov_iterator *iter);
+int gcov_iter_next(struct gcov_iterator *iter);
+int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq);
+struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter);
+/* gcov_info control. */
+void gcov_info_reset(struct gcov_info *info);
+int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2);
+void gcov_info_add(struct gcov_info *dest, struct gcov_info *source);
+struct gcov_info *gcov_info_dup(struct gcov_info *info);
+void gcov_info_free(struct gcov_info *info);
+/*
+ * struct gcov_link - one entry of the gcov_link[] table declared below
+ * @dir: which tree the entry refers to, OBJ_TREE or SRC_TREE
+ * @ext: string associated with the entry
+ *       (NOTE(review): presumably a file name extension -- confirm
+ *       against the users of gcov_link[])
+ */
+struct gcov_link {
+        enum {
+                OBJ_TREE,
+                SRC_TREE,
+        } dir;
+        const char *ext;
+};
+extern const struct gcov_link gcov_link[];
+#endif /* GCOV_H */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index aaf5c9d05770..50da67672901 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -856,7 +856,7 @@ EXPORT_SYMBOL(free_irq);
 *      still called in hard interrupt context and has to check
 *      whether the interrupt originates from the device. If yes it
 *      needs to disable the interrupt on the device and return
- *      IRQ_THREAD_WAKE which will wake up the handler thread and run
+ *      IRQ_WAKE_THREAD which will wake up the handler thread and run
 *      @thread_fn. This split handler design is necessary to support
 *      shared interrupts.
 *
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 7fa441333529..9b1a7de26979 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -27,7 +27,6 @@ struct kthread_create_info
        /* Information passed to kthread() from kthreadd. */
        int (*threadfn)(void *data);
        void *data;
-        struct completion started;
        /* Result passed back to kthread_create() from kthreadd. */
        struct task_struct *result;
@@ -36,17 +35,13 @@ struct kthread_create_info
        struct list_head list;
 };
-struct kthread_stop_info
+struct kthread {
-{
+        int should_stop;
-        struct task_struct *k;
+        struct completion exited;
-        int err;
-        struct completion done;
 };
-/* Thread stopping is done by setthing this var: lock serializes
+#define to_kthread(tsk) \
- * multiple kthread_stop calls. */
+        container_of((tsk)->vfork_done, struct kthread, exited)
-static DEFINE_MUTEX(kthread_stop_lock);
-static struct kthread_stop_info kthread_stop_info;
 /**
 * kthread_should_stop - should this kthread return now?
@@ -57,36 +52,35 @@ static struct kthread_stop_info kthread_stop_info;
 */
 int kthread_should_stop(void)
 {
-        return (kthread_stop_info.k == current);
+        return to_kthread(current)->should_stop;
 }
 EXPORT_SYMBOL(kthread_should_stop);
 static int kthread(void *_create)
 {
+        /* Copy data: it's on kthread's stack */
        struct kthread_create_info *create = _create;
-        int (*threadfn)(void *data);
+        int (*threadfn)(void *data) = create->threadfn;
-        void *data;
+        void *data = create->data;
-        int ret = -EINTR;
+        struct kthread self;
+        int ret;
-        /* Copy data: it's on kthread's stack */
+        self.should_stop = 0;
-        threadfn = create->threadfn;
+        init_completion(&self.exited);
-        data = create->data;
+        current->vfork_done = &self.exited;
        /* OK, tell user we're spawned, wait for stop or wakeup */
        __set_current_state(TASK_UNINTERRUPTIBLE);
        create->result = current;
-        complete(&create->started);
+        complete(&create->done);
        schedule();
-        if (!kthread_should_stop())
+        ret = -EINTR;
+        if (!self.should_stop)
                ret = threadfn(data);
-        /* It might have exited on its own, w/o kthread_stop.  Check. */
+        /* we can't just return, we must preserve "self" on stack */
-        if (kthread_should_stop()) {
+        do_exit(ret);
-                kthread_stop_info.err = ret;
-                complete(&kthread_stop_info.done);
-        }
-        return 0;
 }
 static void create_kthread(struct kthread_create_info *create)
@@ -95,11 +89,10 @@ static void create_kthread(struct kthread_create_info *create)
        /* We want our own signal handler (we take no signals by default). */
        pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
-        if (pid < 0)
+        if (pid < 0) {
                create->result = ERR_PTR(pid);
-        else
+                complete(&create->done);
-                wait_for_completion(&create->started);
+        }
-        complete(&create->done);
 }
 /**
@@ -130,7 +123,6 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
        create.threadfn = threadfn;
        create.data = data;
-        init_completion(&create.started);
        init_completion(&create.done);
        spin_lock(&kthread_create_lock);
@@ -198,30 +190,22 @@ EXPORT_SYMBOL(kthread_bind);
 */
 int kthread_stop(struct task_struct *k)
 {
+        struct kthread *kthread;
        int ret;
-        mutex_lock(&kthread_stop_lock);
-        /* It could exit after stop_info.k set, but before wake_up_process. */
-        get_task_struct(k);
        trace_sched_kthread_stop(k);
+        get_task_struct(k);
-        /* Must init completion *before* thread sees kthread_stop_info.k */
+        kthread = to_kthread(k);
-        init_completion(&kthread_stop_info.done);
+        barrier(); /* it might have exited */
-        smp_wmb();
+        if (k->vfork_done != NULL) {
+                kthread->should_stop = 1;
+                wake_up_process(k);
+                wait_for_completion(&kthread->exited);
+        }
+        ret = k->exit_code;
-        /* Now set kthread_should_stop() to true, and wake it up. */
-        kthread_stop_info.k = k;
-        wake_up_process(k);
        put_task_struct(k);
-        /* Once it dies, reset stop ptr, gather result and we're done. */
-        wait_for_completion(&kthread_stop_info.done);
-        kthread_stop_info.k = NULL;
-        ret = kthread_stop_info.err;
-        mutex_unlock(&kthread_stop_lock);
        trace_sched_kthread_stop_ret(ret);
        return ret;
diff --git a/kernel/module.c b/kernel/module.c
index 215aaab09e91..38928fcaff2b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2216,6 +2216,10 @@ static noinline struct module *load_module(void __user *umod,
        mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
                                            "__kcrctab_unused_gpl");
 #endif
+#ifdef CONFIG_CONSTRUCTORS
+        mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors",
+                                  sizeof(*mod->ctors), &mod->num_ctors);
+#endif
 #ifdef CONFIG_MARKERS
        mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
@@ -2389,6 +2393,17 @@ static noinline struct module *load_module(void __user *umod,
        goto free_hdr;
 }
+/*
+ * Invoke every entry of mod->ctors, in table order. Compiles to an empty
+ * function unless CONFIG_CONSTRUCTORS is defined.
+ */
+static void do_mod_ctors(struct module *mod)
+{
+#ifdef CONFIG_CONSTRUCTORS
+        unsigned long idx = 0;
+        while (idx < mod->num_ctors) {
+                mod->ctors[idx]();
+                idx++;
+        }
+#endif
+}
 /* This is where the real work happens */
 SYSCALL_DEFINE3(init_module, void __user *, umod,
                unsigned long, len, const char __user *, uargs)
@@ -2417,6 +2432,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
        blocking_notifier_call_chain(&module_notify_list,
                        MODULE_STATE_COMING, mod);
+        do_mod_ctors(mod);
        /* Start the module */
        if (mod->init != NULL)
                ret = do_one_initcall(mod->init);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 63598dca2d0c..09b4ff9711b2 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -26,19 +26,14 @@ static struct kmem_cache *nsproxy_cachep;
 struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
-/*
+static inline struct nsproxy *create_nsproxy(void)
- * creates a copy of "orig" with refcount 1.
- */
-static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig)
 {
-        struct nsproxy *ns;
+        struct nsproxy *nsproxy;
-        ns = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
+        nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
-        if (ns) {
+        if (nsproxy)
-                memcpy(ns, orig, sizeof(struct nsproxy));
+                atomic_set(&nsproxy->count, 1);
-                atomic_set(&ns->count, 1);
+        return nsproxy;
-        }
-        return ns;
 }
 /*
@@ -52,7 +47,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
        struct nsproxy *new_nsp;
        int err;
-        new_nsp = clone_nsproxy(tsk->nsproxy);
+        new_nsp = create_nsproxy();
        if (!new_nsp)
                return ERR_PTR(-ENOMEM);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 29b685f551aa..1a933a221ea4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -124,7 +124,7 @@ void perf_enable(void)
 static void get_ctx(struct perf_counter_context *ctx)
 {
-        atomic_inc(&ctx->refcount);
+        WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
 }
 static void free_ctx(struct rcu_head *head)
@@ -175,6 +175,11 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
                        spin_unlock_irqrestore(&ctx->lock, *flags);
                        goto retry;
                }
+                if (!atomic_inc_not_zero(&ctx->refcount)) {
+                        spin_unlock_irqrestore(&ctx->lock, *flags);
+                        ctx = NULL;
+                }
        }
        rcu_read_unlock();
        return ctx;
@@ -193,7 +198,6 @@ static struct perf_counter_context *perf_pin_task_context(struct task_struct *ta
        ctx = perf_lock_task_context(task, &flags);
        if (ctx) {
                ++ctx->pin_count;
-                get_ctx(ctx);
                spin_unlock_irqrestore(&ctx->lock, flags);
        }
        return ctx;
@@ -1283,7 +1287,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
                if (!interrupts) {
                        perf_disable();
                        counter->pmu->disable(counter);
-                        atomic_set(&hwc->period_left, 0);
+                        atomic64_set(&hwc->period_left, 0);
                        counter->pmu->enable(counter);
                        perf_enable();
                }
@@ -1459,11 +1463,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
                        put_ctx(parent_ctx);
                        ctx->parent_ctx = NULL;         /* no longer a clone */
                }
-                /*
-                 * Get an extra reference before dropping the lock so that
-                 * this context won't get freed if the task exits.
-                 */
-                get_ctx(ctx);
                spin_unlock_irqrestore(&ctx->lock, flags);
        }
@@ -1553,7 +1552,7 @@ static int perf_release(struct inode *inode, struct file *file)
 static ssize_t
 perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 {
-        u64 values[3];
+        u64 values[4];
        int n;
        /*
@@ -1620,22 +1619,6 @@ static void perf_counter_reset(struct perf_counter *counter)
        perf_counter_update_userpage(counter);
 }
-static void perf_counter_for_each_sibling(struct perf_counter *counter,
-                                          void (*func)(struct perf_counter *))
-{
-        struct perf_counter_context *ctx = counter->ctx;
-        struct perf_counter *sibling;
-        WARN_ON_ONCE(ctx->parent_ctx);
-        mutex_lock(&ctx->mutex);
-        counter = counter->group_leader;
-        func(counter);
-        list_for_each_entry(sibling, &counter->sibling_list, list_entry)
-                func(sibling);
-        mutex_unlock(&ctx->mutex);
-}
 /*
 * Holding the top-level counter's child_mutex means that any
 * descendant process that has inherited this counter will block
@@ -1658,14 +1641,18 @@ static void perf_counter_for_each_child(struct perf_counter *counter,
 static void perf_counter_for_each(struct perf_counter *counter,
                                  void (*func)(struct perf_counter *))
 {
-        struct perf_counter *child;
+        struct perf_counter_context *ctx = counter->ctx;
+        struct perf_counter *sibling;
-        WARN_ON_ONCE(counter->ctx->parent_ctx);
+        WARN_ON_ONCE(ctx->parent_ctx);
-        mutex_lock(&counter->child_mutex);
+        mutex_lock(&ctx->mutex);
-        perf_counter_for_each_sibling(counter, func);
+        counter = counter->group_leader;
-        list_for_each_entry(child, &counter->child_list, child_list)
-                perf_counter_for_each_sibling(child, func);
+        perf_counter_for_each_child(counter, func);
-        mutex_unlock(&counter->child_mutex);
+        func(counter);
+        list_for_each_entry(sibling, &counter->sibling_list, list_entry)
+                perf_counter_for_each_child(counter, func);
+        mutex_unlock(&ctx->mutex);
 }
 static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
@@ -1806,6 +1793,12 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        struct perf_mmap_data *data;
        int ret = VM_FAULT_SIGBUS;
+        if (vmf->flags & FAULT_FLAG_MKWRITE) {
+                if (vmf->pgoff == 0)
+                        ret = 0;
+                return ret;
+        }
        rcu_read_lock();
        data = rcu_dereference(counter->data);
        if (!data)
@@ -1819,9 +1812,16 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
                if ((unsigned)nr > data->nr_pages)
                        goto unlock;
+                if (vmf->flags & FAULT_FLAG_WRITE)
+                        goto unlock;
                vmf->page = virt_to_page(data->data_pages[nr]);
        }
        get_page(vmf->page);
+        vmf->page->mapping = vma->vm_file->f_mapping;
+        vmf->page->index   = vmf->pgoff;
        ret = 0;
 unlock:
        rcu_read_unlock();
@@ -1874,6 +1874,14 @@ fail:
        return -ENOMEM;
 }
+/*
+ * Release one page of the mmap buffer given its kernel virtual address.
+ * page->mapping is cleared first; perf_mmap_fault() sets it when the page
+ * is handed out.
+ */
+static void perf_mmap_free_page(unsigned long addr)
+{
+        struct page *pg;
+        pg = virt_to_page(addr);
+        pg->mapping = NULL;
+        __free_page(pg);
+}
 static void __perf_mmap_data_free(struct rcu_head *rcu_head)
 {
        struct perf_mmap_data *data;
@@ -1881,9 +1889,10 @@ static void __perf_mmap_data_free(struct rcu_head *rcu_head)
        data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
-        free_page((unsigned long)data->user_page);
+        perf_mmap_free_page((unsigned long)data->user_page);
        for (i = 0; i < data->nr_pages; i++)
-                free_page((unsigned long)data->data_pages[i]);
+                perf_mmap_free_page((unsigned long)data->data_pages[i]);
        kfree(data);
 }
@@ -1920,9 +1929,10 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 }
 static struct vm_operations_struct perf_mmap_vmops = {
-        .open  = perf_mmap_open,
+        .open           = perf_mmap_open,
-        .close = perf_mmap_close,
+        .close          = perf_mmap_close,
-        .fault = perf_mmap_fault,
+        .fault          = perf_mmap_fault,
+        .page_mkwrite   = perf_mmap_fault,
 };
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
@@ -1936,7 +1946,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        long user_extra, extra;
        int ret = 0;
-        if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
+        if (!(vma->vm_flags & VM_SHARED))
                return -EINVAL;
        vma_size = vma->vm_end - vma->vm_start;
@@ -1995,10 +2005,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        atomic_long_add(user_extra, &user->locked_vm);
        vma->vm_mm->locked_vm += extra;
        counter->data->nr_locked = extra;
+        if (vma->vm_flags & VM_WRITE)
+                counter->data->writable = 1;
 unlock:
        mutex_unlock(&counter->mmap_mutex);
-        vma->vm_flags &= ~VM_MAYWRITE;
        vma->vm_flags |= VM_RESERVED;
        vma->vm_ops = &perf_mmap_vmops;
@@ -2175,11 +2187,38 @@ struct perf_output_handle {
        unsigned long           head;
        unsigned long           offset;
        int                     nmi;
-        int                     overflow;
+        int                     sample;
        int                     locked;
        unsigned long           flags;
 };
+/*
+ * Decide whether the region being reserved, running from @offset up to
+ * @head, may be written. Returns true unconditionally when the buffer is
+ * not marked writable; otherwise both positions are rebased against the
+ * data_tail value read from the user page, and false is returned if the
+ * rebased head ends up behind the rebased offset.
+ */
+static bool perf_output_space(struct perf_mmap_data *data,
+                              unsigned int offset, unsigned int head)
+{
+        unsigned long tail;
+        unsigned long mask;
+        /* No tail check is performed unless the buffer was flagged writable. */
+        if (!data->writable)
+                return true;
+        /* Buffer size in bytes minus one; assumes nr_pages is a power of two -- TODO confirm */
+        mask = (data->nr_pages << PAGE_SHIFT) - 1;
+        /*
+         * Userspace could choose to issue a mb() before updating the tail
+         * pointer, so that all reads will be completed before the write is
+         * issued.
+         */
+        tail = ACCESS_ONCE(data->user_page->data_tail);
+        smp_rmb();
+        /* Express both positions as distances from the tail, modulo mask. */
+        offset = (offset - tail) & mask;
+        head   = (head   - tail) & mask;
+        /* A head that now sits behind offset means the reservation does not fit. */
+        if ((int)(head - offset) < 0)
+                return false;
+        return true;
+}
 static void perf_output_wakeup(struct perf_output_handle *handle)
 {
        atomic_set(&handle->data->poll, POLL_IN);
@@ -2270,12 +2309,57 @@ out:
        local_irq_restore(handle->flags);
 }
+/*
+ * Copy @len bytes from @buf into the data pages of handle->data, starting
+ * at handle->offset, splitting the copy at page boundaries. On return
+ * handle->offset has been advanced by the number of bytes copied.
+ */
+static void perf_output_copy(struct perf_output_handle *handle,
+                             const void *buf, unsigned int len)
+{
+        unsigned int pages_mask;
+        unsigned int offset;
+        unsigned int size;
+        void **pages;
+        offset          = handle->offset;
+        /* Used as an index mask; assumes nr_pages is a power of two -- TODO confirm */
+        pages_mask      = handle->data->nr_pages - 1;
+        pages           = handle->data->data_pages;
+        do {
+                unsigned int page_offset;
+                int nr;
+                /* Page index (wrapped by the mask) and byte position inside that page. */
+                nr          = (offset >> PAGE_SHIFT) & pages_mask;
+                page_offset = offset & (PAGE_SIZE - 1);
+                /* Never copy beyond the end of the current page in one go. */
+                size        = min_t(unsigned int, PAGE_SIZE - page_offset, len);
+                memcpy(pages[nr] + page_offset, buf, size);
+                len         -= size;
+                buf         += size;
+                offset      += size;
+        } while (len);
+        handle->offset = offset;
+        /*
+         * Check we didn't copy past our reservation window, taking the
+         * possible unsigned int wrap into account.
+         */
+        WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
+}
+#define perf_output_put(handle, x) \
+        perf_output_copy((handle), &(x), sizeof(x))
 static int perf_output_begin(struct perf_output_handle *handle,
                             struct perf_counter *counter, unsigned int size,
-                             int nmi, int overflow)
+                             int nmi, int sample)
 {
        struct perf_mmap_data *data;
        unsigned int offset, head;
+        int have_lost;
+        struct {
+                struct perf_event_header header;
+                u64                      id;
+                u64                      lost;
+        } lost_event;
        /*
         * For inherited counters we send all the output towards the parent.
@@ -2288,19 +2372,25 @@ static int perf_output_begin(struct perf_output_handle *handle,
        if (!data)
                goto out;
-        handle->data     = data;
+        handle->data    = data;
-        handle->counter  = counter;
+        handle->counter = counter;
-        handle->nmi      = nmi;
+        handle->nmi     = nmi;
-        handle->overflow = overflow;
+        handle->sample  = sample;
        if (!data->nr_pages)
                goto fail;
+        have_lost = atomic_read(&data->lost);
+        if (have_lost)
+                size += sizeof(lost_event);
        perf_output_lock(handle);
        do {
                offset = head = atomic_long_read(&data->head);
                head += size;
+                if (unlikely(!perf_output_space(data, offset, head)))
+                        goto fail;
        } while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
        handle->offset  = offset;
@@ -2309,55 +2399,27 @@ static int perf_output_begin(struct perf_output_handle *handle,
        if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT))
                atomic_set(&data->wakeup, 1);
+        if (have_lost) {
+                lost_event.header.type = PERF_EVENT_LOST;
+                lost_event.header.misc = 0;
+                lost_event.header.size = sizeof(lost_event);
+                lost_event.id          = counter->id;
+                lost_event.lost        = atomic_xchg(&data->lost, 0);
+                perf_output_put(handle, lost_event);
+        }
        return 0;
 fail:
-        perf_output_wakeup(handle);
+        atomic_inc(&data->lost);
+        perf_output_unlock(handle);
 out:
        rcu_read_unlock();
        return -ENOSPC;
 }
-static void perf_output_copy(struct perf_output_handle *handle,
-                             const void *buf, unsigned int len)
-{
-        unsigned int pages_mask;
-        unsigned int offset;
-        unsigned int size;
-        void **pages;
-        offset          = handle->offset;
-        pages_mask      = handle->data->nr_pages - 1;
-        pages           = handle->data->data_pages;
-        do {
-                unsigned int page_offset;
-                int nr;
-                nr          = (offset >> PAGE_SHIFT) & pages_mask;
-                page_offset = offset & (PAGE_SIZE - 1);
-                size        = min_t(unsigned int, PAGE_SIZE - page_offset, len);
-                memcpy(pages[nr] + page_offset, buf, size);
-                len         -= size;
-                buf         += size;
-                offset      += size;
-        } while (len);
-        handle->offset = offset;
-        /*
-         * Check we didn't copy past our reservation window, taking the
-         * possible unsigned int wrap into account.
-         */
-        WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
-}
-#define perf_output_put(handle, x) \
-        perf_output_copy((handle), &(x), sizeof(x))
 static void perf_output_end(struct perf_output_handle *handle)
 {
        struct perf_counter *counter = handle->counter;
@@ -2365,7 +2427,7 @@ static void perf_output_end(struct perf_output_handle *handle)
        int wakeup_events = counter->attr.wakeup_events;
-        if (handle->overflow && wakeup_events) {
+        if (handle->sample && wakeup_events) {
                int events = atomic_inc_return(&data->events);
                if (events >= wakeup_events) {
                        atomic_sub(wakeup_events, &data->events);
@@ -2970,7 +3032,7 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
 }
 /*
- * Generic counter overflow handling.
+ * Generic counter overflow handling, sampling.
 */
 int perf_counter_overflow(struct perf_counter *counter, int nmi,
@@ -3109,20 +3171,15 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 }
 static void perf_swcounter_overflow(struct perf_counter *counter,
-                                    int nmi, struct pt_regs *regs, u64 addr)
+                                    int nmi, struct perf_sample_data *data)
 {
-        struct perf_sample_data data = {
+        data->period = counter->hw.last_period;
-                .regs   = regs,
-                .addr   = addr,
-                .period = counter->hw.last_period,
-        };
        perf_swcounter_update(counter);
        perf_swcounter_set_period(counter);
-        if (perf_counter_overflow(counter, nmi, &data))
+        if (perf_counter_overflow(counter, nmi, data))
                /* soft-disable the counter */
                ;
 }
 static int perf_swcounter_is_counting(struct perf_counter *counter)
@@ -3187,18 +3244,18 @@ static int perf_swcounter_match(struct perf_counter *counter,
 }
 static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
-                               int nmi, struct pt_regs *regs, u64 addr)
+                               int nmi, struct perf_sample_data *data)
 {
        int neg = atomic64_add_negative(nr, &counter->hw.count);
-        if (counter->hw.sample_period && !neg && regs)
+        if (counter->hw.sample_period && !neg && data->regs)
-                perf_swcounter_overflow(counter, nmi, regs, addr);
+                perf_swcounter_overflow(counter, nmi, data);
 }
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
-                                     enum perf_type_id type, u32 event,
+                                     enum perf_type_id type,
-                                     u64 nr, int nmi, struct pt_regs *regs,
+                                     u32 event, u64 nr, int nmi,
-                                     u64 addr)
+                                     struct perf_sample_data *data)
 {
        struct perf_counter *counter;
@@ -3207,8 +3264,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
        rcu_read_lock();
        list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-                if (perf_swcounter_match(counter, type, event, regs))
+                if (perf_swcounter_match(counter, type, event, data->regs))
-                        perf_swcounter_add(counter, nr, nmi, regs, addr);
+                        perf_swcounter_add(counter, nr, nmi, data);
        }
        rcu_read_unlock();
 }
@@ -3227,9 +3284,9 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
        return &cpuctx->recursion[0];
 }
-static void __perf_swcounter_event(enum perf_type_id type, u32 event,
+static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
-                                   u64 nr, int nmi, struct pt_regs *regs,
+                                    u64 nr, int nmi,
-                                   u64 addr)
+                                    struct perf_sample_data *data)
 {
        struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
        int *recursion = perf_swcounter_recursion_context(cpuctx);
@@ -3242,7 +3299,7 @@ static void __perf_swcounter_event(enum perf_type_id type, u32 event,
        barrier();
        perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
-                                 nr, nmi, regs, addr);
+                                 nr, nmi, data);
        rcu_read_lock();
        /*
         * doesn't really matter which of the child contexts the
@@ -3250,7 +3307,7 @@ static void __perf_swcounter_event(enum perf_type_id type, u32 event,
         */
        ctx = rcu_dereference(current->perf_counter_ctxp);
        if (ctx)
-                perf_swcounter_ctx_event(ctx, type, event, nr, nmi, regs, addr);
+                perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data);
        rcu_read_unlock();
        barrier();
@@ -3263,7 +3320,12 @@ out:
 void
 perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 {
-        __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr);
+        struct perf_sample_data data = {
+                .regs = regs,
+                .addr = addr,
+        };
+        do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, &data);
 }
 static void perf_swcounter_read(struct perf_counter *counter)
@@ -3404,36 +3466,18 @@ static const struct pmu perf_ops_task_clock = {
        .read           = task_clock_perf_counter_read,
 };
-/*
- * Software counter: cpu migrations
- */
-void perf_counter_task_migration(struct task_struct *task, int cpu)
-{
-        struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-        struct perf_counter_context *ctx;
-        perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE,
-                                 PERF_COUNT_SW_CPU_MIGRATIONS,
-                                 1, 1, NULL, 0);
-        ctx = perf_pin_task_context(task);
-        if (ctx) {
-                perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE,
-                                         PERF_COUNT_SW_CPU_MIGRATIONS,
-                                         1, 1, NULL, 0);
-                perf_unpin_context(ctx);
-        }
-}
 #ifdef CONFIG_EVENT_PROFILE
 void perf_tpcounter_event(int event_id)
 {
-        struct pt_regs *regs = get_irq_regs();
+        struct perf_sample_data data = {
+                .regs = get_irq_regs();
+                .addr = 0,
+        };
-        if (!regs)
+        if (!data.regs)
-                regs = task_pt_regs(current);
+                data.regs = task_pt_regs(current);
-        __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0);
+        do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data);
 }
 EXPORT_SYMBOL_GPL(perf_tpcounter_event);
diff --git a/kernel/pid.c b/kernel/pid.c
index b2e5f78fd281..31310b5d3f50 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -378,26 +378,15 @@ EXPORT_SYMBOL(pid_task);
 /*
 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
 */
-struct task_struct *find_task_by_pid_type_ns(int type, int nr,
+struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
-                struct pid_namespace *ns)
 {
-        return pid_task(find_pid_ns(nr, ns), type);
+        return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
 }
-EXPORT_SYMBOL(find_task_by_pid_type_ns);
 struct task_struct *find_task_by_vpid(pid_t vnr)
 {
-        return find_task_by_pid_type_ns(PIDTYPE_PID, vnr,
+        return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns);
-                        current->nsproxy->pid_ns);
-}
-EXPORT_SYMBOL(find_task_by_vpid);
-struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
-{
-        return find_task_by_pid_type_ns(PIDTYPE_PID, nr, ns);
 }
-EXPORT_SYMBOL(find_task_by_pid_ns);
 struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
 {
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 2d1001b4858d..821722ae58a7 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -67,9 +67,10 @@ err_alloc:
        return NULL;
 }
-static struct pid_namespace *create_pid_namespace(unsigned int level)
+static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns)
 {
        struct pid_namespace *ns;
+        unsigned int level = parent_pid_ns->level + 1;
        int i;
        ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
@@ -86,6 +87,7 @@ static struct pid_namespace *create_pid_namespace(unsigned int level)
        kref_init(&ns->kref);
        ns->level = level;
+        ns->parent = get_pid_ns(parent_pid_ns);
        set_bit(0, ns->pidmap[0].page);
        atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
@@ -114,25 +116,11 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
 {
-        struct pid_namespace *new_ns;
-        BUG_ON(!old_ns);
-        new_ns = get_pid_ns(old_ns);
        if (!(flags & CLONE_NEWPID))
-                goto out;
+                return get_pid_ns(old_ns);
-        new_ns = ERR_PTR(-EINVAL);
        if (flags & CLONE_THREAD)
-                goto out_put;
+                return ERR_PTR(-EINVAL);
+        return create_pid_namespace(old_ns);
-        new_ns = create_pid_namespace(old_ns->level + 1);
-        if (!IS_ERR(new_ns))
-                new_ns->parent = get_pid_ns(old_ns);
-out_put:
-        put_pid_ns(old_ns);
-out:
-        return new_ns;
 }
 void free_pid_ns(struct kref *kref)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index f6d8b8cb5e34..61c78b2c07ba 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -167,67 +167,82 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 int ptrace_attach(struct task_struct *task)
 {
        int retval;
-        unsigned long flags;
        audit_ptrace(task);
        retval = -EPERM;
+        if (unlikely(task->flags & PF_KTHREAD))
+                goto out;
        if (same_thread_group(task, current))
                goto out;
-        /* Protect the target's credential calculations against our
+        /*
+         * Protect exec's credential calculations against our
         * interference; SUID, SGID and LSM creds get determined differently
         * under ptrace.
         */
        retval = mutex_lock_interruptible(&task->cred_guard_mutex);
-        if (retval  < 0)
+        if (retval < 0)
                goto out;
-        retval = -EPERM;
-repeat:
-        /*
-         * Nasty, nasty.
-         *
-         * We want to hold both the task-lock and the
-         * tasklist_lock for writing at the same time.
-         * But that's against the rules (tasklist_lock
-         * is taken for reading by interrupts on other
-         * cpu's that may have task_lock).
-         */
        task_lock(task);
-        if (!write_trylock_irqsave(&tasklist_lock, flags)) {
-                task_unlock(task);
-                do {
-                        cpu_relax();
-                } while (!write_can_lock(&tasklist_lock));
-                goto repeat;
-        }
-        if (!task->mm)
-                goto bad;
-        /* the same process cannot be attached many times */
-        if (task->ptrace & PT_PTRACED)
-                goto bad;
        retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH);
+        task_unlock(task);
        if (retval)
-                goto bad;
+                goto unlock_creds;
-        /* Go */
+        write_lock_irq(&tasklist_lock);
-        task->ptrace |= PT_PTRACED;
+        retval = -EPERM;
+        if (unlikely(task->exit_state))
+                goto unlock_tasklist;
+        if (task->ptrace)
+                goto unlock_tasklist;
+        task->ptrace = PT_PTRACED;
        if (capable(CAP_SYS_PTRACE))
                task->ptrace |= PT_PTRACE_CAP;
        __ptrace_link(task, current);
        send_sig_info(SIGSTOP, SEND_SIG_FORCED, task);
-bad:
-        write_unlock_irqrestore(&tasklist_lock, flags);
+        retval = 0;
-        task_unlock(task);
+unlock_tasklist:
+        write_unlock_irq(&tasklist_lock);
+unlock_creds:
        mutex_unlock(&task->cred_guard_mutex);
 out:
        return retval;
 }
+/**
+ * ptrace_traceme  --  helper for PTRACE_TRACEME
+ *
+ * Performs checks and sets PT_PTRACED.
+ * Should be used by all ptrace implementations for PTRACE_TRACEME.
+ */
+int ptrace_traceme(void)
+{
+        int ret = -EPERM;
+        write_lock_irq(&tasklist_lock);
+        /* Are we already being traced? */
+        if (!current->ptrace) {
+                ret = security_ptrace_traceme(current->parent);
+                /*
+                 * Check PF_EXITING to ensure ->real_parent has not passed
+                 * exit_ptrace(). Otherwise we don't report the error but
+                 * pretend ->real_parent untraces us right after return.
+                 */
+                if (!ret && !(current->real_parent->flags & PF_EXITING)) {
+                        current->ptrace = PT_PTRACED;
+                        __ptrace_link(current, current->real_parent);
+                }
+        }
+        write_unlock_irq(&tasklist_lock);
+        return ret;
+}
 /*
 * Called with irqs disabled, returns true if childs should reap themselves.
 */
@@ -409,37 +424,33 @@ static int ptrace_setoptions(struct task_struct *child, long data)
 static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info)
 {
+        unsigned long flags;
        int error = -ESRCH;
-        read_lock(&tasklist_lock);
+        if (lock_task_sighand(child, &flags)) {
-        if (likely(child->sighand != NULL)) {
                error = -EINVAL;
-                spin_lock_irq(&child->sighand->siglock);
                if (likely(child->last_siginfo != NULL)) {
                        *info = *child->last_siginfo;
                        error = 0;
                }
-                spin_unlock_irq(&child->sighand->siglock);
+                unlock_task_sighand(child, &flags);
        }
-        read_unlock(&tasklist_lock);
        return error;
 }
 static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info)
 {
+        unsigned long flags;
        int error = -ESRCH;
-        read_lock(&tasklist_lock);
+        if (lock_task_sighand(child, &flags)) {
-        if (likely(child->sighand != NULL)) {
                error = -EINVAL;
-                spin_lock_irq(&child->sighand->siglock);
                if (likely(child->last_siginfo != NULL)) {
                        *child->last_siginfo = *info;
                        error = 0;
                }
-                spin_unlock_irq(&child->sighand->siglock);
+                unlock_task_sighand(child, &flags);
        }
-        read_unlock(&tasklist_lock);
        return error;
 }
@@ -566,72 +577,16 @@ int ptrace_request(struct task_struct *child, long request,
        return ret;
 }
-/**
+static struct task_struct *ptrace_get_task_struct(pid_t pid)
- * ptrace_traceme  --  helper for PTRACE_TRACEME
- *
- * Performs checks and sets PT_PTRACED.
- * Should be used by all ptrace implementations for PTRACE_TRACEME.
- */
-int ptrace_traceme(void)
-{
-        int ret = -EPERM;
-        /*
-         * Are we already being traced?
-         */
-repeat:
-        task_lock(current);
-        if (!(current->ptrace & PT_PTRACED)) {
-                /*
-                 * See ptrace_attach() comments about the locking here.
-                 */
-                unsigned long flags;
-                if (!write_trylock_irqsave(&tasklist_lock, flags)) {
-                        task_unlock(current);
-                        do {
-                                cpu_relax();
-                        } while (!write_can_lock(&tasklist_lock));
-                        goto repeat;
-                }
-                ret = security_ptrace_traceme(current->parent);
-                /*
-                 * Check PF_EXITING to ensure ->real_parent has not passed
-                 * exit_ptrace(). Otherwise we don't report the error but
-                 * pretend ->real_parent untraces us right after return.
-                 */
-                if (!ret && !(current->real_parent->flags & PF_EXITING)) {
-                        current->ptrace |= PT_PTRACED;
-                        __ptrace_link(current, current->real_parent);
-                }
-                write_unlock_irqrestore(&tasklist_lock, flags);
-        }
-        task_unlock(current);
-        return ret;
-}
-/**
- * ptrace_get_task_struct  --  grab a task struct reference for ptrace
- * @pid:       process id to grab a task_struct reference of
- *
- * This function is a helper for ptrace implementations.  It checks
- * permissions and then grabs a task struct for use of the actual
- * ptrace implementation.
- *
- * Returns the task_struct for @pid or an ERR_PTR() on failure.
- */
-struct task_struct *ptrace_get_task_struct(pid_t pid)
 {
        struct task_struct *child;
-        read_lock(&tasklist_lock);
+        rcu_read_lock();
        child = find_task_by_vpid(pid);
        if (child)
                get_task_struct(child);
+        rcu_read_unlock();
-        read_unlock(&tasklist_lock);
        if (!child)
                return ERR_PTR(-ESRCH);
        return child;
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index bf8e7534c803..e1338f074314 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -18,7 +18,7 @@
 void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 {
        spin_lock_init(&counter->lock);
-        counter->limit = (unsigned long long)LLONG_MAX;
+        counter->limit = RESOURCE_MAX;
        counter->parent = parent;
 }
@@ -133,6 +133,16 @@ int res_counter_memparse_write_strategy(const char *buf,
                                        unsigned long long *res)
 {
        char *end;
+        /* return RESOURCE_MAX(unlimited) if "-1" is specified */
+        if (*buf == '-') {
+                *res = simple_strtoull(buf + 1, &end, 10);
+                if (*res != 1 || *end != '\0')
+                        return -EINVAL;
+                *res = RESOURCE_MAX;
+                return 0;
+        }
        /* FIXME - make memparse() take const char* args */
        *res = memparse((char *)buf, &end);
        if (*end != '\0')
diff --git a/kernel/sched.c b/kernel/sched.c
index 8fb88a906aaa..7c9098d186e6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1978,7 +1978,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
                if (task_hot(p, old_rq->clock, NULL))
                        schedstat_inc(p, se.nr_forced2_migrations);
 #endif
-                perf_counter_task_migration(p, new_cpu);
+                perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS,
+                                     1, 1, NULL, 0);
        }
        p->se.vruntime -= old_cfsrq->min_vruntime -
                                         new_cfsrq->min_vruntime;
@@ -7045,7 +7046,7 @@ static int migration_thread(void *data)
                if (cpu_is_offline(cpu)) {
                        spin_unlock_irq(&rq->lock);
-                        goto wait_to_die;
+                        break;
                }
                if (rq->active_balance) {
@@ -7071,16 +7072,7 @@ static int migration_thread(void *data)
                complete(&req->done);
        }
        __set_current_state(TASK_RUNNING);
-        return 0;
-wait_to_die:
-        /* Wait for kthread_stop */
-        set_current_state(TASK_INTERRUPTIBLE);
-        while (!kthread_should_stop()) {
-                schedule();
-                set_current_state(TASK_INTERRUPTIBLE);
-        }
-        __set_current_state(TASK_RUNNING);
        return 0;
 }
@@ -7494,6 +7486,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                rq = task_rq_lock(p, &flags);
                __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
                task_rq_unlock(rq, &flags);
+                get_task_struct(p);
                cpu_rq(cpu)->migration_thread = p;
                break;
@@ -7524,6 +7517,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                kthread_bind(cpu_rq(cpu)->migration_thread,
                             cpumask_any(cpu_online_mask));
                kthread_stop(cpu_rq(cpu)->migration_thread);
+                put_task_struct(cpu_rq(cpu)->migration_thread);
                cpu_rq(cpu)->migration_thread = NULL;
                break;
@@ -7533,6 +7527,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                migrate_live_tasks(cpu);
                rq = cpu_rq(cpu);
                kthread_stop(rq->migration_thread);
+                put_task_struct(rq->migration_thread);
                rq->migration_thread = NULL;
                /* Idle task back to normal (off runqueue, low prio) */
                spin_lock_irq(&rq->lock);
@@ -7828,7 +7823,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
                free_rootdomain(old_rd);
 }
-static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
+static int init_rootdomain(struct root_domain *rd, bool bootmem)
 {
        gfp_t gfp = GFP_KERNEL;
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 7deffc9f0e5f..e6c251790dde 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -152,7 +152,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 *
 * Returns: -ENOMEM if memory fails.
 */
-int __init_refok cpupri_init(struct cpupri *cp, bool bootmem)
+int cpupri_init(struct cpupri *cp, bool bootmem)
 {
        gfp_t gfp = GFP_KERNEL;
        int i;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 467ca72f1657..70c7e0b79946 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -162,7 +162,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
        s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
                spread, rq0_min_vruntime, spread0;
-        struct rq *rq = &per_cpu(runqueues, cpu);
+        struct rq *rq = cpu_rq(cpu);
        struct sched_entity *last;
        unsigned long flags;
@@ -191,7 +191,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
        if (last)
                max_vruntime = last->vruntime;
        min_vruntime = cfs_rq->min_vruntime;
-        rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime;
+        rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
        spin_unlock_irqrestore(&rq->lock, flags);
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
                        SPLIT_NS(MIN_vruntime));
@@ -248,7 +248,7 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 static void print_cpu(struct seq_file *m, int cpu)
 {
-        struct rq *rq = &per_cpu(runqueues, cpu);
+        struct rq *rq = cpu_rq(cpu);
 #ifdef CONFIG_X86
        {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5f9650e8fe75..ba7fd6e9556f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -430,12 +430,13 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
        for_each_sched_entity(se) {
                struct load_weight *load;
+                struct load_weight lw;
                cfs_rq = cfs_rq_of(se);
                load = &cfs_rq->load;
                if (unlikely(!se->on_rq)) {
-                        struct load_weight lw = cfs_rq->load;
+                        lw = cfs_rq->load;
                        update_load_add(&lw, se->load.weight);
                        load = &lw;
diff --git a/kernel/signal.c b/kernel/signal.c
index d81f4952eebb..ccf1ceedaebe 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1410,7 +1410,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
        /* do_notify_parent_cldstop should have been called instead.  */
        BUG_ON(task_is_stopped_or_traced(tsk));
-        BUG_ON(!tsk->ptrace &&
+        BUG_ON(!task_ptrace(tsk) &&
               (tsk->group_leader != tsk || !thread_group_empty(tsk)));
        info.si_signo = sig;
@@ -1449,7 +1449,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
        psig = tsk->parent->sighand;
        spin_lock_irqsave(&psig->siglock, flags);
-        if (!tsk->ptrace && sig == SIGCHLD &&
+        if (!task_ptrace(tsk) && sig == SIGCHLD &&
            (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
             (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) {
                /*
@@ -1486,7 +1486,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
        struct task_struct *parent;
        struct sighand_struct *sighand;
-        if (tsk->ptrace & PT_PTRACED)
+        if (task_ptrace(tsk))
                parent = tsk->parent;
        else {
                tsk = tsk->group_leader;
@@ -1499,7 +1499,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
         * see comment in do_notify_parent() abot the following 3 lines
         */
        rcu_read_lock();
-        info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+        info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns);
        info.si_uid = __task_cred(tsk)->uid;
        rcu_read_unlock();
@@ -1535,7 +1535,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
 static inline int may_ptrace_stop(void)
 {
-        if (!likely(current->ptrace & PT_PTRACED))
+        if (!likely(task_ptrace(current)))
                return 0;
        /*
         * Are we in the middle of do_coredump?
@@ -1753,7 +1753,7 @@ static int do_signal_stop(int signr)
 static int ptrace_signal(int signr, siginfo_t *info,
                         struct pt_regs *regs, void *cookie)
 {
-        if (!(current->ptrace & PT_PTRACED))
+        if (!task_ptrace(current))
                return signr;
        ptrace_signal_deliver(regs, cookie);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b41fb710e114..3a94905fa5d2 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -213,6 +213,7 @@ restart:
        do {
                if (pending & 1) {
                        int prev_count = preempt_count();
+                        kstat_incr_softirqs_this_cpu(h - softirq_vec);
                        trace_softirq_entry(h, softirq_vec);
                        h->action(h);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ab462b9968d5..62e4ff9968b5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2283,7 +2283,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
                  void *data)
 {
 #define TMPBUFLEN 21
-        int *i, vleft, first=1, neg, val;
+        int *i, vleft, first = 1, neg;
        unsigned long lval;
        size_t left, len;
        
@@ -2336,8 +2336,6 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
                        len = p-buf;
                        if ((len < left) && *p && !isspace(*p))
                                break;
-                        if (neg)
-                                val = -val;
                        s += len;
                        left -= len;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 2aff39c6f10c..e0f59a21c061 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -222,6 +222,15 @@ void tick_nohz_stop_sched_tick(int inidle)
        cpu = smp_processor_id();
        ts = &per_cpu(tick_cpu_sched, cpu);
+        /*
+         * Call to tick_nohz_start_idle stops the last_update_time from being
+         * updated. Thus, it must not be called in the event we are called from
+         * irq_exit() with the prior state different than idle.
+         */
+        if (!inidle && !ts->inidle)
+                goto end;
        now = tick_nohz_start_idle(ts);
        /*
@@ -239,9 +248,6 @@ void tick_nohz_stop_sched_tick(int inidle)
        if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
                goto end;
-        if (!inidle && !ts->inidle)
-                goto end;
        ts->inidle = 1;
        if (need_resched())
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61071fecc82e..1551f47e7669 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -18,6 +18,13 @@ config HAVE_FUNCTION_TRACER
 config HAVE_FUNCTION_GRAPH_TRACER
        bool
+config HAVE_FUNCTION_GRAPH_FP_TEST
+        bool
+        help
+         An arch may pass in a unique value (frame pointer) to both the
+         entering and exiting of a function. On exit, the value is compared
+         and if it does not match, then it will panic the kernel.
 config HAVE_FUNCTION_TRACE_MCOUNT_TEST
        bool
        help
@@ -121,6 +128,7 @@ config FUNCTION_GRAPH_TRACER
        bool "Kernel Function Graph Tracer"
        depends on HAVE_FUNCTION_GRAPH_TRACER
        depends on FUNCTION_TRACER
+        depends on !X86_32 || !CC_OPTIMIZE_FOR_SIZE
        default y
        help
          Enable the kernel to trace a function at both its return
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bb60732ade0c..3718d55fb4c3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1224,6 +1224,13 @@ static void ftrace_shutdown(int command)
                return;
        ftrace_start_up--;
+        /*
+         * Just warn in case of unbalance, no need to kill ftrace, it's not
+         * critical but the ftrace_call callers may be never nopped again after
+         * further ftrace uses.
+         */
+        WARN_ON_ONCE(ftrace_start_up < 0);
        if (!ftrace_start_up)
                command |= FTRACE_DISABLE_CALLS;
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 86cdf671d7e2..1edaa9516e81 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -186,7 +186,7 @@ static int kmem_trace_init(struct trace_array *tr)
        int cpu;
        kmemtrace_array = tr;
-        for_each_cpu_mask(cpu, cpu_possible_map)
+        for_each_cpu(cpu, cpu_possible_mask)
                tracing_reset(tr, cpu);
        kmemtrace_start_probes();
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dc4dc70171ce..04dac2638258 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -206,6 +206,7 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
 #define RB_ALIGNMENT            4U
 #define RB_MAX_SMALL_DATA       (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+#define RB_EVNT_MIN_SIZE        8U      /* two 32bit words */
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -415,6 +416,8 @@ struct ring_buffer_per_cpu {
        unsigned long                   overrun;
        unsigned long                   read;
        local_t                         entries;
+        local_t                         committing;
+        local_t                         commits;
        u64                             write_stamp;
        u64                             read_stamp;
        atomic_t                        record_disabled;
@@ -618,12 +621,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
        kfree(cpu_buffer);
 }
-/*
- * Causes compile errors if the struct buffer_page gets bigger
- * than the struct page.
- */
-extern int ring_buffer_page_too_big(void);
 #ifdef CONFIG_HOTPLUG_CPU
 static int rb_cpu_notify(struct notifier_block *self,
                         unsigned long action, void *hcpu);
@@ -646,11 +643,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
        int bsize;
        int cpu;
-        /* Paranoid! Optimizes out when all is well */
-        if (sizeof(struct buffer_page) > sizeof(struct page))
-                ring_buffer_page_too_big();
        /* keep it in its own cache line */
        buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
                         GFP_KERNEL);
@@ -666,8 +658,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
        buffer->reader_lock_key = key;
        /* need at least two pages */
-        if (buffer->pages == 1)
+        if (buffer->pages < 2)
-                buffer->pages++;
+                buffer->pages = 2;
        /*
         * In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1011,12 +1003,12 @@ rb_event_index(struct ring_buffer_event *event)
 {
        unsigned long addr = (unsigned long)event;
-        return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
+        return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
 }
 static inline int
-rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
-             struct ring_buffer_event *event)
+                   struct ring_buffer_event *event)
 {
        unsigned long addr = (unsigned long)event;
        unsigned long index;
@@ -1029,31 +1021,6 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 }
 static void
-rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
-                    struct ring_buffer_event *event)
-{
-        unsigned long addr = (unsigned long)event;
-        unsigned long index;
-        index = rb_event_index(event);
-        addr &= PAGE_MASK;
-        while (cpu_buffer->commit_page->page != (void *)addr) {
-                if (RB_WARN_ON(cpu_buffer,
-                          cpu_buffer->commit_page == cpu_buffer->tail_page))
-                        return;
-                cpu_buffer->commit_page->page->commit =
-                        cpu_buffer->commit_page->write;
-                rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
-                cpu_buffer->write_stamp =
-                        cpu_buffer->commit_page->page->time_stamp;
-        }
-        /* Now set the commit to the event's index */
-        local_set(&cpu_buffer->commit_page->page->commit, index);
-}
-static void
 rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
 {
        /*
@@ -1171,6 +1138,60 @@ static unsigned rb_calculate_event_length(unsigned length)
        return length;
 }
+static inline void
+rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
+              struct buffer_page *tail_page,
+              unsigned long tail, unsigned long length)
+{
+        struct ring_buffer_event *event;
+        /*
+         * Only the event that crossed the page boundary
+         * must fill the old tail_page with padding.
+         */
+        if (tail >= BUF_PAGE_SIZE) {
+                local_sub(length, &tail_page->write);
+                return;
+        }
+        event = __rb_page_index(tail_page, tail);
+        kmemcheck_annotate_bitfield(event, bitfield);
+        /*
+         * If this event is bigger than the minimum size, then
+         * we need to be careful that we don't subtract the
+         * write counter enough to allow another writer to slip
+         * in on this page.
+         * We put in a discarded commit instead, to make sure
+         * that this space is not used again.
+         *
+         * If we are less than the minimum size, we don't need to
+         * worry about it.
+         */
+        if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
+                /* No room for any events */
+                /* Mark the rest of the page with padding */
+                rb_event_set_padding(event);
+                /* Set the write back to the previous setting */
+                local_sub(length, &tail_page->write);
+                return;
+        }
+        /* Put in a discarded event */
+        event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
+        event->type_len = RINGBUF_TYPE_PADDING;
+        /* time delta must be non zero */
+        event->time_delta = 1;
+        /* Account for this as an entry */
+        local_inc(&tail_page->entries);
+        local_inc(&cpu_buffer->entries);
+        /* Set write to end of buffer */
+        length = (tail + length) - BUF_PAGE_SIZE;
+        local_sub(length, &tail_page->write);
+}
 static struct ring_buffer_event *
 rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
@@ -1180,7 +1201,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 {
        struct buffer_page *next_page, *head_page, *reader_page;
        struct ring_buffer *buffer = cpu_buffer->buffer;
-        struct ring_buffer_event *event;
        bool lock_taken = false;
        unsigned long flags;
@@ -1265,27 +1285,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
                cpu_buffer->tail_page->page->time_stamp = *ts;
        }
-        /*
+        rb_reset_tail(cpu_buffer, tail_page, tail, length);
-         * The actual tail page has moved forward.
-         */
-        if (tail < BUF_PAGE_SIZE) {
-                /* Mark the rest of the page with padding */
-                event = __rb_page_index(tail_page, tail);
-                kmemcheck_annotate_bitfield(event, bitfield);
-                rb_event_set_padding(event);
-        }
-        /* Set the write back to the previous setting */
-        local_sub(length, &tail_page->write);
-        /*
-         * If this was a commit entry that failed,
-         * increment that too
-         */
-        if (tail_page == cpu_buffer->commit_page &&
-            tail == rb_commit_index(cpu_buffer)) {
-                rb_set_commit_to_write(cpu_buffer);
-        }
        __raw_spin_unlock(&cpu_buffer->lock);
        local_irq_restore(flags);
@@ -1295,7 +1295,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 out_reset:
        /* reset write */
-        local_sub(length, &tail_page->write);
+        rb_reset_tail(cpu_buffer, tail_page, tail, length);
        if (likely(lock_taken))
                __raw_spin_unlock(&cpu_buffer->lock);
@@ -1325,9 +1325,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
        /* We reserved something on the buffer */
-        if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
-                return NULL;
        event = __rb_page_index(tail_page, tail);
        kmemcheck_annotate_bitfield(event, bitfield);
        rb_update_event(event, type, length);
@@ -1337,11 +1334,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
                local_inc(&tail_page->entries);
        /*
-         * If this is a commit and the tail is zero, then update
+         * If this is the first commit on the page, then update
-         * this page's time stamp.
+         * its timestamp.
         */
-        if (!tail && rb_is_commit(cpu_buffer, event))
+        if (!tail)
-                cpu_buffer->commit_page->page->time_stamp = *ts;
+                tail_page->page->time_stamp = *ts;
        return event;
 }
@@ -1410,16 +1407,16 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
                return -EAGAIN;
        /* Only a commited time event can update the write stamp */
-        if (rb_is_commit(cpu_buffer, event)) {
+        if (rb_event_is_commit(cpu_buffer, event)) {
                /*
-                 * If this is the first on the page, then we need to
+                 * If this is the first on the page, then it was
-                 * update the page itself, and just put in a zero.
+                 * updated with the page itself. Try to discard it
+                 * and if we can't just make it zero.
                 */
                if (rb_event_index(event)) {
                        event->time_delta = *delta & TS_MASK;
                        event->array[0] = *delta >> TS_SHIFT;
                } else {
-                        cpu_buffer->commit_page->page->time_stamp = *ts;
                        /* try to discard, since we do not need this */
                        if (!rb_try_to_discard(cpu_buffer, event)) {
                                /* nope, just zero it */
@@ -1445,6 +1442,44 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
        return ret;
 }
+static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
+{
+        local_inc(&cpu_buffer->committing);
+        local_inc(&cpu_buffer->commits);
+}
+static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+{
+        unsigned long commits;
+        if (RB_WARN_ON(cpu_buffer,
+                       !local_read(&cpu_buffer->committing)))
+                return;
+ again:
+        commits = local_read(&cpu_buffer->commits);
+        /* synchronize with interrupts */
+        barrier();
+        if (local_read(&cpu_buffer->committing) == 1)
+                rb_set_commit_to_write(cpu_buffer);
+        local_dec(&cpu_buffer->committing);
+        /* synchronize with interrupts */
+        barrier();
+        /*
+         * Need to account for interrupts coming in between the
+         * updating of the commit page and the clearing of the
+         * committing counter.
+         */
+        if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
+            !local_read(&cpu_buffer->committing)) {
+                local_inc(&cpu_buffer->committing);
+                goto again;
+        }
+}
 static struct ring_buffer_event *
 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
                      unsigned long length)
@@ -1454,6 +1489,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
        int commit = 0;
        int nr_loops = 0;
+        rb_start_commit(cpu_buffer);
        length = rb_calculate_event_length(length);
 again:
        /*
@@ -1466,7 +1503,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
         * Bail!
         */
        if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
-                return NULL;
+                goto out_fail;
        ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
@@ -1497,7 +1534,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
                        commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
                        if (commit == -EBUSY)
-                                return NULL;
+                                goto out_fail;
                        if (commit == -EAGAIN)
                                goto again;
@@ -1511,28 +1548,19 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
        if (unlikely(PTR_ERR(event) == -EAGAIN))
                goto again;
-        if (!event) {
+        if (!event)
-                if (unlikely(commit))
+                goto out_fail;
-                        /*
-                         * Ouch! We needed a timestamp and it was commited. But
-                         * we didn't get our event reserved.
-                         */
-                        rb_set_commit_to_write(cpu_buffer);
-                return NULL;
-        }
-        /*
+        if (!rb_event_is_commit(cpu_buffer, event))
-         * If the timestamp was commited, make the commit our entry
-         * now so that we will update it when needed.
-         */
-        if (unlikely(commit))
-                rb_set_commit_event(cpu_buffer, event);
-        else if (!rb_is_commit(cpu_buffer, event))
                delta = 0;
        event->time_delta = delta;
        return event;
+ out_fail:
+        rb_end_commit(cpu_buffer);
+        return NULL;
 }
 #define TRACE_RECURSIVE_DEPTH 16
@@ -1642,13 +1670,14 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 {
        local_inc(&cpu_buffer->entries);
-        /* Only process further if we own the commit */
+        /*
-        if (!rb_is_commit(cpu_buffer, event))
+         * The event first in the commit queue updates the
-                return;
+         * time stamp.
+         */
-        cpu_buffer->write_stamp += event->time_delta;
+        if (rb_event_is_commit(cpu_buffer, event))
+                cpu_buffer->write_stamp += event->time_delta;
-        rb_set_commit_to_write(cpu_buffer);
+        rb_end_commit(cpu_buffer);
 }
 /**
@@ -1737,15 +1766,15 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
        /* The event is discarded regardless */
        rb_event_discard(event);
+        cpu = smp_processor_id();
+        cpu_buffer = buffer->buffers[cpu];
        /*
         * This must only be called if the event has not been
         * committed yet. Thus we can assume that preemption
         * is still disabled.
         */
-        RB_WARN_ON(buffer, preemptible());
+        RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
-        cpu = smp_processor_id();
-        cpu_buffer = buffer->buffers[cpu];
        if (!rb_try_to_discard(cpu_buffer, event))
                goto out;
@@ -1756,13 +1785,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
         */
        local_inc(&cpu_buffer->entries);
 out:
-        /*
+        rb_end_commit(cpu_buffer);
-         * If a write came in and pushed the tail page
-         * we still need to update the commit pointer
-         * if we were the commit.
-         */
-        if (rb_is_commit(cpu_buffer, event))
-                rb_set_commit_to_write(cpu_buffer);
        trace_recursive_unlock();
@@ -2446,6 +2469,21 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
+static inline int rb_ok_to_lock(void)
+{
+        /*
+         * If an NMI die dumps out the content of the ring buffer
+         * do not grab locks. We also permanently disable the ring
+         * buffer too. A one time deal is all you get from reading
+         * the ring buffer from an NMI.
+         */
+        if (likely(!in_nmi() && !oops_in_progress))
+                return 1;
+        tracing_off_permanent();
+        return 0;
+}
 /**
 * ring_buffer_peek - peek at the next event to be read
 * @buffer: The ring buffer to read
@@ -2461,14 +2499,20 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
        struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
        struct ring_buffer_event *event;
        unsigned long flags;
+        int dolock;
        if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return NULL;
+        dolock = rb_ok_to_lock();
 again:
-        spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+        local_irq_save(flags);
+        if (dolock)
+                spin_lock(&cpu_buffer->reader_lock);
        event = rb_buffer_peek(buffer, cpu, ts);
-        spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+        if (dolock)
+                spin_unlock(&cpu_buffer->reader_lock);
+        local_irq_restore(flags);
        if (event && event->type_len == RINGBUF_TYPE_PADDING) {
                cpu_relax();
@@ -2520,6 +2564,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_event *event = NULL;
        unsigned long flags;
+        int dolock;
+        dolock = rb_ok_to_lock();
 again:
        /* might be called in atomic */
@@ -2529,7 +2576,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
                goto out;
        cpu_buffer = buffer->buffers[cpu];
-        spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+        local_irq_save(flags);
+        if (dolock)
+                spin_lock(&cpu_buffer->reader_lock);
        event = rb_buffer_peek(buffer, cpu, ts);
        if (!event)
@@ -2538,7 +2587,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
        rb_advance_reader(cpu_buffer);
 out_unlock:
-        spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+        if (dolock)
+                spin_unlock(&cpu_buffer->reader_lock);
+        local_irq_restore(flags);
 out:
        preempt_enable();
@@ -2680,6 +2731,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
        cpu_buffer->overrun = 0;
        cpu_buffer->read = 0;
        local_set(&cpu_buffer->entries, 0);
+        local_set(&cpu_buffer->committing, 0);
+        local_set(&cpu_buffer->commits, 0);
        cpu_buffer->write_stamp = 0;
        cpu_buffer->read_stamp = 0;
@@ -2734,12 +2787,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset);
 int ring_buffer_empty(struct ring_buffer *buffer)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
+        unsigned long flags;
+        int dolock;
        int cpu;
+        int ret;
+        dolock = rb_ok_to_lock();
        /* yes this is racy, but if you don't like the race, lock the buffer */
        for_each_buffer_cpu(buffer, cpu) {
                cpu_buffer = buffer->buffers[cpu];
-                if (!rb_per_cpu_empty(cpu_buffer))
+                local_irq_save(flags);
+                if (dolock)
+                        spin_lock(&cpu_buffer->reader_lock);
+                ret = rb_per_cpu_empty(cpu_buffer);
+                if (dolock)
+                        spin_unlock(&cpu_buffer->reader_lock);
+                local_irq_restore(flags);
+                if (!ret)
                        return 0;
        }
@@ -2755,14 +2821,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty);
 int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
+        unsigned long flags;
+        int dolock;
        int ret;
        if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return 1;
+        dolock = rb_ok_to_lock();
        cpu_buffer = buffer->buffers[cpu];
+        local_irq_save(flags);
+        if (dolock)
+                spin_lock(&cpu_buffer->reader_lock);
        ret = rb_per_cpu_empty(cpu_buffer);
+        if (dolock)
+                spin_unlock(&cpu_buffer->reader_lock);
+        local_irq_restore(flags);
        return ret;
 }
@@ -3108,7 +3183,7 @@ static int rb_cpu_notify(struct notifier_block *self,
        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
-                if (cpu_isset(cpu, *buffer->cpumask))
+                if (cpumask_test_cpu(cpu, buffer->cpumask))
                        return NOTIFY_OK;
                buffer->buffers[cpu] =
@@ -3119,7 +3194,7 @@ static int rb_cpu_notify(struct notifier_block *self,
                        return NOTIFY_OK;
                }
                smp_wmb();
-                cpu_set(cpu, *buffer->cpumask);
+                cpumask_set_cpu(cpu, buffer->cpumask);
                break;
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 8d68e149a8b3..573d3cc762c3 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -102,8 +102,10 @@ static enum event_status read_page(int cpu)
                        event = (void *)&rpage->data[i];
                        switch (event->type_len) {
                        case RINGBUF_TYPE_PADDING:
-                                /* We don't expect any padding */
+                                /* failed writes may be discarded events */
-                                KILL_TEST();
+                                if (!event->time_delta)
+                                        KILL_TEST();
+                                inc = event->array[0] + 4;
                                break;
                        case RINGBUF_TYPE_TIME_EXTEND:
                                inc = 8;
@@ -119,7 +121,7 @@ static enum event_status read_page(int cpu)
                                        KILL_TEST();
                                        break;
                                }
-                                inc = event->array[0];
+                                inc = event->array[0] + 4;
                                break;
                        default:
                                entry = ring_buffer_event_data(event);
@@ -201,7 +203,7 @@ static void ring_buffer_producer(void)
         * Hammer the buffer for 10 secs (this may
         * make the system stall)
         */
-        pr_info("Starting ring buffer hammer\n");
+        trace_printk("Starting ring buffer hammer\n");
        do_gettimeofday(&start_tv);
        do {
                struct ring_buffer_event *event;
@@ -237,7 +239,7 @@ static void ring_buffer_producer(void)
 #endif
        } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
-        pr_info("End ring buffer hammer\n");
+        trace_printk("End ring buffer hammer\n");
        if (consumer) {
                /* Init both completions here to avoid races */
@@ -260,49 +262,50 @@ static void ring_buffer_producer(void)
        overruns = ring_buffer_overruns(buffer);
        if (kill_test)
-                pr_info("ERROR!\n");
+                trace_printk("ERROR!\n");
-        pr_info("Time:     %lld (usecs)\n", time);
+        trace_printk("Time:     %lld (usecs)\n", time);
-        pr_info("Overruns: %lld\n", overruns);
+        trace_printk("Overruns: %lld\n", overruns);
        if (disable_reader)
-                pr_info("Read:     (reader disabled)\n");
+                trace_printk("Read:     (reader disabled)\n");
        else
-                pr_info("Read:     %ld  (by %s)\n", read,
+                trace_printk("Read:     %ld  (by %s)\n", read,
                        read_events ? "events" : "pages");
-        pr_info("Entries:  %lld\n", entries);
+        trace_printk("Entries:  %lld\n", entries);
-        pr_info("Total:    %lld\n", entries + overruns + read);
+        trace_printk("Total:    %lld\n", entries + overruns + read);
-        pr_info("Missed:   %ld\n", missed);
+        trace_printk("Missed:   %ld\n", missed);
-        pr_info("Hit:      %ld\n", hit);
+        trace_printk("Hit:      %ld\n", hit);
        /* Convert time from usecs to millisecs */
        do_div(time, USEC_PER_MSEC);
        if (time)
                hit /= (long)time;
        else
-                pr_info("TIME IS ZERO??\n");
+                trace_printk("TIME IS ZERO??\n");
-        pr_info("Entries per millisec: %ld\n", hit);
+        trace_printk("Entries per millisec: %ld\n", hit);
        if (hit) {
                /* Calculate the average time in nanosecs */
                avg = NSEC_PER_MSEC / hit;
-                pr_info("%ld ns per entry\n", avg);
+                trace_printk("%ld ns per entry\n", avg);
        }
        if (missed) {
                if (time)
                        missed /= (long)time;
-                pr_info("Total iterations per millisec: %ld\n", hit + missed);
+                trace_printk("Total iterations per millisec: %ld\n",
+                             hit + missed);
                /* it is possible that hit + missed will overflow and be zero */
                if (!(hit + missed)) {
-                        pr_info("hit + missed overflowed and totalled zero!\n");
+                        trace_printk("hit + missed overflowed and totalled zero!\n");
                        hit--; /* make it non zero */
                }
                /* Caculate the average time in nanosecs */
                avg = NSEC_PER_MSEC / (hit + missed);
-                pr_info("%ld ns per entry\n", avg);
+                trace_printk("%ld ns per entry\n", avg);
        }
 }
@@ -353,7 +356,7 @@ static int ring_buffer_producer_thread(void *arg)
                ring_buffer_producer();
-                pr_info("Sleeping for 10 secs\n");
+                trace_printk("Sleeping for 10 secs\n");
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(HZ * SLEEP_TIME);
                __set_current_state(TASK_RUNNING);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c1878bfb2e1e..076fa6f0ee48 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2191,11 +2191,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
        if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
                return -ENOMEM;
-        mutex_lock(&tracing_cpumask_update_lock);
        err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
        if (err)
                goto err_unlock;
+        mutex_lock(&tracing_cpumask_update_lock);
        local_irq_disable();
        __raw_spin_lock(&ftrace_max_lock);
        for_each_tracing_cpu(cpu) {
@@ -2223,8 +2224,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
        return count;
 err_unlock:
-        mutex_unlock(&tracing_cpumask_update_lock);
+        free_cpumask_var(tracing_cpumask_new);
-        free_cpumask_var(tracing_cpumask);
        return err;
 }
@@ -3626,7 +3626,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
        struct trace_seq *s;
        unsigned long cnt;
-        s = kmalloc(sizeof(*s), GFP_ATOMIC);
+        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
                return ENOMEM;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index db6e54bdb596..936c621bbf46 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -27,8 +27,6 @@
 #include "trace.h"
 #include "trace_output.h"
-static DEFINE_MUTEX(filter_mutex);
 enum filter_op_ids
 {
        OP_OR,
@@ -178,7 +176,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
 static int filter_pred_strloc(struct filter_pred *pred, void *event,
                              int val1, int val2)
 {
-        int str_loc = *(int *)(event + pred->offset);
+        unsigned short str_loc = *(unsigned short *)(event + pred->offset);
        char *addr = (char *)(event + str_loc);
        int cmp, match;
@@ -294,12 +292,12 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
 {
        struct event_filter *filter = call->filter;
-        mutex_lock(&filter_mutex);
+        mutex_lock(&event_mutex);
        if (filter->filter_string)
                trace_seq_printf(s, "%s\n", filter->filter_string);
        else
                trace_seq_printf(s, "none\n");
-        mutex_unlock(&filter_mutex);
+        mutex_unlock(&event_mutex);
 }
 void print_subsystem_event_filter(struct event_subsystem *system,
@@ -307,12 +305,12 @@ void print_subsystem_event_filter(struct event_subsystem *system,
 {
        struct event_filter *filter = system->filter;
-        mutex_lock(&filter_mutex);
+        mutex_lock(&event_mutex);
        if (filter->filter_string)
                trace_seq_printf(s, "%s\n", filter->filter_string);
        else
                trace_seq_printf(s, "none\n");
-        mutex_unlock(&filter_mutex);
+        mutex_unlock(&event_mutex);
 }
 static struct ftrace_event_field *
@@ -381,6 +379,7 @@ void destroy_preds(struct ftrace_event_call *call)
                        filter_free_pred(filter->preds[i]);
        }
        kfree(filter->preds);
+        kfree(filter->filter_string);
        kfree(filter);
        call->filter = NULL;
 }
@@ -433,7 +432,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
                filter->n_preds = 0;
        }
-        mutex_lock(&event_mutex);
        list_for_each_entry(call, &ftrace_events, list) {
                if (!call->define_fields)
                        continue;
@@ -443,7 +441,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
                        remove_filter_string(call->filter);
                }
        }
-        mutex_unlock(&event_mutex);
 }
 static int filter_add_pred_fn(struct filter_parse_state *ps,
@@ -546,6 +543,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
        filter_pred_fn_t fn;
        unsigned long long val;
        int string_type;
+        int ret;
        pred->fn = filter_pred_none;
@@ -581,7 +579,11 @@ static int filter_add_pred(struct filter_parse_state *ps,
                        pred->not = 1;
                return filter_add_pred_fn(ps, call, pred, fn);
        } else {
-                if (strict_strtoull(pred->str_val, 0, &val)) {
+                if (field->is_signed)
+                        ret = strict_strtoll(pred->str_val, 0, &val);
+                else
+                        ret = strict_strtoull(pred->str_val, 0, &val);
+                if (ret) {
                        parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
                        return -EINVAL;
                }
@@ -625,7 +627,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
        filter->preds[filter->n_preds] = pred;
        filter->n_preds++;
-        mutex_lock(&event_mutex);
        list_for_each_entry(call, &ftrace_events, list) {
                if (!call->define_fields)
@@ -636,14 +637,12 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
                err = filter_add_pred(ps, call, pred);
                if (err) {
-                        mutex_unlock(&event_mutex);
                        filter_free_subsystem_preds(system);
                        parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
                        goto out;
                }
                replace_filter_string(call->filter, filter_string);
        }
-        mutex_unlock(&event_mutex);
 out:
        return err;
 }
@@ -1070,12 +1069,12 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
        struct filter_parse_state *ps;
-        mutex_lock(&filter_mutex);
+        mutex_lock(&event_mutex);
        if (!strcmp(strstrip(filter_string), "0")) {
                filter_disable_preds(call);
                remove_filter_string(call->filter);
-                mutex_unlock(&filter_mutex);
+                mutex_unlock(&event_mutex);
                return 0;
        }
@@ -1103,7 +1102,7 @@ out:
        postfix_clear(ps);
        kfree(ps);
 out_unlock:
-        mutex_unlock(&filter_mutex);
+        mutex_unlock(&event_mutex);
        return err;
 }
@@ -1115,12 +1114,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
        struct filter_parse_state *ps;
-        mutex_lock(&filter_mutex);
+        mutex_lock(&event_mutex);
        if (!strcmp(strstrip(filter_string), "0")) {
                filter_free_subsystem_preds(system);
                remove_filter_string(system->filter);
-                mutex_unlock(&filter_mutex);
+                mutex_unlock(&event_mutex);
                return 0;
        }
@@ -1148,7 +1147,7 @@ out:
        postfix_clear(ps);
        kfree(ps);
 out_unlock:
-        mutex_unlock(&filter_mutex);
+        mutex_unlock(&event_mutex);
        return err;
 }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index c9a0b7df44ff..90f134764837 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -193,9 +193,11 @@ static void tracing_start_function_trace(void)
 static void tracing_stop_function_trace(void)
 {
        ftrace_function_enabled = 0;
-        /* OK if they are not registered */
-        unregister_ftrace_function(&trace_stack_ops);
+        if (func_flags.val & TRACE_FUNC_OPT_STACK)
-        unregister_ftrace_function(&trace_ops);
+                unregister_ftrace_function(&trace_stack_ops);
+        else
+                unregister_ftrace_function(&trace_ops);
 }
 static int func_set_flag(u32 old_flags, u32 bit, int set)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8b592418d8b2..d2249abafb53 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -57,7 +57,8 @@ static struct tracer_flags tracer_flags = {
 /* Add a function return address to the trace stack on thread info.*/
 int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
+ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
+                         unsigned long frame_pointer)
 {
        unsigned long long calltime;
        int index;
@@ -85,6 +86,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
        current->ret_stack[index].func = func;
        current->ret_stack[index].calltime = calltime;
        current->ret_stack[index].subtime = 0;
+        current->ret_stack[index].fp = frame_pointer;
        *depth = index;
        return 0;
@@ -92,7 +94,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
 /* Retrieve a function return address to the trace stack on thread info.*/
 static void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
+ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
+                        unsigned long frame_pointer)
 {
        int index;
@@ -106,6 +109,31 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
                return;
        }
+#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+        /*
+         * The arch may choose to record the frame pointer used
+         * and check it here to make sure that it is what we expect it
+         * to be. If gcc does not set the place holder of the return
+         * address in the frame pointer, and does a copy instead, then
+         * the function graph trace will fail. This test detects this
+         * case.
+         *
+         * Currently, x86_32 with optimize for size (-Os) makes the latest
+         * gcc do the above.
+         */
+        if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
+                ftrace_graph_stop();
+                WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
+                     "  from func %pF return to %lx\n",
+                     current->ret_stack[index].fp,
+                     frame_pointer,
+                     (void *)current->ret_stack[index].func,
+                     current->ret_stack[index].ret);
+                *ret = (unsigned long)panic;
+                return;
+        }
+#endif
        *ret = current->ret_stack[index].ret;
        trace->func = current->ret_stack[index].func;
        trace->calltime = current->ret_stack[index].calltime;
@@ -117,12 +145,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 * Send the trace to the ring-buffer.
 * @return the original return address.
 */
-unsigned long ftrace_return_to_handler(void)
+unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 {
        struct ftrace_graph_ret trace;
        unsigned long ret;
-        ftrace_pop_return_trace(&trace, &ret);
+        ftrace_pop_return_trace(&trace, &ret, frame_pointer);
        trace.rettime = trace_clock_local();
        ftrace_graph_return(&trace);
        barrier();
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 815237a55af8..8a82b4b8ea52 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -15,6 +15,16 @@
 #include <linux/err.h>
 #include <linux/slab.h>
+static struct uts_namespace *create_uts_ns(void)
+{
+        struct uts_namespace *uts_ns;
+        uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
+        if (uts_ns)
+                kref_init(&uts_ns->kref);
+        return uts_ns;
+}
 /*
 * Clone a new ns copying an original utsname, setting refcount to 1
 * @old_ns: namespace to clone
@@ -24,14 +34,13 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
 {
        struct uts_namespace *ns;
-        ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
+        ns = create_uts_ns();
        if (!ns)
                return ERR_PTR(-ENOMEM);
        down_read(&uts_sem);
        memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
        up_read(&uts_sem);
-        kref_init(&ns->kref);
        return ns;
 }