Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                |    4
-rw-r--r--  kernel/cpu.c                   |    1
-rw-r--r--  kernel/cpuset.c                |    4
-rw-r--r--  kernel/exec_domain.c           |    2
-rw-r--r--  kernel/exit.c                  |  451
-rw-r--r--  kernel/fork.c                  |    6
-rw-r--r--  kernel/hrtimer.c               |    2
-rw-r--r--  kernel/irq/manage.c            |   39
-rw-r--r--  kernel/kmod.c                  |    2
-rw-r--r--  kernel/kthread.c               |    2
-rw-r--r--  kernel/module.c                |  336
-rw-r--r--  kernel/pid.c                   |    1
-rw-r--r--  kernel/power/Kconfig           |    2
-rw-r--r--  kernel/power/disk.c            |   50
-rw-r--r--  kernel/power/main.c            |   16
-rw-r--r--  kernel/power/process.c         |   97
-rw-r--r--  kernel/power/user.c            |   71
-rw-r--r--  kernel/profile.c               |    6
-rw-r--r--  kernel/ptrace.c                |   37
-rw-r--r--  kernel/rcuclassic.c            |   34
-rw-r--r--  kernel/rcupdate.c              |   71
-rw-r--r--  kernel/rcupreempt.c            |  418
-rw-r--r--  kernel/rcupreempt_trace.c      |    1
-rw-r--r--  kernel/rcutorture.c            |  174
-rw-r--r--  kernel/rtmutex-tester.c        |    7
-rw-r--r--  kernel/sched.c                 |    8
-rw-r--r--  kernel/smp.c                   |  383
-rw-r--r--  kernel/softirq.c               |    4
-rw-r--r--  kernel/sys_ni.c                |    1
-rw-r--r--  kernel/sysctl.c                |   17
-rw-r--r--  kernel/time/clocksource.c      |    8
-rw-r--r--  kernel/time/tick-broadcast.c   |    2
32 files changed, 1721 insertions, 536 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index f6328e16dfdd..985ddb7da4d0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,8 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
 
-CFLAGS_REMOVE_sched.o = -mno-spe
-
 ifdef CONFIG_FTRACE
 # Do not trace debug files and internal ftrace files
 CFLAGS_REMOVE_lockdep.o = -pg
@@ -21,6 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
+CFLAGS_REMOVE_sched.o = -mno-spe -pg
 endif
 
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
@@ -39,6 +38,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
 obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
+obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
 obj-$(CONFIG_SMP) += spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b11f06dc149a..cfb1d43ab801 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -299,6 +299,7 @@ int __ref cpu_down(unsigned int cpu)
 	cpu_maps_update_done();
 	return err;
 }
+EXPORT_SYMBOL(cpu_down);
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 /* Requires cpu_add_remove_lock to be held */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 459d601947a8..d2cc67dac8b1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -679,7 +679,9 @@ restart:
 				if (apn == b->pn) {
 					cpus_or(*dp, *dp, b->cpus_allowed);
 					b->pn = -1;
-					update_domain_attr(dattr, b);
+					if (dattr)
+						update_domain_attr(dattr
+								   + nslot, b);
 				}
 			}
 			nslot++;
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index a9e6bad9f706..c1ef192aa655 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -65,7 +65,7 @@ lookup_exec_domain(u_long personality)
 		goto out;
 	}
 
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 	read_unlock(&exec_domains_lock);
 	request_module("personality-%ld", pers);
 	read_lock(&exec_domains_lock);
diff --git a/kernel/exit.c b/kernel/exit.c
index ceb258782835..93d2711b9381 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -71,7 +71,7 @@ static void __unhash_process(struct task_struct *p)
 		__get_cpu_var(process_counts)--;
 	}
 	list_del_rcu(&p->thread_group);
-	remove_parent(p);
+	list_del_init(&p->sibling);
 }
 
 /*
@@ -152,6 +152,18 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 	put_task_struct(container_of(rhp, struct task_struct, rcu));
 }
 
+/*
+ * Do final ptrace-related cleanup of a zombie being reaped.
+ *
+ * Called with write_lock(&tasklist_lock) held.
+ */
+static void ptrace_release_task(struct task_struct *p)
+{
+	BUG_ON(!list_empty(&p->ptraced));
+	ptrace_unlink(p);
+	BUG_ON(!list_empty(&p->ptrace_entry));
+}
+
 void release_task(struct task_struct * p)
 {
 	struct task_struct *leader;
@@ -160,8 +172,7 @@ repeat:
 	atomic_dec(&p->user->processes);
 	proc_flush_task(p);
 	write_lock_irq(&tasklist_lock);
-	ptrace_unlink(p);
-	BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
+	ptrace_release_task(p);
 	__exit_signal(p);
 
 	/*
@@ -315,9 +326,8 @@ static void reparent_to_kthreadd(void)
 
 	ptrace_unlink(current);
 	/* Reparent to init */
-	remove_parent(current);
 	current->real_parent = current->parent = kthreadd_task;
-	add_parent(current);
+	list_move_tail(&current->sibling, &current->real_parent->children);
 
 	/* Set the exit signal to SIGCHLD so we signal init on exit */
 	current->exit_signal = SIGCHLD;
@@ -692,37 +702,97 @@ static void exit_mm(struct task_struct * tsk)
 	mmput(mm);
 }
 
-static void
-reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
-{
-	if (p->pdeath_signal)
-		/* We already hold the tasklist_lock here. */
-		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
-
-	/* Move the child from its dying parent to the new one. */
-	if (unlikely(traced)) {
-		/* Preserve ptrace links if someone else is tracing this child. */
-		list_del_init(&p->ptrace_list);
-		if (ptrace_reparented(p))
-			list_add(&p->ptrace_list, &p->real_parent->ptrace_children);
-	} else {
-		/* If this child is being traced, then we're the one tracing it
-		 * anyway, so let go of it.
-		 */
-		p->ptrace = 0;
-		remove_parent(p);
-		p->parent = p->real_parent;
-		add_parent(p);
-
-		if (task_is_traced(p)) {
-			/*
-			 * If it was at a trace stop, turn it into
-			 * a normal stop since it's no longer being
-			 * traced.
-			 */
-			ptrace_untrace(p);
-		}
-	}
+/*
+ * Return nonzero if @parent's children should reap themselves.
+ *
+ * Called with write_lock_irq(&tasklist_lock) held.
+ */
+static int ignoring_children(struct task_struct *parent)
+{
+	int ret;
+	struct sighand_struct *psig = parent->sighand;
+	unsigned long flags;
+	spin_lock_irqsave(&psig->siglock, flags);
+	ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
+	       (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT));
+	spin_unlock_irqrestore(&psig->siglock, flags);
+	return ret;
+}
+
+/*
+ * Detach all tasks we were using ptrace on.
+ * Any that need to be release_task'd are put on the @dead list.
+ *
+ * Called with write_lock(&tasklist_lock) held.
+ */
+static void ptrace_exit(struct task_struct *parent, struct list_head *dead)
+{
+	struct task_struct *p, *n;
+	int ign = -1;
+
+	list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) {
+		__ptrace_unlink(p);
+
+		if (p->exit_state != EXIT_ZOMBIE)
+			continue;
+
+		/*
+		 * If it's a zombie, our attachedness prevented normal
+		 * parent notification or self-reaping.  Do notification
+		 * now if it would have happened earlier.  If it should
+		 * reap itself, add it to the @dead list.  We can't call
+		 * release_task() here because we already hold tasklist_lock.
+		 *
+		 * If it's our own child, there is no notification to do.
+		 * But if our normal children self-reap, then this child
+		 * was prevented by ptrace and we must reap it now.
+		 */
+		if (!task_detached(p) && thread_group_empty(p)) {
+			if (!same_thread_group(p->real_parent, parent))
+				do_notify_parent(p, p->exit_signal);
+			else {
+				if (ign < 0)
+					ign = ignoring_children(parent);
+				if (ign)
+					p->exit_signal = -1;
+			}
+		}
+
+		if (task_detached(p)) {
+			/*
+			 * Mark it as in the process of being reaped.
+			 */
+			p->exit_state = EXIT_DEAD;
+			list_add(&p->ptrace_entry, dead);
+		}
+	}
+}
+
+/*
+ * Finish up exit-time ptrace cleanup.
+ *
+ * Called without locks.
+ */
+static void ptrace_exit_finish(struct task_struct *parent,
+			       struct list_head *dead)
+{
+	struct task_struct *p, *n;
+
+	BUG_ON(!list_empty(&parent->ptraced));
+
+	list_for_each_entry_safe(p, n, dead, ptrace_entry) {
+		list_del_init(&p->ptrace_entry);
+		release_task(p);
+	}
+}
+
+static void reparent_thread(struct task_struct *p, struct task_struct *father)
+{
+	if (p->pdeath_signal)
+		/* We already hold the tasklist_lock here. */
+		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+
+	list_move_tail(&p->sibling, &p->real_parent->children);
 
 	/* If this is a threaded reparent there is no need to
 	 * notify anyone anything has happened.
@@ -737,7 +807,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 	/* If we'd notified the old parent about this child's death,
 	 * also notify the new parent.
 	 */
-	if (!traced && p->exit_state == EXIT_ZOMBIE &&
+	if (!ptrace_reparented(p) &&
+	    p->exit_state == EXIT_ZOMBIE &&
 	    !task_detached(p) && thread_group_empty(p))
 		do_notify_parent(p, p->exit_signal);
 
@@ -754,12 +825,15 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 static void forget_original_parent(struct task_struct *father)
 {
 	struct task_struct *p, *n, *reaper = father;
-	struct list_head ptrace_dead;
-
-	INIT_LIST_HEAD(&ptrace_dead);
+	LIST_HEAD(ptrace_dead);
 
 	write_lock_irq(&tasklist_lock);
 
+	/*
+	 * First clean up ptrace if we were using it.
+	 */
+	ptrace_exit(father, &ptrace_dead);
+
 	do {
 		reaper = next_thread(reaper);
 		if (reaper == father) {
@@ -768,58 +842,19 @@ static void forget_original_parent(struct task_struct *father)
 		}
 	} while (reaper->flags & PF_EXITING);
 
-	/*
-	 * There are only two places where our children can be:
-	 *
-	 * - in our child list
-	 * - in our ptraced child list
-	 *
-	 * Search them and reparent children.
-	 */
 	list_for_each_entry_safe(p, n, &father->children, sibling) {
-		int ptrace;
-
-		ptrace = p->ptrace;
-
-		/* if father isn't the real parent, then ptrace must be enabled */
-		BUG_ON(father != p->real_parent && !ptrace);
-
-		if (father == p->real_parent) {
-			/* reparent with a reaper, real father it's us */
-			p->real_parent = reaper;
-			reparent_thread(p, father, 0);
-		} else {
-			/* reparent ptraced task to its real parent */
-			__ptrace_unlink (p);
-			if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) &&
-						thread_group_empty(p))
-				do_notify_parent(p, p->exit_signal);
-		}
-
-		/*
-		 * if the ptraced child is a detached zombie we must collect
-		 * it before we exit, or it will remain zombie forever since
-		 * we prevented it from self-reap itself while it was being
-		 * traced by us, to be able to see it in wait4.
-		 */
-		if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p)))
-			list_add(&p->ptrace_list, &ptrace_dead);
-	}
-
-	list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) {
 		p->real_parent = reaper;
-		reparent_thread(p, father, 1);
+		if (p->parent == father) {
+			BUG_ON(p->ptrace);
+			p->parent = p->real_parent;
+		}
+		reparent_thread(p, father);
 	}
 
 	write_unlock_irq(&tasklist_lock);
 	BUG_ON(!list_empty(&father->children));
-	BUG_ON(!list_empty(&father->ptrace_children));
-
-	list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) {
-		list_del_init(&p->ptrace_list);
-		release_task(p);
-	}
 
+	ptrace_exit_finish(father, &ptrace_dead);
 }
 
 /*
@@ -1180,13 +1215,6 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
 		return 0;
 	}
 
-	/*
-	 * Do not consider detached threads that are
-	 * not ptraced:
-	 */
-	if (task_detached(p) && !p->ptrace)
-		return 0;
-
 	/* Wait for all children (clone and not) if __WALL is set;
 	 * otherwise, wait for clone children *only* if __WCLONE is
 	 * set; otherwise, wait for non-clone children *only*. (Note:
@@ -1197,14 +1225,10 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
 		return 0;
 
 	err = security_task_wait(p);
-	if (likely(!err))
-		return 1;
+	if (err)
+		return err;
 
-	if (type != PIDTYPE_PID)
-		return 0;
-	/* This child was explicitly requested, abort */
-	read_unlock(&tasklist_lock);
-	return err;
+	return 1;
 }
 
 static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
@@ -1238,7 +1262,7 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
  * the lock and this task is uninteresting. If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_zombie(struct task_struct *p, int noreap,
+static int wait_task_zombie(struct task_struct *p, int options,
 			    struct siginfo __user *infop,
 			    int __user *stat_addr, struct rusage __user *ru)
 {
@@ -1246,7 +1270,10 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
 	int retval, status, traced;
 	pid_t pid = task_pid_vnr(p);
 
-	if (unlikely(noreap)) {
+	if (!likely(options & WEXITED))
+		return 0;
+
+	if (unlikely(options & WNOWAIT)) {
 		uid_t uid = p->uid;
 		int exit_code = p->exit_code;
 		int why, status;
@@ -1396,21 +1423,24 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
  * the lock and this task is uninteresting. If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_stopped(struct task_struct *p,
-			     int noreap, struct siginfo __user *infop,
+static int wait_task_stopped(int ptrace, struct task_struct *p,
+			     int options, struct siginfo __user *infop,
 			     int __user *stat_addr, struct rusage __user *ru)
 {
 	int retval, exit_code, why;
 	uid_t uid = 0;	/* unneeded, required by compiler */
 	pid_t pid;
 
+	if (!(options & WUNTRACED))
+		return 0;
+
 	exit_code = 0;
 	spin_lock_irq(&p->sighand->siglock);
 
 	if (unlikely(!task_is_stopped_or_traced(p)))
 		goto unlock_sig;
 
-	if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0)
+	if (!ptrace && p->signal->group_stop_count > 0)
 		/*
 		 * A group stop is in progress and this is the group leader.
 		 * We won't report until all threads have stopped.
@@ -1421,7 +1451,7 @@ static int wait_task_stopped(struct task_struct *p,
 	if (!exit_code)
 		goto unlock_sig;
 
-	if (!noreap)
+	if (!unlikely(options & WNOWAIT))
 		p->exit_code = 0;
 
 	uid = p->uid;
@@ -1439,10 +1469,10 @@ unlock_sig:
 	 */
 	get_task_struct(p);
 	pid = task_pid_vnr(p);
-	why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
+	why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
 	read_unlock(&tasklist_lock);
 
-	if (unlikely(noreap))
+	if (unlikely(options & WNOWAIT))
 		return wait_noreap_copyout(p, pid, uid,
 					   why, exit_code,
 					   infop, ru);
@@ -1476,7 +1506,7 @@ unlock_sig:
  * the lock and this task is uninteresting. If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_continued(struct task_struct *p, int noreap,
+static int wait_task_continued(struct task_struct *p, int options,
 			       struct siginfo __user *infop,
 			       int __user *stat_addr, struct rusage __user *ru)
 {
@@ -1484,6 +1514,9 @@ static int wait_task_continued(struct task_struct *p, int noreap,
 	pid_t pid;
 	uid_t uid;
 
+	if (!unlikely(options & WCONTINUED))
+		return 0;
+
 	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
 		return 0;
 
@@ -1493,7 +1526,7 @@ static int wait_task_continued(struct task_struct *p, int noreap,
 		spin_unlock_irq(&p->sighand->siglock);
 		return 0;
 	}
-	if (!noreap)
+	if (!unlikely(options & WNOWAIT))
 		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
 	spin_unlock_irq(&p->sighand->siglock);
 
@@ -1519,89 +1552,161 @@ static int wait_task_continued(struct task_struct *p, int noreap,
 	return retval;
 }
 
+/*
+ * Consider @p for a wait by @parent.
+ *
+ * -ECHILD should be in *@notask_error before the first call.
+ * Returns nonzero for a final return, when we have unlocked tasklist_lock.
+ * Returns zero if the search for a child should continue;
+ * then *@notask_error is 0 if @p is an eligible child,
+ * or another error from security_task_wait(), or still -ECHILD.
+ */
+static int wait_consider_task(struct task_struct *parent, int ptrace,
+			      struct task_struct *p, int *notask_error,
+			      enum pid_type type, struct pid *pid, int options,
+			      struct siginfo __user *infop,
+			      int __user *stat_addr, struct rusage __user *ru)
+{
+	int ret = eligible_child(type, pid, options, p);
+	if (!ret)
+		return ret;
+
+	if (unlikely(ret < 0)) {
+		/*
+		 * If we have not yet seen any eligible child,
+		 * then let this error code replace -ECHILD.
+		 * A permission error will give the user a clue
+		 * to look for security policy problems, rather
+		 * than for mysterious wait bugs.
+		 */
+		if (*notask_error)
+			*notask_error = ret;
+	}
+
+	if (likely(!ptrace) && unlikely(p->ptrace)) {
+		/*
+		 * This child is hidden by ptrace.
+		 * We aren't allowed to see it now, but eventually we will.
+		 */
+		*notask_error = 0;
+		return 0;
+	}
+
+	if (p->exit_state == EXIT_DEAD)
+		return 0;
+
+	/*
+	 * We don't reap group leaders with subthreads.
+	 */
+	if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
+		return wait_task_zombie(p, options, infop, stat_addr, ru);
+
+	/*
+	 * It's stopped or running now, so it might
+	 * later continue, exit, or stop again.
+	 */
+	*notask_error = 0;
+
+	if (task_is_stopped_or_traced(p))
+		return wait_task_stopped(ptrace, p, options,
+					 infop, stat_addr, ru);
+
+	return wait_task_continued(p, options, infop, stat_addr, ru);
+}
+
+/*
+ * Do the work of do_wait() for one thread in the group, @tsk.
+ *
+ * -ECHILD should be in *@notask_error before the first call.
+ * Returns nonzero for a final return, when we have unlocked tasklist_lock.
+ * Returns zero if the search for a child should continue; then
+ * *@notask_error is 0 if there were any eligible children,
+ * or another error from security_task_wait(), or still -ECHILD.
+ */
+static int do_wait_thread(struct task_struct *tsk, int *notask_error,
+			  enum pid_type type, struct pid *pid, int options,
+			  struct siginfo __user *infop, int __user *stat_addr,
+			  struct rusage __user *ru)
+{
+	struct task_struct *p;
+
+	list_for_each_entry(p, &tsk->children, sibling) {
+		/*
+		 * Do not consider detached threads.
+		 */
+		if (!task_detached(p)) {
+			int ret = wait_consider_task(tsk, 0, p, notask_error,
+						     type, pid, options,
+						     infop, stat_addr, ru);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
+			  enum pid_type type, struct pid *pid, int options,
+			  struct siginfo __user *infop, int __user *stat_addr,
+			  struct rusage __user *ru)
+{
+	struct task_struct *p;
+
+	/*
+	 * Traditionally we see ptrace'd stopped tasks regardless of options.
+	 */
+	options |= WUNTRACED;
+
+	list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
+		int ret = wait_consider_task(tsk, 1, p, notask_error,
+					     type, pid, options,
+					     infop, stat_addr, ru);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static long do_wait(enum pid_type type, struct pid *pid, int options,
 		    struct siginfo __user *infop, int __user *stat_addr,
 		    struct rusage __user *ru)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	struct task_struct *tsk;
-	int flag, retval;
+	int retval;
 
 	add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
-	/* If there is nothing that can match our critier just get out */
+	/*
+	 * If there is nothing that can match our critiera just get out.
+	 * We will clear @retval to zero if we see any child that might later
+	 * match our criteria, even if we are not able to reap it yet.
+	 */
 	retval = -ECHILD;
 	if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type])))
 		goto end;
 
-	/*
-	 * We will set this flag if we see any child that might later
-	 * match our criteria, even if we are not able to reap it yet.
-	 */
-	flag = retval = 0;
 	current->state = TASK_INTERRUPTIBLE;
 	read_lock(&tasklist_lock);
 	tsk = current;
 	do {
-		struct task_struct *p;
-
-		list_for_each_entry(p, &tsk->children, sibling) {
-			int ret = eligible_child(type, pid, options, p);
-			if (!ret)
-				continue;
-
-			if (unlikely(ret < 0)) {
-				retval = ret;
-			} else if (task_is_stopped_or_traced(p)) {
-				/*
-				 * It's stopped now, so it might later
-				 * continue, exit, or stop again.
-				 */
-				flag = 1;
-				if (!(p->ptrace & PT_PTRACED) &&
-				    !(options & WUNTRACED))
-					continue;
-
-				retval = wait_task_stopped(p,
-						(options & WNOWAIT), infop,
-						stat_addr, ru);
-			} else if (p->exit_state == EXIT_ZOMBIE &&
-					!delay_group_leader(p)) {
-				/*
-				 * We don't reap group leaders with subthreads.
-				 */
-				if (!likely(options & WEXITED))
-					continue;
-				retval = wait_task_zombie(p,
-						(options & WNOWAIT), infop,
-						stat_addr, ru);
-			} else if (p->exit_state != EXIT_DEAD) {
-				/*
-				 * It's running now, so it might later
-				 * exit, stop, or stop and then continue.
-				 */
-				flag = 1;
-				if (!unlikely(options & WCONTINUED))
-					continue;
-				retval = wait_task_continued(p,
-						(options & WNOWAIT), infop,
-						stat_addr, ru);
-			}
-			if (retval != 0) /* tasklist_lock released */
-				goto end;
-		}
-		if (!flag) {
-			list_for_each_entry(p, &tsk->ptrace_children,
-					    ptrace_list) {
-				flag = eligible_child(type, pid, options, p);
-				if (!flag)
-					continue;
-				if (likely(flag > 0))
-					break;
-				retval = flag;
-				goto end;
-			}
-		}
+		int tsk_result = do_wait_thread(tsk, &retval,
+						type, pid, options,
+						infop, stat_addr, ru);
+		if (!tsk_result)
+			tsk_result = ptrace_do_wait(tsk, &retval,
+						    type, pid, options,
+						    infop, stat_addr, ru);
+		if (tsk_result) {
+			/*
+			 * tasklist_lock is unlocked and we have a final result.
+			 */
+			retval = tsk_result;
+			goto end;
 		}
+
 		if (options & __WNOTHREAD)
 			break;
 		tsk = next_thread(tsk);
@@ -1609,16 +1714,14 @@ repeat:
 	} while (tsk != current);
 	read_unlock(&tasklist_lock);
 
-	if (flag) {
-		if (options & WNOHANG)
-			goto end;
+	if (!retval && !(options & WNOHANG)) {
 		retval = -ERESTARTSYS;
-		if (signal_pending(current))
-			goto end;
-		schedule();
-		goto repeat;
+		if (!signal_pending(current)) {
+			schedule();
+			goto repeat;
+		}
 	}
-	retval = -ECHILD;
+
 end:
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&current->signal->wait_chldexit,&wait);
diff --git a/kernel/fork.c b/kernel/fork.c
index 4bd2f516401f..adefc1131f27 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1125,8 +1125,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->thread_group);
-	INIT_LIST_HEAD(&p->ptrace_children);
-	INIT_LIST_HEAD(&p->ptrace_list);
+	INIT_LIST_HEAD(&p->ptrace_entry);
+	INIT_LIST_HEAD(&p->ptraced);
 
 	/* Now that the task is set up, run cgroup callbacks if
 	 * necessary. We need to run them before the task is visible
@@ -1198,7 +1198,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	}
 
 	if (likely(p->pid)) {
-		add_parent(p);
+		list_add_tail(&p->sibling, &p->real_parent->children);
 		if (unlikely(p->ptrace & PT_PTRACED))
 			__ptrace_link(p, current->parent);
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2913a8bff612..b8e4dce80a74 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -622,7 +622,7 @@ static void retrigger_next_event(void *arg)
 void clock_was_set(void)
 {
 	/* Retrigger the CPU local events everywhere */
-	on_each_cpu(retrigger_next_event, NULL, 0, 1);
+	on_each_cpu(retrigger_next_event, NULL, 1);
 }
 
 /*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 77a51be36010..3cfc0fefb5ee 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -217,6 +217,17 @@ void enable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(enable_irq);
 
+int set_irq_wake_real(unsigned int irq, unsigned int on)
+{
+	struct irq_desc *desc = irq_desc + irq;
+	int ret = -ENXIO;
+
+	if (desc->chip->set_wake)
+		ret = desc->chip->set_wake(irq, on);
+
+	return ret;
+}
+
 /**
  * set_irq_wake - control irq power management wakeup
  * @irq: interrupt to control
@@ -233,30 +244,34 @@ int set_irq_wake(unsigned int irq, unsigned int on)
 {
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
-	int ret = -ENXIO;
-	int (*set_wake)(unsigned, unsigned) = desc->chip->set_wake;
+	int ret = 0;
 
 	/* wakeup-capable irqs can be shared between drivers that
 	 * don't need to have the same sleep mode behaviors.
 	 */
 	spin_lock_irqsave(&desc->lock, flags);
 	if (on) {
-		if (desc->wake_depth++ == 0)
-			desc->status |= IRQ_WAKEUP;
-		else
-			set_wake = NULL;
+		if (desc->wake_depth++ == 0) {
+			ret = set_irq_wake_real(irq, on);
+			if (ret)
+				desc->wake_depth = 0;
+			else
+				desc->status |= IRQ_WAKEUP;
+		}
 	} else {
 		if (desc->wake_depth == 0) {
 			printk(KERN_WARNING "Unbalanced IRQ %d "
 			       "wake disable\n", irq);
 			WARN_ON(1);
-		} else if (--desc->wake_depth == 0)
-			desc->status &= ~IRQ_WAKEUP;
-		else
-			set_wake = NULL;
+		} else if (--desc->wake_depth == 0) {
+			ret = set_irq_wake_real(irq, on);
+			if (ret)
+				desc->wake_depth = 1;
+			else
+				desc->status &= ~IRQ_WAKEUP;
+		}
 	}
-	if (set_wake)
-		ret = desc->chip->set_wake(irq, on);
+
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 8df97d3dfda8..90d7af1c1655 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -42,7 +42,7 @@ extern int max_threads;
 
 static struct workqueue_struct *khelper_wq;
 
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 
 /*
 	modprobe_path is set via /proc/sys.
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 97747cdd37c9..ac3fb7326641 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -235,7 +235,7 @@ int kthreadd(void *unused)
 	set_user_nice(tsk, KTHREAD_NICE_LEVEL);
 	set_cpus_allowed(tsk, CPU_MASK_ALL);
 
-	current->flags |= PF_NOFREEZE;
+	current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
 
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/module.c b/kernel/module.c
index 5f80478b746d..d8b5605132a0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -70,6 +70,9 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
 
 static BLOCKING_NOTIFIER_HEAD(module_notify_list);
 
+/* Bounds of module allocation, for speeding __module_text_address */
+static unsigned long module_addr_min = -1UL, module_addr_max = 0;
+
 int register_module_notifier(struct notifier_block * nb)
 {
 	return blocking_notifier_chain_register(&module_notify_list, nb);
@@ -134,17 +137,19 @@ extern const struct kernel_symbol __start___ksymtab_gpl[];
 extern const struct kernel_symbol __stop___ksymtab_gpl[];
 extern const struct kernel_symbol __start___ksymtab_gpl_future[];
 extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
-extern const struct kernel_symbol __start___ksymtab_unused[];
-extern const struct kernel_symbol __stop___ksymtab_unused[];
-extern const struct kernel_symbol __start___ksymtab_unused_gpl[];
-extern const struct kernel_symbol __stop___ksymtab_unused_gpl[];
 extern const struct kernel_symbol __start___ksymtab_gpl_future[];
 extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
 extern const unsigned long __start___kcrctab[];
 extern const unsigned long __start___kcrctab_gpl[];
 extern const unsigned long __start___kcrctab_gpl_future[];
+#ifdef CONFIG_UNUSED_SYMBOLS
+extern const struct kernel_symbol __start___ksymtab_unused[];
+extern const struct kernel_symbol __stop___ksymtab_unused[];
+extern const struct kernel_symbol __start___ksymtab_unused_gpl[];
+extern const struct kernel_symbol __stop___ksymtab_unused_gpl[];
 extern const unsigned long __start___kcrctab_unused[];
 extern const unsigned long __start___kcrctab_unused_gpl[];
+#endif
 
 #ifndef CONFIG_MODVERSIONS
 #define symversion(base, idx) NULL
@@ -152,156 +157,186 @@ extern const unsigned long __start___kcrctab_unused_gpl[];
 #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL)
 #endif
 
-/* lookup symbol in given range of kernel_symbols */
-static const struct kernel_symbol *lookup_symbol(const char *name,
-	const struct kernel_symbol *start,
-	const struct kernel_symbol *stop)
-{
-	const struct kernel_symbol *ks = start;
-	for (; ks < stop; ks++)
-		if (strcmp(ks->name, name) == 0)
-			return ks;
-	return NULL;
-}
-
-static bool always_ok(bool gplok, bool warn, const char *name)
-{
-	return true;
-}
-
-static bool printk_unused_warning(bool gplok, bool warn, const char *name)
-{
-	if (warn) {
-		printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
-		       "however this module is using it.\n", name);
-		printk(KERN_WARNING
-		       "This symbol will go away in the future.\n");
-		printk(KERN_WARNING
-		       "Please evalute if this is the right api to use and if "
-		       "it really is, submit a report the linux kernel "
-		       "mailinglist together with submitting your code for "
-		       "inclusion.\n");
-	}
-	return true;
-}
-
-static bool gpl_only_unused_warning(bool gplok, bool warn, const char *name)
-{
-	if (!gplok)
-		return false;
-	return printk_unused_warning(gplok, warn, name);
-}
-
-static bool gpl_only(bool gplok, bool warn, const char *name)
-{
-	return gplok;
-}
-
-static bool warn_if_not_gpl(bool gplok, bool warn, const char *name)
-{
-	if (!gplok && warn) {
-		printk(KERN_WARNING "Symbol %s is being used "
-		       "by a non-GPL module, which will not "
-		       "be allowed in the future\n", name);
-		printk(KERN_WARNING "Please see the file "
-		       "Documentation/feature-removal-schedule.txt "
-		       "in the kernel source tree for more details.\n");
-	}
-	return true;
-}
-
 struct symsearch {
 	const struct kernel_symbol *start, *stop;
 	const unsigned long *crcs;
-	bool (*check)(bool gplok, bool warn, const char *name);
+	enum {
+		NOT_GPL_ONLY,
+		GPL_ONLY,
+		WILL_BE_GPL_ONLY,
+	} licence;
+	bool unused;
 };
 
-/* Look through this array of symbol tables for a symbol match which
- * passes the check function. */
-static const struct kernel_symbol *search_symarrays(const struct symsearch *arr,
-						    unsigned int num,
-						    const char *name,
-						    bool gplok,
-						    bool warn,
-						    const unsigned long **crc)
+static bool each_symbol_in_section(const struct symsearch *arr,
+				   unsigned int arrsize,
+				   struct module *owner,
+				   bool (*fn)(const struct symsearch *syms,
+					      struct module *owner,
+					      unsigned int symnum, void *data),
+				   void *data)
 {
-	unsigned int i;
-	const struct kernel_symbol *ks;
+	unsigned int i, j;
 
-	for (i = 0; i < num; i++) {
-		ks = lookup_symbol(name, arr[i].start, arr[i].stop);
-		if (!ks || !arr[i].check(gplok, warn, name))
-			continue;
-
-		if (crc)
-			*crc = symversion(arr[i].crcs, ks - arr[i].start);
-		return ks;
+	for (j = 0; j < arrsize; j++) {
+		for (i = 0; i < arr[j].stop - arr[j].start; i++)
+			if (fn(&arr[j], owner, i, data))
+				return true;
 	}
-	return NULL;
+
+	return false;
 }
 
-/* Find a symbol, return value, (optional) crc and (optional) module
- * which owns it */
-static unsigned long find_symbol(const char *name,
-				 struct module **owner,
-				 const unsigned long **crc,
-				 bool gplok,
-				 bool warn)
+/* Returns true as soon as fn returns true, otherwise false. */
+static bool each_symbol(bool (*fn)(const struct symsearch *arr,
+				   struct module *owner,
+				   unsigned int symnum, void *data),
+			void *data)
 {
 	struct module *mod;
-	const struct kernel_symbol *ks;
 	const struct symsearch arr[] = {
 		{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
-		  always_ok },
+		  NOT_GPL_ONLY, false },
 		{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
-		  __start___kcrctab_gpl, gpl_only },
+		  __start___kcrctab_gpl,
+		  GPL_ONLY, false },
 		{ __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
-		  __start___kcrctab_gpl_future, warn_if_not_gpl },
+		  __start___kcrctab_gpl_future,
+		  WILL_BE_GPL_ONLY, false },
+#ifdef CONFIG_UNUSED_SYMBOLS
 		{ __start___ksymtab_unused, __stop___ksymtab_unused,
-		  __start___kcrctab_unused, printk_unused_warning },
+		  __start___kcrctab_unused,
+		  NOT_GPL_ONLY, true },
 		{ __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
-		  __start___kcrctab_unused_gpl, gpl_only_unused_warning },
+		  __start___kcrctab_unused_gpl,
+		  GPL_ONLY, true },
+#endif
 	};
 
-	/* Core kernel first. */
-	ks = search_symarrays(arr, ARRAY_SIZE(arr), name, gplok, warn, crc);
-	if (ks) {
-		if (owner)
-			*owner = NULL;
-		return ks->value;
-	}
+	if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
+		return true;
 
-	/* Now try modules. */
 	list_for_each_entry(mod, &modules, list) {
 		struct symsearch arr[] = {
 			{ mod->syms, mod->syms + mod->num_syms, mod->crcs,
-			  always_ok },
+			  NOT_GPL_ONLY, false },
 			{ mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
-			  mod->gpl_crcs, gpl_only },
+			  mod->gpl_crcs,
+			  GPL_ONLY, false },
 			{ mod->gpl_future_syms,
 			  mod->gpl_future_syms + mod->num_gpl_future_syms,
-			  mod->gpl_future_crcs, warn_if_not_gpl },
+			  mod->gpl_future_crcs,
+			  WILL_BE_GPL_ONLY, false },
+#ifdef CONFIG_UNUSED_SYMBOLS
 			{ mod->unused_syms,
 			  mod->unused_syms + mod->num_unused_syms,
-			  mod->unused_crcs, printk_unused_warning },
+			  mod->unused_crcs,
+			  NOT_GPL_ONLY, true },
 			{ mod->unused_gpl_syms,
 			  mod->unused_gpl_syms + mod->num_unused_gpl_syms,
-			  mod->unused_gpl_crcs, gpl_only_unused_warning },
+			  mod->unused_gpl_crcs,
+			  GPL_ONLY, true },
+#endif
 		};
 
-		ks = search_symarrays(arr, ARRAY_SIZE(arr),
-				      name, gplok, warn, crc);
-		if (ks) {
-			if (owner)
-				*owner = mod;
-			return ks->value;
+		if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
+			return true;
+	}
+	return false;
+}
+
+struct find_symbol_arg {
+	/* Input */
+	const char *name;
+	bool gplok;
+	bool warn;
+
+	/* Output */
+	struct module *owner;
+	const unsigned long *crc;
+	unsigned long value;
+};
+
+static bool find_symbol_in_section(const struct symsearch *syms,
+				   struct module *owner,
+				   unsigned int symnum, void *data)
+{
+	struct find_symbol_arg *fsa = data;
+
+	if (strcmp(syms->start[symnum].name, fsa->name) != 0)
+		return false;
+
+	if (!fsa->gplok) {
+		if (syms->licence == GPL_ONLY)
+			return false;
+		if (syms->licence == WILL_BE_GPL_ONLY && fsa->warn) {
+			printk(KERN_WARNING "Symbol %s is being used "
+			       "by a non-GPL module, which will not "
+			       "be allowed in the future\n", fsa->name);
+			printk(KERN_WARNING "Please see the file "
+			       "Documentation/feature-removal-schedule.txt "
+			       "in the kernel source tree for more details.\n");
 		}
 	}
 
+#ifdef CONFIG_UNUSED_SYMBOLS
+	if (syms->unused && fsa->warn) {
+		printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
+		       "however this module is using it.\n", fsa->name);
+		printk(KERN_WARNING
+		       "This symbol will go away in the future.\n");
+		printk(KERN_WARNING
+		       "Please evalute if this is the right api to use and if "
+		       "it really is, submit a report the linux kernel "
+		       "mailinglist together with submitting your code for "
+		       "inclusion.\n");
+	}
+#endif
+
+	fsa->owner = owner;
+	fsa->crc = symversion(syms->crcs, symnum);
+	fsa->value = syms->start[symnum].value;
+	return true;
+}
+
+/* Find a symbol, return value, (optional) crc and (optional) module
+ * which owns it */
+static unsigned long find_symbol(const char *name,
+				 struct module **owner,
+				 const unsigned long **crc,
+				 bool gplok,
+				 bool warn)
+{
+	struct find_symbol_arg fsa;
+
+	fsa.name = name;
+	fsa.gplok = gplok;
+	fsa.warn = warn;
+
+	if (each_symbol(find_symbol_in_section, &fsa)) {
+		if (owner)
+			*owner = fsa.owner;
+		if (crc)
+			*crc = fsa.crc;
+		return fsa.value;
+	}
+
 	DEBUGP("Failed to find symbol %s\n", name);
 	return -ENOENT;
 }
 
+/* lookup symbol in given range of kernel_symbols */
+static const struct kernel_symbol *lookup_symbol(const char *name,
+	const struct kernel_symbol *start,
+	const struct kernel_symbol *stop)
+{
+	const struct kernel_symbol *ks = start;
+	for (; ks < stop; ks++)
+		if (strcmp(ks->name, name) == 0)
+			return ks;
+	return NULL;
+}
+
 /* Search for module by name: must hold module_mutex. */
 static struct module *find_module(const char *name)
 {
@@ -639,8 +674,8 @@ static int __try_stop_module(void *_sref)
 {
 	struct stopref *sref = _sref;
 
-	/* If it's not unused, quit unless we are told to block. */
-	if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) {
+	/* If it's not unused, quit unless we're forcing. */
+	if (module_refcount(sref->mod) != 0) {
 		if (!(*sref->forced = try_force_unload(sref->flags)))
 			return -EWOULDBLOCK;
 	}
@@ -652,9 +687,16 @@ static int __try_stop_module(void *_sref)
 
 static int try_stop_module(struct module *mod, int flags, int *forced)
 {
-	struct stopref sref = { mod, flags, forced };
+	if (flags & O_NONBLOCK) {
+		struct stopref sref = { mod, flags, forced };
 
-	return stop_machine_run(__try_stop_module, &sref, NR_CPUS);
+		return stop_machine_run(__try_stop_module, &sref, NR_CPUS);
+	} else {
+		/* We don't need to stop the machine for this. */
+		mod->state = MODULE_STATE_GOING;
+		synchronize_sched();
+		return 0;
+	}
 }
 
 unsigned int module_refcount(struct module *mod)
@@ -1445,8 +1487,10 @@ static int verify_export_symbols(struct module *mod)
 		{ mod->syms, mod->num_syms },
 		{ mod->gpl_syms, mod->num_gpl_syms },
 		{ mod->gpl_future_syms, mod->num_gpl_future_syms },
+#ifdef CONFIG_UNUSED_SYMBOLS
 		{ mod->unused_syms, mod->num_unused_syms },
 		{ mod->unused_gpl_syms, mod->num_unused_gpl_syms },
+#endif
 	};
 
 	for (i = 0; i < ARRAY_SIZE(arr); i++) {
@@ -1526,7 +1570,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
 }
 
 /* Update size with this section: return offset. */
-static long get_offset(unsigned long *size, Elf_Shdr *sechdr)
+static long get_offset(unsigned int *size, Elf_Shdr *sechdr)
 {
 	long ret;
 
@@ -1738,6 +1782,20 @@ static inline void add_kallsyms(struct module *mod,
 }
 #endif /* CONFIG_KALLSYMS */
 
+static void *module_alloc_update_bounds(unsigned long size)
+{
+	void *ret = module_alloc(size);
+
+	if (ret) {
+		/* Update module bounds. */
+		if ((unsigned long)ret < module_addr_min)
+			module_addr_min = (unsigned long)ret;
+		if ((unsigned long)ret + size > module_addr_max)
+			module_addr_max = (unsigned long)ret + size;
+	}
+	return ret;
+}
+
 /* Allocate and load the module: note that size of section 0 is always
    zero, and we rely on this for optional sections. */
 static struct module *load_module(void __user *umod,
@@ -1764,10 +1822,12 @@ static struct module *load_module(void __user *umod,
 	unsigned int gplfutureindex;
 	unsigned int gplfuturecrcindex;
 	unsigned int unwindex = 0;
+#ifdef CONFIG_UNUSED_SYMBOLS
 	unsigned int unusedindex;
 	unsigned int unusedcrcindex;
 	unsigned int unusedgplindex;
 	unsigned int unusedgplcrcindex;
+#endif
 	unsigned int markersindex;
 	unsigned int markersstringsindex;
 	struct module *mod;
@@ -1850,13 +1910,15 @@ static struct module *load_module(void __user *umod,
 	exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
 	gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
 	gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future");
-	unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
-	unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
 	crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab");
 	gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl");
 	gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future");
+#ifdef CONFIG_UNUSED_SYMBOLS
+	unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
+	unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
 	unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused");
 	unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl");
+#endif
 	setupindex = find_sec(hdr, sechdrs, secstrings, "__param");
 	exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table");
 	obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
@@ -1935,7 +1997,7 @@ static struct module *load_module(void __user *umod,
 	layout_sections(mod, hdr, sechdrs, secstrings);
 
 	/* Do the allocs. */
-	ptr = module_alloc(mod->core_size);
+	ptr = module_alloc_update_bounds(mod->core_size);
 	if (!ptr) {
 		err = -ENOMEM;
 		goto free_percpu;
@@ -1943,7 +2005,7 @@ static struct module *load_module(void __user *umod,
 	memset(ptr, 0, mod->core_size);
 	mod->module_core = ptr;
 
-	ptr = module_alloc(mod->init_size);
+	ptr = module_alloc_update_bounds(mod->init_size);
 	if (!ptr && mod->init_size) {
 		err = -ENOMEM;
 		goto free_core;
@@ -2018,14 +2080,15 @@ static struct module *load_module(void __user *umod,
 	mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr;
 	mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size /
 					sizeof(*mod->gpl_future_syms);
-	mod->num_unused_syms = sechdrs[unusedindex].sh_size /
-					sizeof(*mod->unused_syms);
-	mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
-					sizeof(*mod->unused_gpl_syms);
 	mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
 	if (gplfuturecrcindex)
 		mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
 
+#ifdef CONFIG_UNUSED_SYMBOLS
+	mod->num_unused_syms = sechdrs[unusedindex].sh_size /
+					sizeof(*mod->unused_syms);
+	mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
+					sizeof(*mod->unused_gpl_syms);
 	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
 	if (unusedcrcindex)
 		mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr;
@@ -2033,13 +2096,17 @@ static struct module *load_module(void __user *umod,
 	if (unusedgplcrcindex)
 		mod->unused_gpl_crcs
 			= (void *)sechdrs[unusedgplcrcindex].sh_addr;
+#endif
 
 #ifdef CONFIG_MODVERSIONS
-	if ((mod->num_syms && !crcindex) ||
-	    (mod->num_gpl_syms && !gplcrcindex) ||
-	    (mod->num_gpl_future_syms && !gplfuturecrcindex) ||
-	    (mod->num_unused_syms && !unusedcrcindex) ||
-	    (mod->num_unused_gpl_syms && !unusedgplcrcindex)) {
+	if ((mod->num_syms && !crcindex)
+	    || (mod->num_gpl_syms && !gplcrcindex)
+	    || (mod->num_gpl_future_syms && !gplfuturecrcindex)
+#ifdef CONFIG_UNUSED_SYMBOLS
+	    || (mod->num_unused_syms && !unusedcrcindex)
+	    || (mod->num_unused_gpl_syms && !unusedgplcrcindex)
+#endif
+		) {
 		printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
 		err = try_to_force_load(mod, "nocrc");
 		if (err)
@@ -2512,7 +2579,7 @@ static int m_show(struct seq_file *m, void *p)
 	struct module *mod = list_entry(p, struct module, list);
 	char buf[8];
 
-	seq_printf(m, "%s %lu",
+	seq_printf(m, "%s %u",
 		   mod->name, mod->init_size + mod->core_size);
 	print_unload_info(m, mod);
 
@@ -2595,6 +2662,9 @@ struct module *__module_text_address(unsigned long addr)
2595{ 2662{
2596 struct module *mod; 2663 struct module *mod;
2597 2664
2665 if (addr < module_addr_min || addr > module_addr_max)
2666 return NULL;
2667
2598 list_for_each_entry(mod, &modules, list) 2668 list_for_each_entry(mod, &modules, list)
2599 if (within(addr, mod->module_init, mod->init_text_size) 2669 if (within(addr, mod->module_init, mod->init_text_size)
2600 || within(addr, mod->module_core, mod->core_text_size)) 2670 || within(addr, mod->module_core, mod->core_text_size))
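
The module.c hunks above track a min/max module address range (module_addr_min/module_addr_max, kept up to date by the new module_alloc_update_bounds()) so that __module_text_address() can reject most addresses before walking the module list. A minimal user-space sketch of the same idea follows; the names and sample ranges are illustrative, not the kernel's.

    #include <stdio.h>
    #include <stddef.h>

    struct range { unsigned long start, size; };

    /* Bounds updated on every allocation, analogous to what
     * module_alloc_update_bounds() does for module_addr_min/max above. */
    static unsigned long addr_min = ~0UL, addr_max;

    static struct range ranges[8];
    static size_t nr_ranges;

    static void add_range(unsigned long start, unsigned long size)
    {
        ranges[nr_ranges++] = (struct range){ start, size };
        if (start < addr_min)
            addr_min = start;
        if (start + size > addr_max)
            addr_max = start + size;
    }

    /* Cheap rejection first; the linear scan only runs for plausible addresses. */
    static const struct range *lookup(unsigned long addr)
    {
        size_t i;

        if (addr < addr_min || addr > addr_max)
            return NULL;
        for (i = 0; i < nr_ranges; i++)
            if (addr - ranges[i].start < ranges[i].size)
                return &ranges[i];
        return NULL;
    }

    int main(void)
    {
        add_range(0x1000, 0x100);
        add_range(0x8000, 0x200);
        printf("hit=%d miss=%d\n", lookup(0x1080) != NULL, lookup(0x4000) != NULL);
        return 0;
    }

The early return mirrors the new bounds check added to __module_text_address(); the list walk itself is unchanged.
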
diff --git a/kernel/pid.c b/kernel/pid.c
index 20d59fa2d493..30bd5d4b2ac7 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -30,6 +30,7 @@
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32#include <linux/init.h> 32#include <linux/init.h>
33#include <linux/rculist.h>
33#include <linux/bootmem.h> 34#include <linux/bootmem.h>
34#include <linux/hash.h> 35#include <linux/hash.h>
35#include <linux/pid_namespace.h> 36#include <linux/pid_namespace.h>
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index b45da40e8d25..59dfdf1e1d20 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -82,7 +82,7 @@ config PM_SLEEP_SMP
82 82
83config PM_SLEEP 83config PM_SLEEP
84 bool 84 bool
85 depends on SUSPEND || HIBERNATION 85 depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
86 default y 86 default y
87 87
88config SUSPEND 88config SUSPEND
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 14a656cdc652..f011e0870b52 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -180,6 +180,17 @@ static void platform_restore_cleanup(int platform_mode)
180} 180}
181 181
182/** 182/**
183 * platform_recover - recover the platform from a failure to suspend
184 * devices.
185 */
186
187static void platform_recover(int platform_mode)
188{
189 if (platform_mode && hibernation_ops && hibernation_ops->recover)
190 hibernation_ops->recover();
191}
192
193/**
183 * create_image - freeze devices that need to be frozen with interrupts 194 * create_image - freeze devices that need to be frozen with interrupts
184 * off, create the hibernation image and thaw those devices. Control 195 * off, create the hibernation image and thaw those devices. Control
185 * reappears in this routine after a restore. 196 * reappears in this routine after a restore.
@@ -193,6 +204,7 @@ static int create_image(int platform_mode)
193 if (error) 204 if (error)
194 return error; 205 return error;
195 206
207 device_pm_lock();
196 local_irq_disable(); 208 local_irq_disable();
197 /* At this point, device_suspend() has been called, but *not* 209 /* At this point, device_suspend() has been called, but *not*
198 * device_power_down(). We *must* call device_power_down() now. 210 * device_power_down(). We *must* call device_power_down() now.
@@ -224,9 +236,11 @@ static int create_image(int platform_mode)
224 /* NOTE: device_power_up() is just a resume() for devices 236 /* NOTE: device_power_up() is just a resume() for devices
225 * that suspended with irqs off ... no overall powerup. 237 * that suspended with irqs off ... no overall powerup.
226 */ 238 */
227 device_power_up(); 239 device_power_up(in_suspend ?
240 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
228 Enable_irqs: 241 Enable_irqs:
229 local_irq_enable(); 242 local_irq_enable();
243 device_pm_unlock();
230 return error; 244 return error;
231} 245}
232 246
@@ -255,10 +269,10 @@ int hibernation_snapshot(int platform_mode)
255 suspend_console(); 269 suspend_console();
256 error = device_suspend(PMSG_FREEZE); 270 error = device_suspend(PMSG_FREEZE);
257 if (error) 271 if (error)
258 goto Resume_console; 272 goto Recover_platform;
259 273
260 if (hibernation_test(TEST_DEVICES)) 274 if (hibernation_test(TEST_DEVICES))
261 goto Resume_devices; 275 goto Recover_platform;
262 276
263 error = platform_pre_snapshot(platform_mode); 277 error = platform_pre_snapshot(platform_mode);
264 if (error || hibernation_test(TEST_PLATFORM)) 278 if (error || hibernation_test(TEST_PLATFORM))
@@ -280,12 +294,16 @@ int hibernation_snapshot(int platform_mode)
280 Finish: 294 Finish:
281 platform_finish(platform_mode); 295 platform_finish(platform_mode);
282 Resume_devices: 296 Resume_devices:
283 device_resume(); 297 device_resume(in_suspend ?
284 Resume_console: 298 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
285 resume_console(); 299 resume_console();
286 Close: 300 Close:
287 platform_end(platform_mode); 301 platform_end(platform_mode);
288 return error; 302 return error;
303
304 Recover_platform:
305 platform_recover(platform_mode);
306 goto Resume_devices;
289} 307}
290 308
291/** 309/**
@@ -300,8 +318,9 @@ static int resume_target_kernel(void)
300{ 318{
301 int error; 319 int error;
302 320
321 device_pm_lock();
303 local_irq_disable(); 322 local_irq_disable();
304 error = device_power_down(PMSG_PRETHAW); 323 error = device_power_down(PMSG_QUIESCE);
305 if (error) { 324 if (error) {
306 printk(KERN_ERR "PM: Some devices failed to power down, " 325 printk(KERN_ERR "PM: Some devices failed to power down, "
307 "aborting resume\n"); 326 "aborting resume\n");
@@ -329,9 +348,10 @@ static int resume_target_kernel(void)
329 swsusp_free(); 348 swsusp_free();
330 restore_processor_state(); 349 restore_processor_state();
331 touch_softlockup_watchdog(); 350 touch_softlockup_watchdog();
332 device_power_up(); 351 device_power_up(PMSG_RECOVER);
333 Enable_irqs: 352 Enable_irqs:
334 local_irq_enable(); 353 local_irq_enable();
354 device_pm_unlock();
335 return error; 355 return error;
336} 356}
337 357
@@ -350,7 +370,7 @@ int hibernation_restore(int platform_mode)
350 370
351 pm_prepare_console(); 371 pm_prepare_console();
352 suspend_console(); 372 suspend_console();
353 error = device_suspend(PMSG_PRETHAW); 373 error = device_suspend(PMSG_QUIESCE);
354 if (error) 374 if (error)
355 goto Finish; 375 goto Finish;
356 376
@@ -362,7 +382,7 @@ int hibernation_restore(int platform_mode)
362 enable_nonboot_cpus(); 382 enable_nonboot_cpus();
363 } 383 }
364 platform_restore_cleanup(platform_mode); 384 platform_restore_cleanup(platform_mode);
365 device_resume(); 385 device_resume(PMSG_RECOVER);
366 Finish: 386 Finish:
367 resume_console(); 387 resume_console();
368 pm_restore_console(); 388 pm_restore_console();
@@ -392,8 +412,11 @@ int hibernation_platform_enter(void)
392 412
393 suspend_console(); 413 suspend_console();
394 error = device_suspend(PMSG_HIBERNATE); 414 error = device_suspend(PMSG_HIBERNATE);
395 if (error) 415 if (error) {
396 goto Resume_console; 416 if (hibernation_ops->recover)
417 hibernation_ops->recover();
418 goto Resume_devices;
419 }
397 420
398 error = hibernation_ops->prepare(); 421 error = hibernation_ops->prepare();
399 if (error) 422 if (error)
@@ -403,6 +426,7 @@ int hibernation_platform_enter(void)
403 if (error) 426 if (error)
404 goto Finish; 427 goto Finish;
405 428
429 device_pm_lock();
406 local_irq_disable(); 430 local_irq_disable();
407 error = device_power_down(PMSG_HIBERNATE); 431 error = device_power_down(PMSG_HIBERNATE);
408 if (!error) { 432 if (!error) {
@@ -411,6 +435,7 @@ int hibernation_platform_enter(void)
411 while (1); 435 while (1);
412 } 436 }
413 local_irq_enable(); 437 local_irq_enable();
438 device_pm_unlock();
414 439
415 /* 440 /*
416 * We don't need to reenable the nonboot CPUs or resume consoles, since 441 * We don't need to reenable the nonboot CPUs or resume consoles, since
@@ -419,8 +444,7 @@ int hibernation_platform_enter(void)
419 Finish: 444 Finish:
420 hibernation_ops->finish(); 445 hibernation_ops->finish();
421 Resume_devices: 446 Resume_devices:
422 device_resume(); 447 device_resume(PMSG_RESTORE);
423 Resume_console:
424 resume_console(); 448 resume_console();
425 Close: 449 Close:
426 hibernation_ops->end(); 450 hibernation_ops->end();
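
The disk.c hunks above thread a pm_message_t through device_power_up()/device_resume(): PMSG_RECOVER when the snapshot attempt failed, PMSG_THAW when it succeeded and the image is about to be written, PMSG_RESTORE when control comes back in the restored kernel. A small stand-alone sketch of that three-way choice, with assumed enum values rather than the kernel's pm_message_t:

    #include <stdio.h>

    /* Assumed stand-ins for the PMSG_* events used in the hunks above. */
    enum pm_event { PMSG_THAW, PMSG_RECOVER, PMSG_RESTORE };

    static const char *pm_event_name(enum pm_event e)
    {
        switch (e) {
        case PMSG_THAW:    return "THAW";
        case PMSG_RECOVER: return "RECOVER";
        case PMSG_RESTORE: return "RESTORE";
        }
        return "?";
    }

    /* Mirrors: device_resume(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW)
     *                                    : PMSG_RESTORE);                     */
    static enum pm_event resume_event(int in_suspend, int error)
    {
        return in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE;
    }

    int main(void)
    {
        printf("%s %s %s\n",
               pm_event_name(resume_event(1, 0)),   /* image created, ok  */
               pm_event_name(resume_event(1, -1)),  /* snapshot failed    */
               pm_event_name(resume_event(0, 0)));  /* back from restore  */
        return 0;
    }
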
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6a6d5eb3524e..3398f4651aa1 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -228,6 +228,7 @@ static int suspend_enter(suspend_state_t state)
228{ 228{
229 int error = 0; 229 int error = 0;
230 230
231 device_pm_lock();
231 arch_suspend_disable_irqs(); 232 arch_suspend_disable_irqs();
232 BUG_ON(!irqs_disabled()); 233 BUG_ON(!irqs_disabled());
233 234
@@ -239,10 +240,11 @@ static int suspend_enter(suspend_state_t state)
239 if (!suspend_test(TEST_CORE)) 240 if (!suspend_test(TEST_CORE))
240 error = suspend_ops->enter(state); 241 error = suspend_ops->enter(state);
241 242
242 device_power_up(); 243 device_power_up(PMSG_RESUME);
243 Done: 244 Done:
244 arch_suspend_enable_irqs(); 245 arch_suspend_enable_irqs();
245 BUG_ON(irqs_disabled()); 246 BUG_ON(irqs_disabled());
247 device_pm_unlock();
246 return error; 248 return error;
247} 249}
248 250
@@ -267,11 +269,11 @@ int suspend_devices_and_enter(suspend_state_t state)
267 error = device_suspend(PMSG_SUSPEND); 269 error = device_suspend(PMSG_SUSPEND);
268 if (error) { 270 if (error) {
269 printk(KERN_ERR "PM: Some devices failed to suspend\n"); 271 printk(KERN_ERR "PM: Some devices failed to suspend\n");
270 goto Resume_console; 272 goto Recover_platform;
271 } 273 }
272 274
273 if (suspend_test(TEST_DEVICES)) 275 if (suspend_test(TEST_DEVICES))
274 goto Resume_devices; 276 goto Recover_platform;
275 277
276 if (suspend_ops->prepare) { 278 if (suspend_ops->prepare) {
277 error = suspend_ops->prepare(); 279 error = suspend_ops->prepare();
@@ -291,13 +293,17 @@ int suspend_devices_and_enter(suspend_state_t state)
291 if (suspend_ops->finish) 293 if (suspend_ops->finish)
292 suspend_ops->finish(); 294 suspend_ops->finish();
293 Resume_devices: 295 Resume_devices:
294 device_resume(); 296 device_resume(PMSG_RESUME);
295 Resume_console:
296 resume_console(); 297 resume_console();
297 Close: 298 Close:
298 if (suspend_ops->end) 299 if (suspend_ops->end)
299 suspend_ops->end(); 300 suspend_ops->end();
300 return error; 301 return error;
302
303 Recover_platform:
304 if (suspend_ops->recover)
305 suspend_ops->recover();
306 goto Resume_devices;
301} 307}
302 308
303/** 309/**
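
Both suspend_devices_and_enter() and hibernation_snapshot() now route early device_suspend() failures through a Recover_platform label that calls the platform's ->recover() hook before falling back into the normal resume path. Roughly, as a compilable user-space sketch; the helpers here are stand-ins, not kernel APIs:

    #include <stdio.h>

    /* Hypothetical stand-ins for the steps in the hunks above. */
    static int suspend_devices(void)   { return -1; /* simulate a failure */ }
    static void resume_devices(void)   { puts("resume devices"); }
    static void platform_recover(void) { puts("platform ->recover()"); }

    static int enter_state(void)
    {
        int error;

        error = suspend_devices();
        if (error)
            goto Recover_platform;

        /* ... enter the sleep state here ... */

     Resume_devices:
        resume_devices();
        return error;

     Recover_platform:
        platform_recover();
        goto Resume_devices;
    }

    int main(void)
    {
        printf("enter_state() = %d\n", enter_state());
        return 0;
    }

The point of the extra label is that recovery runs exactly once, and only on the failure path, while the resume/cleanup code stays shared.
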
diff --git a/kernel/power/process.c b/kernel/power/process.c
index f1d0b345c9ba..5fb87652f214 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -19,9 +19,6 @@
19 */ 19 */
20#define TIMEOUT (20 * HZ) 20#define TIMEOUT (20 * HZ)
21 21
22#define FREEZER_KERNEL_THREADS 0
23#define FREEZER_USER_SPACE 1
24
25static inline int freezeable(struct task_struct * p) 22static inline int freezeable(struct task_struct * p)
26{ 23{
27 if ((p == current) || 24 if ((p == current) ||
@@ -84,63 +81,53 @@ static void fake_signal_wake_up(struct task_struct *p)
84 spin_unlock_irqrestore(&p->sighand->siglock, flags); 81 spin_unlock_irqrestore(&p->sighand->siglock, flags);
85} 82}
86 83
87static int has_mm(struct task_struct *p) 84static inline bool should_send_signal(struct task_struct *p)
88{ 85{
89 return (p->mm && !(p->flags & PF_BORROWED_MM)); 86 return !(p->flags & PF_FREEZER_NOSIG);
90} 87}
91 88
92/** 89/**
93 * freeze_task - send a freeze request to given task 90 * freeze_task - send a freeze request to given task
94 * @p: task to send the request to 91 * @p: task to send the request to
95 * @with_mm_only: if set, the request will only be sent if the task has its 92 * @sig_only: if set, the request will only be sent if the task has the
96 * own mm 93 * PF_FREEZER_NOSIG flag unset
97 * Return value: 0, if @with_mm_only is set and the task has no mm of its 94 * Return value: 'false', if @sig_only is set and the task has
98 * own or the task is frozen, 1, otherwise 95 * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
99 * 96 *
 100 * The freeze request is sent by seting the task's TIF_FREEZE flag and 97 * The freeze request is sent by setting the task's TIF_FREEZE flag and
101 * either sending a fake signal to it or waking it up, depending on whether 98 * either sending a fake signal to it or waking it up, depending on whether
102 * or not it has its own mm (ie. it is a user land task). If @with_mm_only 99 * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
103 * is set and the task has no mm of its own (ie. it is a kernel thread), 100 * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
104 * its TIF_FREEZE flag should not be set. 101 * TIF_FREEZE flag will not be set.
105 *
106 * The task_lock() is necessary to prevent races with exit_mm() or
107 * use_mm()/unuse_mm() from occuring.
108 */ 102 */
109static int freeze_task(struct task_struct *p, int with_mm_only) 103static bool freeze_task(struct task_struct *p, bool sig_only)
110{ 104{
111 int ret = 1; 105 /*
106 * We first check if the task is freezing and next if it has already
107 * been frozen to avoid the race with frozen_process() which first marks
108 * the task as frozen and next clears its TIF_FREEZE.
109 */
110 if (!freezing(p)) {
111 rmb();
112 if (frozen(p))
113 return false;
112 114
113 task_lock(p); 115 if (!sig_only || should_send_signal(p))
114 if (freezing(p)) { 116 set_freeze_flag(p);
115 if (has_mm(p)) { 117 else
116 if (!signal_pending(p)) 118 return false;
117 fake_signal_wake_up(p); 119 }
118 } else { 120
119 if (with_mm_only) 121 if (should_send_signal(p)) {
120 ret = 0; 122 if (!signal_pending(p))
121 else 123 fake_signal_wake_up(p);
122 wake_up_state(p, TASK_INTERRUPTIBLE); 124 } else if (sig_only) {
123 } 125 return false;
124 } else { 126 } else {
125 rmb(); 127 wake_up_state(p, TASK_INTERRUPTIBLE);
126 if (frozen(p)) {
127 ret = 0;
128 } else {
129 if (has_mm(p)) {
130 set_freeze_flag(p);
131 fake_signal_wake_up(p);
132 } else {
133 if (with_mm_only) {
134 ret = 0;
135 } else {
136 set_freeze_flag(p);
137 wake_up_state(p, TASK_INTERRUPTIBLE);
138 }
139 }
140 }
141 } 128 }
142 task_unlock(p); 129
143 return ret; 130 return true;
144} 131}
145 132
146static void cancel_freezing(struct task_struct *p) 133static void cancel_freezing(struct task_struct *p)
@@ -156,7 +143,7 @@ static void cancel_freezing(struct task_struct *p)
156 } 143 }
157} 144}
158 145
159static int try_to_freeze_tasks(int freeze_user_space) 146static int try_to_freeze_tasks(bool sig_only)
160{ 147{
161 struct task_struct *g, *p; 148 struct task_struct *g, *p;
162 unsigned long end_time; 149 unsigned long end_time;
@@ -175,7 +162,7 @@ static int try_to_freeze_tasks(int freeze_user_space)
175 if (frozen(p) || !freezeable(p)) 162 if (frozen(p) || !freezeable(p))
176 continue; 163 continue;
177 164
178 if (!freeze_task(p, freeze_user_space)) 165 if (!freeze_task(p, sig_only))
179 continue; 166 continue;
180 167
181 /* 168 /*
@@ -235,13 +222,13 @@ int freeze_processes(void)
235 int error; 222 int error;
236 223
237 printk("Freezing user space processes ... "); 224 printk("Freezing user space processes ... ");
238 error = try_to_freeze_tasks(FREEZER_USER_SPACE); 225 error = try_to_freeze_tasks(true);
239 if (error) 226 if (error)
240 goto Exit; 227 goto Exit;
241 printk("done.\n"); 228 printk("done.\n");
242 229
243 printk("Freezing remaining freezable tasks ... "); 230 printk("Freezing remaining freezable tasks ... ");
244 error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); 231 error = try_to_freeze_tasks(false);
245 if (error) 232 if (error)
246 goto Exit; 233 goto Exit;
247 printk("done."); 234 printk("done.");
@@ -251,7 +238,7 @@ int freeze_processes(void)
251 return error; 238 return error;
252} 239}
253 240
254static void thaw_tasks(int thaw_user_space) 241static void thaw_tasks(bool nosig_only)
255{ 242{
256 struct task_struct *g, *p; 243 struct task_struct *g, *p;
257 244
@@ -260,7 +247,7 @@ static void thaw_tasks(int thaw_user_space)
260 if (!freezeable(p)) 247 if (!freezeable(p))
261 continue; 248 continue;
262 249
263 if (!p->mm == thaw_user_space) 250 if (nosig_only && should_send_signal(p))
264 continue; 251 continue;
265 252
266 thaw_process(p); 253 thaw_process(p);
@@ -271,8 +258,8 @@ static void thaw_tasks(int thaw_user_space)
271void thaw_processes(void) 258void thaw_processes(void)
272{ 259{
273 printk("Restarting tasks ... "); 260 printk("Restarting tasks ... ");
274 thaw_tasks(FREEZER_KERNEL_THREADS); 261 thaw_tasks(true);
275 thaw_tasks(FREEZER_USER_SPACE); 262 thaw_tasks(false);
276 schedule(); 263 schedule();
277 printk("done.\n"); 264 printk("done.\n");
278} 265}
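
The rewritten freeze_task() above keys everything off PF_FREEZER_NOSIG instead of whether the task owns an mm: a sig_only pass skips PF_FREEZER_NOSIG tasks entirely, signal-capable tasks get a fake signal, and the rest are simply woken. A simplified, lock-free user-space model of that decision; the struct fields are stand-ins for the real task flags:

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified task model; PF_FREEZER_NOSIG is the only flag we care about. */
    #define PF_FREEZER_NOSIG 0x1

    struct task {
        unsigned flags;
        bool freezing;
        bool frozen;
        bool woken;                     /* stands in for signal/wake-up */
    };

    static bool should_send_signal(struct task *p)
    {
        return !(p->flags & PF_FREEZER_NOSIG);
    }

    /* Roughly the decision structure of the new freeze_task(): returns true
     * if a freeze request was (or already had been) issued to this task. */
    static bool freeze_task(struct task *p, bool sig_only)
    {
        if (!p->freezing) {
            if (p->frozen)
                return false;
            if (!sig_only || should_send_signal(p))
                p->freezing = true;     /* set_freeze_flag() */
            else
                return false;
        }

        if (should_send_signal(p))
            p->woken = true;            /* fake_signal_wake_up() */
        else if (sig_only)
            return false;
        else
            p->woken = true;            /* wake_up_state() */

        return true;
    }

    int main(void)
    {
        struct task user = { 0 }, kthread = { PF_FREEZER_NOSIG };

        printf("user, sig_only=1:    %d\n", freeze_task(&user, true));
        printf("kthread, sig_only=1: %d\n", freeze_task(&kthread, true));
        printf("kthread, sig_only=0: %d\n", freeze_task(&kthread, false));
        return 0;
    }
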
diff --git a/kernel/power/user.c b/kernel/power/user.c
index f5512cb3aa86..a6332a313262 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -23,6 +23,7 @@
23#include <linux/console.h> 23#include <linux/console.h>
24#include <linux/cpu.h> 24#include <linux/cpu.h>
25#include <linux/freezer.h> 25#include <linux/freezer.h>
26#include <linux/smp_lock.h>
26 27
27#include <asm/uaccess.h> 28#include <asm/uaccess.h>
28 29
@@ -69,16 +70,22 @@ static int snapshot_open(struct inode *inode, struct file *filp)
69 struct snapshot_data *data; 70 struct snapshot_data *data;
70 int error; 71 int error;
71 72
72 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) 73 mutex_lock(&pm_mutex);
73 return -EBUSY; 74
75 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
76 error = -EBUSY;
77 goto Unlock;
78 }
74 79
75 if ((filp->f_flags & O_ACCMODE) == O_RDWR) { 80 if ((filp->f_flags & O_ACCMODE) == O_RDWR) {
76 atomic_inc(&snapshot_device_available); 81 atomic_inc(&snapshot_device_available);
77 return -ENOSYS; 82 error = -ENOSYS;
83 goto Unlock;
78 } 84 }
79 if(create_basic_memory_bitmaps()) { 85 if(create_basic_memory_bitmaps()) {
80 atomic_inc(&snapshot_device_available); 86 atomic_inc(&snapshot_device_available);
81 return -ENOMEM; 87 error = -ENOMEM;
88 goto Unlock;
82 } 89 }
83 nonseekable_open(inode, filp); 90 nonseekable_open(inode, filp);
84 data = &snapshot_state; 91 data = &snapshot_state;
@@ -98,33 +105,36 @@ static int snapshot_open(struct inode *inode, struct file *filp)
98 if (error) 105 if (error)
99 pm_notifier_call_chain(PM_POST_HIBERNATION); 106 pm_notifier_call_chain(PM_POST_HIBERNATION);
100 } 107 }
101 if (error) { 108 if (error)
102 atomic_inc(&snapshot_device_available); 109 atomic_inc(&snapshot_device_available);
103 return error;
104 }
105 data->frozen = 0; 110 data->frozen = 0;
106 data->ready = 0; 111 data->ready = 0;
107 data->platform_support = 0; 112 data->platform_support = 0;
108 113
109 return 0; 114 Unlock:
115 mutex_unlock(&pm_mutex);
116
117 return error;
110} 118}
111 119
112static int snapshot_release(struct inode *inode, struct file *filp) 120static int snapshot_release(struct inode *inode, struct file *filp)
113{ 121{
114 struct snapshot_data *data; 122 struct snapshot_data *data;
115 123
124 mutex_lock(&pm_mutex);
125
116 swsusp_free(); 126 swsusp_free();
117 free_basic_memory_bitmaps(); 127 free_basic_memory_bitmaps();
118 data = filp->private_data; 128 data = filp->private_data;
119 free_all_swap_pages(data->swap); 129 free_all_swap_pages(data->swap);
120 if (data->frozen) { 130 if (data->frozen)
121 mutex_lock(&pm_mutex);
122 thaw_processes(); 131 thaw_processes();
123 mutex_unlock(&pm_mutex);
124 }
125 pm_notifier_call_chain(data->mode == O_WRONLY ? 132 pm_notifier_call_chain(data->mode == O_WRONLY ?
126 PM_POST_HIBERNATION : PM_POST_RESTORE); 133 PM_POST_HIBERNATION : PM_POST_RESTORE);
127 atomic_inc(&snapshot_device_available); 134 atomic_inc(&snapshot_device_available);
135
136 mutex_unlock(&pm_mutex);
137
128 return 0; 138 return 0;
129} 139}
130 140
@@ -134,9 +144,13 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
134 struct snapshot_data *data; 144 struct snapshot_data *data;
135 ssize_t res; 145 ssize_t res;
136 146
147 mutex_lock(&pm_mutex);
148
137 data = filp->private_data; 149 data = filp->private_data;
138 if (!data->ready) 150 if (!data->ready) {
139 return -ENODATA; 151 res = -ENODATA;
152 goto Unlock;
153 }
140 res = snapshot_read_next(&data->handle, count); 154 res = snapshot_read_next(&data->handle, count);
141 if (res > 0) { 155 if (res > 0) {
142 if (copy_to_user(buf, data_of(data->handle), res)) 156 if (copy_to_user(buf, data_of(data->handle), res))
@@ -144,6 +158,10 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
144 else 158 else
145 *offp = data->handle.offset; 159 *offp = data->handle.offset;
146 } 160 }
161
162 Unlock:
163 mutex_unlock(&pm_mutex);
164
147 return res; 165 return res;
148} 166}
149 167
@@ -153,6 +171,8 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
153 struct snapshot_data *data; 171 struct snapshot_data *data;
154 ssize_t res; 172 ssize_t res;
155 173
174 mutex_lock(&pm_mutex);
175
156 data = filp->private_data; 176 data = filp->private_data;
157 res = snapshot_write_next(&data->handle, count); 177 res = snapshot_write_next(&data->handle, count);
158 if (res > 0) { 178 if (res > 0) {
@@ -161,11 +181,14 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
161 else 181 else
162 *offp = data->handle.offset; 182 *offp = data->handle.offset;
163 } 183 }
184
185 mutex_unlock(&pm_mutex);
186
164 return res; 187 return res;
165} 188}
166 189
167static int snapshot_ioctl(struct inode *inode, struct file *filp, 190static long snapshot_ioctl(struct file *filp, unsigned int cmd,
168 unsigned int cmd, unsigned long arg) 191 unsigned long arg)
169{ 192{
170 int error = 0; 193 int error = 0;
171 struct snapshot_data *data; 194 struct snapshot_data *data;
@@ -179,6 +202,9 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
179 if (!capable(CAP_SYS_ADMIN)) 202 if (!capable(CAP_SYS_ADMIN))
180 return -EPERM; 203 return -EPERM;
181 204
205 if (!mutex_trylock(&pm_mutex))
206 return -EBUSY;
207
182 data = filp->private_data; 208 data = filp->private_data;
183 209
184 switch (cmd) { 210 switch (cmd) {
@@ -186,7 +212,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
186 case SNAPSHOT_FREEZE: 212 case SNAPSHOT_FREEZE:
187 if (data->frozen) 213 if (data->frozen)
188 break; 214 break;
189 mutex_lock(&pm_mutex);
190 printk("Syncing filesystems ... "); 215 printk("Syncing filesystems ... ");
191 sys_sync(); 216 sys_sync();
192 printk("done.\n"); 217 printk("done.\n");
@@ -194,7 +219,6 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
194 error = freeze_processes(); 219 error = freeze_processes();
195 if (error) 220 if (error)
196 thaw_processes(); 221 thaw_processes();
197 mutex_unlock(&pm_mutex);
198 if (!error) 222 if (!error)
199 data->frozen = 1; 223 data->frozen = 1;
200 break; 224 break;
@@ -202,9 +226,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
202 case SNAPSHOT_UNFREEZE: 226 case SNAPSHOT_UNFREEZE:
203 if (!data->frozen || data->ready) 227 if (!data->frozen || data->ready)
204 break; 228 break;
205 mutex_lock(&pm_mutex);
206 thaw_processes(); 229 thaw_processes();
207 mutex_unlock(&pm_mutex);
208 data->frozen = 0; 230 data->frozen = 0;
209 break; 231 break;
210 232
@@ -307,16 +329,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
307 error = -EPERM; 329 error = -EPERM;
308 break; 330 break;
309 } 331 }
310 if (!mutex_trylock(&pm_mutex)) {
311 error = -EBUSY;
312 break;
313 }
314 /* 332 /*
315 * Tasks are frozen and the notifiers have been called with 333 * Tasks are frozen and the notifiers have been called with
316 * PM_HIBERNATION_PREPARE 334 * PM_HIBERNATION_PREPARE
317 */ 335 */
318 error = suspend_devices_and_enter(PM_SUSPEND_MEM); 336 error = suspend_devices_and_enter(PM_SUSPEND_MEM);
319 mutex_unlock(&pm_mutex);
320 break; 337 break;
321 338
322 case SNAPSHOT_PLATFORM_SUPPORT: 339 case SNAPSHOT_PLATFORM_SUPPORT:
@@ -390,6 +407,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
390 407
391 } 408 }
392 409
410 mutex_unlock(&pm_mutex);
411
393 return error; 412 return error;
394} 413}
395 414
@@ -399,7 +418,7 @@ static const struct file_operations snapshot_fops = {
399 .read = snapshot_read, 418 .read = snapshot_read,
400 .write = snapshot_write, 419 .write = snapshot_write,
401 .llseek = no_llseek, 420 .llseek = no_llseek,
402 .ioctl = snapshot_ioctl, 421 .unlocked_ioctl = snapshot_ioctl,
403}; 422};
404 423
405static struct miscdevice snapshot_device = { 424static struct miscdevice snapshot_device = {
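
snapshot_ioctl() is converted from ->ioctl to ->unlocked_ioctl, so it no longer runs under the BKL; instead the whole handler is serialized by pm_mutex, taken with a trylock so a concurrent caller gets -EBUSY rather than blocking. The same shape in a plain pthread sketch (command numbers and names are made up):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t pm_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Fail fast with EBUSY if another operation already holds the lock,
     * then hold it across the whole command, as the converted handler does. */
    static int snapshot_cmd(int cmd)
    {
        int error = 0;

        if (pthread_mutex_trylock(&pm_mutex) != 0)
            return -EBUSY;

        switch (cmd) {
        case 0:
            /* ... freeze ... */
            break;
        default:
            error = -ENOTTY;
            break;
        }

        pthread_mutex_unlock(&pm_mutex);
        return error;
    }

    int main(void)
    {
        printf("cmd 0 -> %d\n", snapshot_cmd(0));
        printf("cmd 9 -> %d\n", snapshot_cmd(9));
        return 0;
    }

Build with -pthread; holding the one mutex for the whole call is what lets the per-command mutex_lock()/mutex_unlock() pairs above be removed.
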
diff --git a/kernel/profile.c b/kernel/profile.c
index ae7ead82cbc9..58926411eb2a 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -252,7 +252,7 @@ static void profile_flip_buffers(void)
252 mutex_lock(&profile_flip_mutex); 252 mutex_lock(&profile_flip_mutex);
253 j = per_cpu(cpu_profile_flip, get_cpu()); 253 j = per_cpu(cpu_profile_flip, get_cpu());
254 put_cpu(); 254 put_cpu();
255 on_each_cpu(__profile_flip_buffers, NULL, 0, 1); 255 on_each_cpu(__profile_flip_buffers, NULL, 1);
256 for_each_online_cpu(cpu) { 256 for_each_online_cpu(cpu) {
257 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j]; 257 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
258 for (i = 0; i < NR_PROFILE_HIT; ++i) { 258 for (i = 0; i < NR_PROFILE_HIT; ++i) {
@@ -275,7 +275,7 @@ static void profile_discard_flip_buffers(void)
275 mutex_lock(&profile_flip_mutex); 275 mutex_lock(&profile_flip_mutex);
276 i = per_cpu(cpu_profile_flip, get_cpu()); 276 i = per_cpu(cpu_profile_flip, get_cpu());
277 put_cpu(); 277 put_cpu();
278 on_each_cpu(__profile_flip_buffers, NULL, 0, 1); 278 on_each_cpu(__profile_flip_buffers, NULL, 1);
279 for_each_online_cpu(cpu) { 279 for_each_online_cpu(cpu) {
280 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i]; 280 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
281 memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit)); 281 memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
@@ -558,7 +558,7 @@ static int __init create_hash_tables(void)
558out_cleanup: 558out_cleanup:
559 prof_on = 0; 559 prof_on = 0;
560 smp_mb(); 560 smp_mb();
561 on_each_cpu(profile_nop, NULL, 0, 1); 561 on_each_cpu(profile_nop, NULL, 1);
562 for_each_online_cpu(cpu) { 562 for_each_online_cpu(cpu) {
563 struct page *page; 563 struct page *page;
564 564
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e337390fce01..8392a9da6450 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -33,13 +33,9 @@
33 */ 33 */
34void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) 34void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
35{ 35{
36 BUG_ON(!list_empty(&child->ptrace_list)); 36 BUG_ON(!list_empty(&child->ptrace_entry));
37 if (child->parent == new_parent) 37 list_add(&child->ptrace_entry, &new_parent->ptraced);
38 return;
39 list_add(&child->ptrace_list, &child->parent->ptrace_children);
40 remove_parent(child);
41 child->parent = new_parent; 38 child->parent = new_parent;
42 add_parent(child);
43} 39}
44 40
45/* 41/*
@@ -73,12 +69,8 @@ void __ptrace_unlink(struct task_struct *child)
73 BUG_ON(!child->ptrace); 69 BUG_ON(!child->ptrace);
74 70
75 child->ptrace = 0; 71 child->ptrace = 0;
76 if (ptrace_reparented(child)) { 72 child->parent = child->real_parent;
77 list_del_init(&child->ptrace_list); 73 list_del_init(&child->ptrace_entry);
78 remove_parent(child);
79 child->parent = child->real_parent;
80 add_parent(child);
81 }
82 74
83 if (task_is_traced(child)) 75 if (task_is_traced(child))
84 ptrace_untrace(child); 76 ptrace_untrace(child);
@@ -492,15 +484,34 @@ int ptrace_traceme(void)
492 /* 484 /*
493 * Are we already being traced? 485 * Are we already being traced?
494 */ 486 */
487repeat:
495 task_lock(current); 488 task_lock(current);
496 if (!(current->ptrace & PT_PTRACED)) { 489 if (!(current->ptrace & PT_PTRACED)) {
490 /*
491 * See ptrace_attach() comments about the locking here.
492 */
493 unsigned long flags;
494 if (!write_trylock_irqsave(&tasklist_lock, flags)) {
495 task_unlock(current);
496 do {
497 cpu_relax();
498 } while (!write_can_lock(&tasklist_lock));
499 goto repeat;
500 }
501
497 ret = security_ptrace(current->parent, current, 502 ret = security_ptrace(current->parent, current,
498 PTRACE_MODE_ATTACH); 503 PTRACE_MODE_ATTACH);
504
499 /* 505 /*
500 * Set the ptrace bit in the process ptrace flags. 506 * Set the ptrace bit in the process ptrace flags.
507 * Then link us on our parent's ptraced list.
501 */ 508 */
502 if (!ret) 509 if (!ret) {
503 current->ptrace |= PT_PTRACED; 510 current->ptrace |= PT_PTRACED;
511 __ptrace_link(current, current->real_parent);
512 }
513
514 write_unlock_irqrestore(&tasklist_lock, flags);
504 } 515 }
505 task_unlock(current); 516 task_unlock(current);
506 return ret; 517 return ret;
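
ptrace_traceme() now needs tasklist_lock for writing while already holding task_lock(current); to avoid blocking in the wrong lock order it uses write_trylock_irqsave() and, on failure, drops task_lock, spins with cpu_relax() until the lock looks free, and retries. A user-space sketch of that back-off loop with two pthread mutexes (names are illustrative):

    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>

    static pthread_mutex_t task_lock     = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t tasklist_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Take task_lock first, then try for tasklist_lock; if that fails, drop
     * everything, back off, and start over, the same shape as the
     * write_trylock_irqsave() loop added to ptrace_traceme(). */
    static void link_self(void)
    {
    repeat:
        pthread_mutex_lock(&task_lock);
        if (pthread_mutex_trylock(&tasklist_lock) != 0) {
            pthread_mutex_unlock(&task_lock);
            sched_yield();      /* stands in for the cpu_relax() spin */
            goto repeat;
        }

        /* ... both locks held: update the ptrace linkage ... */

        pthread_mutex_unlock(&tasklist_lock);
        pthread_mutex_unlock(&task_lock);
    }

    int main(void)
    {
        link_self();
        puts("linked");
        return 0;
    }
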
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 65c0906080ef..16eeeaa9d618 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -387,6 +387,10 @@ static void __rcu_offline_cpu(struct rcu_data *this_rdp,
387 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail); 387 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
388 rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); 388 rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
389 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); 389 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
390
391 local_irq_disable();
392 this_rdp->qlen += rdp->qlen;
393 local_irq_enable();
390} 394}
391 395
392static void rcu_offline_cpu(int cpu) 396static void rcu_offline_cpu(int cpu)
@@ -516,10 +520,38 @@ void rcu_check_callbacks(int cpu, int user)
516 if (user || 520 if (user ||
517 (idle_cpu(cpu) && !in_softirq() && 521 (idle_cpu(cpu) && !in_softirq() &&
518 hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 522 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
523
524 /*
525 * Get here if this CPU took its interrupt from user
526 * mode or from the idle loop, and if this is not a
527 * nested interrupt. In this case, the CPU is in
528 * a quiescent state, so count it.
529 *
530 * Also do a memory barrier. This is needed to handle
531 * the case where writes from a preempt-disable section
532 * of code get reordered into schedule() by this CPU's
533 * write buffer. The memory barrier makes sure that
 534 * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are seen
535 * by other CPUs to happen after any such write.
536 */
537
538 smp_mb(); /* See above block comment. */
519 rcu_qsctr_inc(cpu); 539 rcu_qsctr_inc(cpu);
520 rcu_bh_qsctr_inc(cpu); 540 rcu_bh_qsctr_inc(cpu);
521 } else if (!in_softirq()) 541
542 } else if (!in_softirq()) {
543
544 /*
545 * Get here if this CPU did not take its interrupt from
546 * softirq, in other words, if it is not interrupting
547 * a rcu_bh read-side critical section. This is an _bh
548 * critical section, so count it. The memory barrier
549 * is needed for the same reason as is the above one.
550 */
551
552 smp_mb(); /* See above block comment. */
522 rcu_bh_qsctr_inc(cpu); 553 rcu_bh_qsctr_inc(cpu);
554 }
523 raise_rcu_softirq(); 555 raise_rcu_softirq();
524} 556}
525 557
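
The enlarged rcu_check_callbacks() above counts a quiescent state when the tick arrived from user mode, or from the idle loop outside softirq with no nested hardirq (which is what hardirq_count() <= (1 << HARDIRQ_SHIFT) expresses), and adds an smp_mb() so preempt-disabled writes cannot appear to follow the counter increment. The context test alone, as an assumed, simplified user-space predicate:

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified view of the context checks in rcu_check_callbacks(). */
    struct ctx {
        bool user;
        bool idle;
        bool in_softirq;
        int  hardirq_nesting;   /* 1 == just this timer interrupt */
    };

    static bool counts_as_quiescent(const struct ctx *c)
    {
        return c->user ||
               (c->idle && !c->in_softirq && c->hardirq_nesting <= 1);
    }

    int main(void)
    {
        struct ctx from_user = { .user = true };
        struct ctx from_idle = { .idle = true, .hardirq_nesting = 1 };
        struct ctx nested    = { .idle = true, .hardirq_nesting = 2 };

        printf("%d %d %d\n", counts_as_quiescent(&from_user),
               counts_as_quiescent(&from_idle), counts_as_quiescent(&nested));
        return 0;
    }
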
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c09605f8d16c..f14f372cf6f5 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -39,16 +39,16 @@
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <asm/atomic.h> 40#include <asm/atomic.h>
41#include <linux/bitops.h> 41#include <linux/bitops.h>
42#include <linux/completion.h>
43#include <linux/percpu.h> 42#include <linux/percpu.h>
44#include <linux/notifier.h> 43#include <linux/notifier.h>
45#include <linux/cpu.h> 44#include <linux/cpu.h>
46#include <linux/mutex.h> 45#include <linux/mutex.h>
47#include <linux/module.h> 46#include <linux/module.h>
48 47
49struct rcu_synchronize { 48enum rcu_barrier {
50 struct rcu_head head; 49 RCU_BARRIER_STD,
51 struct completion completion; 50 RCU_BARRIER_BH,
51 RCU_BARRIER_SCHED,
52}; 52};
53 53
54static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; 54static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
@@ -60,7 +60,7 @@ static struct completion rcu_barrier_completion;
60 * Awaken the corresponding synchronize_rcu() instance now that a 60 * Awaken the corresponding synchronize_rcu() instance now that a
61 * grace period has elapsed. 61 * grace period has elapsed.
62 */ 62 */
63static void wakeme_after_rcu(struct rcu_head *head) 63void wakeme_after_rcu(struct rcu_head *head)
64{ 64{
65 struct rcu_synchronize *rcu; 65 struct rcu_synchronize *rcu;
66 66
@@ -77,17 +77,7 @@ static void wakeme_after_rcu(struct rcu_head *head)
77 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 77 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
78 * and may be nested. 78 * and may be nested.
79 */ 79 */
80void synchronize_rcu(void) 80synchronize_rcu_xxx(synchronize_rcu, call_rcu)
81{
82 struct rcu_synchronize rcu;
83
84 init_completion(&rcu.completion);
85 /* Will wake me after RCU finished */
86 call_rcu(&rcu.head, wakeme_after_rcu);
87
88 /* Wait for it */
89 wait_for_completion(&rcu.completion);
90}
91EXPORT_SYMBOL_GPL(synchronize_rcu); 81EXPORT_SYMBOL_GPL(synchronize_rcu);
92 82
93static void rcu_barrier_callback(struct rcu_head *notused) 83static void rcu_barrier_callback(struct rcu_head *notused)
@@ -99,19 +89,30 @@ static void rcu_barrier_callback(struct rcu_head *notused)
99/* 89/*
100 * Called with preemption disabled, and from cross-cpu IRQ context. 90 * Called with preemption disabled, and from cross-cpu IRQ context.
101 */ 91 */
102static void rcu_barrier_func(void *notused) 92static void rcu_barrier_func(void *type)
103{ 93{
104 int cpu = smp_processor_id(); 94 int cpu = smp_processor_id();
105 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); 95 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
106 96
107 atomic_inc(&rcu_barrier_cpu_count); 97 atomic_inc(&rcu_barrier_cpu_count);
108 call_rcu(head, rcu_barrier_callback); 98 switch ((enum rcu_barrier)type) {
99 case RCU_BARRIER_STD:
100 call_rcu(head, rcu_barrier_callback);
101 break;
102 case RCU_BARRIER_BH:
103 call_rcu_bh(head, rcu_barrier_callback);
104 break;
105 case RCU_BARRIER_SCHED:
106 call_rcu_sched(head, rcu_barrier_callback);
107 break;
108 }
109} 109}
110 110
111/** 111/*
112 * rcu_barrier - Wait until all the in-flight RCUs are complete. 112 * Orchestrate the specified type of RCU barrier, waiting for all
113 * RCU callbacks of the specified type to complete.
113 */ 114 */
114void rcu_barrier(void) 115static void _rcu_barrier(enum rcu_barrier type)
115{ 116{
116 BUG_ON(in_interrupt()); 117 BUG_ON(in_interrupt());
117 /* Take cpucontrol mutex to protect against CPU hotplug */ 118 /* Take cpucontrol mutex to protect against CPU hotplug */
@@ -127,13 +128,39 @@ void rcu_barrier(void)
127 * until all the callbacks are queued. 128 * until all the callbacks are queued.
128 */ 129 */
129 rcu_read_lock(); 130 rcu_read_lock();
130 on_each_cpu(rcu_barrier_func, NULL, 0, 1); 131 on_each_cpu(rcu_barrier_func, (void *)type, 1);
131 rcu_read_unlock(); 132 rcu_read_unlock();
132 wait_for_completion(&rcu_barrier_completion); 133 wait_for_completion(&rcu_barrier_completion);
133 mutex_unlock(&rcu_barrier_mutex); 134 mutex_unlock(&rcu_barrier_mutex);
134} 135}
136
137/**
138 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
139 */
140void rcu_barrier(void)
141{
142 _rcu_barrier(RCU_BARRIER_STD);
143}
135EXPORT_SYMBOL_GPL(rcu_barrier); 144EXPORT_SYMBOL_GPL(rcu_barrier);
136 145
146/**
147 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
148 */
149void rcu_barrier_bh(void)
150{
151 _rcu_barrier(RCU_BARRIER_BH);
152}
153EXPORT_SYMBOL_GPL(rcu_barrier_bh);
154
155/**
156 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
157 */
158void rcu_barrier_sched(void)
159{
160 _rcu_barrier(RCU_BARRIER_SCHED);
161}
162EXPORT_SYMBOL_GPL(rcu_barrier_sched);
163
137void __init rcu_init(void) 164void __init rcu_init(void)
138{ 165{
139 __rcu_init(); 166 __rcu_init();
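
rcu_barrier(), rcu_barrier_bh() and rcu_barrier_sched() are now thin wrappers around one _rcu_barrier() routine that passes the barrier type through on_each_cpu() so each CPU queues its sentinel callback with the matching call_rcu variant. The wrapper-plus-dispatch shape, reduced to a runnable sketch (the queue_* helpers are placeholders, not kernel APIs):

    #include <stdio.h>

    /* Mirrors the enum rcu_barrier introduced above. */
    enum barrier_type { BARRIER_STD, BARRIER_BH, BARRIER_SCHED };

    static void queue_std(void)   { puts("queue via call_rcu()"); }
    static void queue_bh(void)    { puts("queue via call_rcu_bh()"); }
    static void queue_sched(void) { puts("queue via call_rcu_sched()"); }

    /* One core routine parameterised by type, with thin public wrappers,
     * the same shape as _rcu_barrier()/rcu_barrier{,_bh,_sched}(). */
    static void _barrier(enum barrier_type type)
    {
        switch (type) {
        case BARRIER_STD:   queue_std();   break;
        case BARRIER_BH:    queue_bh();    break;
        case BARRIER_SCHED: queue_sched(); break;
        }
        /* ... then wait for all queued callbacks to complete ... */
    }

    static void barrier(void)       { _barrier(BARRIER_STD); }
    static void barrier_bh(void)    { _barrier(BARRIER_BH); }
    static void barrier_sched(void) { _barrier(BARRIER_SCHED); }

    int main(void)
    {
        barrier();
        barrier_bh();
        barrier_sched();
        return 0;
    }
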
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 9bf445664457..6f62b77d93c4 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -46,11 +46,11 @@
46#include <asm/atomic.h> 46#include <asm/atomic.h>
47#include <linux/bitops.h> 47#include <linux/bitops.h>
48#include <linux/module.h> 48#include <linux/module.h>
49#include <linux/kthread.h>
49#include <linux/completion.h> 50#include <linux/completion.h>
50#include <linux/moduleparam.h> 51#include <linux/moduleparam.h>
51#include <linux/percpu.h> 52#include <linux/percpu.h>
52#include <linux/notifier.h> 53#include <linux/notifier.h>
53#include <linux/rcupdate.h>
54#include <linux/cpu.h> 54#include <linux/cpu.h>
55#include <linux/random.h> 55#include <linux/random.h>
56#include <linux/delay.h> 56#include <linux/delay.h>
@@ -82,14 +82,18 @@ struct rcu_data {
82 spinlock_t lock; /* Protect rcu_data fields. */ 82 spinlock_t lock; /* Protect rcu_data fields. */
83 long completed; /* Number of last completed batch. */ 83 long completed; /* Number of last completed batch. */
84 int waitlistcount; 84 int waitlistcount;
85 struct tasklet_struct rcu_tasklet;
86 struct rcu_head *nextlist; 85 struct rcu_head *nextlist;
87 struct rcu_head **nexttail; 86 struct rcu_head **nexttail;
88 struct rcu_head *waitlist[GP_STAGES]; 87 struct rcu_head *waitlist[GP_STAGES];
89 struct rcu_head **waittail[GP_STAGES]; 88 struct rcu_head **waittail[GP_STAGES];
90 struct rcu_head *donelist; 89 struct rcu_head *donelist; /* from waitlist & waitschedlist */
91 struct rcu_head **donetail; 90 struct rcu_head **donetail;
92 long rcu_flipctr[2]; 91 long rcu_flipctr[2];
92 struct rcu_head *nextschedlist;
93 struct rcu_head **nextschedtail;
94 struct rcu_head *waitschedlist;
95 struct rcu_head **waitschedtail;
96 int rcu_sched_sleeping;
93#ifdef CONFIG_RCU_TRACE 97#ifdef CONFIG_RCU_TRACE
94 struct rcupreempt_trace trace; 98 struct rcupreempt_trace trace;
95#endif /* #ifdef CONFIG_RCU_TRACE */ 99#endif /* #ifdef CONFIG_RCU_TRACE */
@@ -131,11 +135,24 @@ enum rcu_try_flip_states {
131 rcu_try_flip_waitmb_state, 135 rcu_try_flip_waitmb_state,
132}; 136};
133 137
138/*
139 * States for rcu_ctrlblk.rcu_sched_sleep.
140 */
141
142enum rcu_sched_sleep_states {
143 rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */
144 rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */
145 rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */
146};
147
134struct rcu_ctrlblk { 148struct rcu_ctrlblk {
135 spinlock_t fliplock; /* Protect state-machine transitions. */ 149 spinlock_t fliplock; /* Protect state-machine transitions. */
136 long completed; /* Number of last completed batch. */ 150 long completed; /* Number of last completed batch. */
137 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of 151 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
138 the rcu state machine */ 152 the rcu state machine */
153 spinlock_t schedlock; /* Protect rcu_sched sleep state. */
154 enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
155 wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */
139}; 156};
140 157
141static DEFINE_PER_CPU(struct rcu_data, rcu_data); 158static DEFINE_PER_CPU(struct rcu_data, rcu_data);
@@ -143,8 +160,12 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
143 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), 160 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
144 .completed = 0, 161 .completed = 0,
145 .rcu_try_flip_state = rcu_try_flip_idle_state, 162 .rcu_try_flip_state = rcu_try_flip_idle_state,
163 .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
164 .sched_sleep = rcu_sched_not_sleeping,
165 .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
146}; 166};
147 167
168static struct task_struct *rcu_sched_grace_period_task;
148 169
149#ifdef CONFIG_RCU_TRACE 170#ifdef CONFIG_RCU_TRACE
150static char *rcu_try_flip_state_names[] = 171static char *rcu_try_flip_state_names[] =
@@ -207,6 +228,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
207 */ 228 */
208#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); 229#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
209 230
231#define RCU_SCHED_BATCH_TIME (HZ / 50)
232
210/* 233/*
211 * Return the number of RCU batches processed thus far. Useful 234 * Return the number of RCU batches processed thus far. Useful
212 * for debug and statistics. 235 * for debug and statistics.
@@ -411,32 +434,34 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
411 } 434 }
412} 435}
413 436
414#ifdef CONFIG_NO_HZ 437DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
438 .dynticks = 1,
439};
415 440
416DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; 441#ifdef CONFIG_NO_HZ
417static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
418static DEFINE_PER_CPU(int, rcu_update_flag); 442static DEFINE_PER_CPU(int, rcu_update_flag);
419 443
420/** 444/**
421 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. 445 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
422 * 446 *
423 * If the CPU was idle with dynamic ticks active, this updates the 447 * If the CPU was idle with dynamic ticks active, this updates the
424 * dynticks_progress_counter to let the RCU handling know that the 448 * rcu_dyntick_sched.dynticks to let the RCU handling know that the
425 * CPU is active. 449 * CPU is active.
426 */ 450 */
427void rcu_irq_enter(void) 451void rcu_irq_enter(void)
428{ 452{
429 int cpu = smp_processor_id(); 453 int cpu = smp_processor_id();
454 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
430 455
431 if (per_cpu(rcu_update_flag, cpu)) 456 if (per_cpu(rcu_update_flag, cpu))
432 per_cpu(rcu_update_flag, cpu)++; 457 per_cpu(rcu_update_flag, cpu)++;
433 458
434 /* 459 /*
435 * Only update if we are coming from a stopped ticks mode 460 * Only update if we are coming from a stopped ticks mode
436 * (dynticks_progress_counter is even). 461 * (rcu_dyntick_sched.dynticks is even).
437 */ 462 */
438 if (!in_interrupt() && 463 if (!in_interrupt() &&
439 (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { 464 (rdssp->dynticks & 0x1) == 0) {
440 /* 465 /*
441 * The following might seem like we could have a race 466 * The following might seem like we could have a race
442 * with NMI/SMIs. But this really isn't a problem. 467 * with NMI/SMIs. But this really isn't a problem.
@@ -459,12 +484,12 @@ void rcu_irq_enter(void)
459 * RCU read-side critical sections on this CPU would 484 * RCU read-side critical sections on this CPU would
460 * have already completed. 485 * have already completed.
461 */ 486 */
462 per_cpu(dynticks_progress_counter, cpu)++; 487 rdssp->dynticks++;
463 /* 488 /*
464 * The following memory barrier ensures that any 489 * The following memory barrier ensures that any
465 * rcu_read_lock() primitives in the irq handler 490 * rcu_read_lock() primitives in the irq handler
466 * are seen by other CPUs to follow the above 491 * are seen by other CPUs to follow the above
467 * increment to dynticks_progress_counter. This is 492 * increment to rcu_dyntick_sched.dynticks. This is
468 * required in order for other CPUs to correctly 493 * required in order for other CPUs to correctly
469 * determine when it is safe to advance the RCU 494 * determine when it is safe to advance the RCU
470 * grace-period state machine. 495 * grace-period state machine.
@@ -472,7 +497,7 @@ void rcu_irq_enter(void)
472 smp_mb(); /* see above block comment. */ 497 smp_mb(); /* see above block comment. */
473 /* 498 /*
474 * Since we can't determine the dynamic tick mode from 499 * Since we can't determine the dynamic tick mode from
475 * the dynticks_progress_counter after this routine, 500 * the rcu_dyntick_sched.dynticks after this routine,
476 * we use a second flag to acknowledge that we came 501 * we use a second flag to acknowledge that we came
477 * from an idle state with ticks stopped. 502 * from an idle state with ticks stopped.
478 */ 503 */
@@ -480,7 +505,7 @@ void rcu_irq_enter(void)
480 /* 505 /*
481 * If we take an NMI/SMI now, they will also increment 506 * If we take an NMI/SMI now, they will also increment
482 * the rcu_update_flag, and will not update the 507 * the rcu_update_flag, and will not update the
483 * dynticks_progress_counter on exit. That is for 508 * rcu_dyntick_sched.dynticks on exit. That is for
484 * this IRQ to do. 509 * this IRQ to do.
485 */ 510 */
486 } 511 }
@@ -490,12 +515,13 @@ void rcu_irq_enter(void)
490 * rcu_irq_exit - Called from exiting Hard irq context. 515 * rcu_irq_exit - Called from exiting Hard irq context.
491 * 516 *
492 * If the CPU was idle with dynamic ticks active, update the 517 * If the CPU was idle with dynamic ticks active, update the
 493 * dynticks_progress_counter to let the RCU handling be 518 * rcu_dyntick_sched.dynticks to let the RCU handling be
494 * aware that the CPU is going back to idle with no ticks. 519 * aware that the CPU is going back to idle with no ticks.
495 */ 520 */
496void rcu_irq_exit(void) 521void rcu_irq_exit(void)
497{ 522{
498 int cpu = smp_processor_id(); 523 int cpu = smp_processor_id();
524 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
499 525
500 /* 526 /*
501 * rcu_update_flag is set if we interrupted the CPU 527 * rcu_update_flag is set if we interrupted the CPU
@@ -503,7 +529,7 @@ void rcu_irq_exit(void)
503 * Once this occurs, we keep track of interrupt nesting 529 * Once this occurs, we keep track of interrupt nesting
504 * because a NMI/SMI could also come in, and we still 530 * because a NMI/SMI could also come in, and we still
505 * only want the IRQ that started the increment of the 531 * only want the IRQ that started the increment of the
506 * dynticks_progress_counter to be the one that modifies 532 * rcu_dyntick_sched.dynticks to be the one that modifies
507 * it on exit. 533 * it on exit.
508 */ 534 */
509 if (per_cpu(rcu_update_flag, cpu)) { 535 if (per_cpu(rcu_update_flag, cpu)) {
@@ -515,28 +541,29 @@ void rcu_irq_exit(void)
515 541
516 /* 542 /*
517 * If an NMI/SMI happens now we are still 543 * If an NMI/SMI happens now we are still
518 * protected by the dynticks_progress_counter being odd. 544 * protected by the rcu_dyntick_sched.dynticks being odd.
519 */ 545 */
520 546
521 /* 547 /*
522 * The following memory barrier ensures that any 548 * The following memory barrier ensures that any
523 * rcu_read_unlock() primitives in the irq handler 549 * rcu_read_unlock() primitives in the irq handler
 524 * are seen by other CPUs to precede the following 550 * are seen by other CPUs to precede the following
525 * increment to dynticks_progress_counter. This 551 * increment to rcu_dyntick_sched.dynticks. This
526 * is required in order for other CPUs to determine 552 * is required in order for other CPUs to determine
527 * when it is safe to advance the RCU grace-period 553 * when it is safe to advance the RCU grace-period
528 * state machine. 554 * state machine.
529 */ 555 */
530 smp_mb(); /* see above block comment. */ 556 smp_mb(); /* see above block comment. */
531 per_cpu(dynticks_progress_counter, cpu)++; 557 rdssp->dynticks++;
532 WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); 558 WARN_ON(rdssp->dynticks & 0x1);
533 } 559 }
534} 560}
535 561
536static void dyntick_save_progress_counter(int cpu) 562static void dyntick_save_progress_counter(int cpu)
537{ 563{
538 per_cpu(rcu_dyntick_snapshot, cpu) = 564 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
539 per_cpu(dynticks_progress_counter, cpu); 565
566 rdssp->dynticks_snap = rdssp->dynticks;
540} 567}
541 568
542static inline int 569static inline int
@@ -544,9 +571,10 @@ rcu_try_flip_waitack_needed(int cpu)
544{ 571{
545 long curr; 572 long curr;
546 long snap; 573 long snap;
574 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
547 575
548 curr = per_cpu(dynticks_progress_counter, cpu); 576 curr = rdssp->dynticks;
549 snap = per_cpu(rcu_dyntick_snapshot, cpu); 577 snap = rdssp->dynticks_snap;
550 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 578 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
551 579
552 /* 580 /*
@@ -567,7 +595,7 @@ rcu_try_flip_waitack_needed(int cpu)
567 * that this CPU already acknowledged the counter. 595 * that this CPU already acknowledged the counter.
568 */ 596 */
569 597
570 if ((curr - snap) > 2 || (snap & 0x1) == 0) 598 if ((curr - snap) > 2 || (curr & 0x1) == 0)
571 return 0; 599 return 0;
572 600
573 /* We need this CPU to explicitly acknowledge the counter flip. */ 601 /* We need this CPU to explicitly acknowledge the counter flip. */
@@ -580,9 +608,10 @@ rcu_try_flip_waitmb_needed(int cpu)
580{ 608{
581 long curr; 609 long curr;
582 long snap; 610 long snap;
611 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
583 612
584 curr = per_cpu(dynticks_progress_counter, cpu); 613 curr = rdssp->dynticks;
585 snap = per_cpu(rcu_dyntick_snapshot, cpu); 614 snap = rdssp->dynticks_snap;
586 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 615 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
587 616
588 /* 617 /*
@@ -609,14 +638,86 @@ rcu_try_flip_waitmb_needed(int cpu)
609 return 1; 638 return 1;
610} 639}
611 640
641static void dyntick_save_progress_counter_sched(int cpu)
642{
643 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
644
645 rdssp->sched_dynticks_snap = rdssp->dynticks;
646}
647
648static int rcu_qsctr_inc_needed_dyntick(int cpu)
649{
650 long curr;
651 long snap;
652 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
653
654 curr = rdssp->dynticks;
655 snap = rdssp->sched_dynticks_snap;
656 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
657
658 /*
659 * If the CPU remained in dynticks mode for the entire time
660 * and didn't take any interrupts, NMIs, SMIs, or whatever,
661 * then it cannot be in the middle of an rcu_read_lock(), so
662 * the next rcu_read_lock() it executes must use the new value
663 * of the counter. Therefore, this CPU has been in a quiescent
664 * state the entire time, and we don't need to wait for it.
665 */
666
667 if ((curr == snap) && ((curr & 0x1) == 0))
668 return 0;
669
670 /*
671 * If the CPU passed through or entered a dynticks idle phase with
672 * no active irq handlers, then, as above, this CPU has already
673 * passed through a quiescent state.
674 */
675
676 if ((curr - snap) > 2 || (snap & 0x1) == 0)
677 return 0;
678
679 /* We need this CPU to go through a quiescent state. */
680
681 return 1;
682}
683
612#else /* !CONFIG_NO_HZ */ 684#else /* !CONFIG_NO_HZ */
613 685
614# define dyntick_save_progress_counter(cpu) do { } while (0) 686# define dyntick_save_progress_counter(cpu) do { } while (0)
615# define rcu_try_flip_waitack_needed(cpu) (1) 687# define rcu_try_flip_waitack_needed(cpu) (1)
616# define rcu_try_flip_waitmb_needed(cpu) (1) 688# define rcu_try_flip_waitmb_needed(cpu) (1)
689
690# define dyntick_save_progress_counter_sched(cpu) do { } while (0)
691# define rcu_qsctr_inc_needed_dyntick(cpu) (1)
617 692
618#endif /* CONFIG_NO_HZ */ 693#endif /* CONFIG_NO_HZ */
619 694
695static void save_qsctr_sched(int cpu)
696{
697 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
698
699 rdssp->sched_qs_snap = rdssp->sched_qs;
700}
701
702static inline int rcu_qsctr_inc_needed(int cpu)
703{
704 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
705
706 /*
707 * If there has been a quiescent state, no more need to wait
708 * on this CPU.
709 */
710
711 if (rdssp->sched_qs != rdssp->sched_qs_snap) {
712 smp_mb(); /* force ordering with cpu entering schedule(). */
713 return 0;
714 }
715
716 /* We need this CPU to go through a quiescent state. */
717
718 return 1;
719}
720
620/* 721/*
621 * Get here when RCU is idle. Decide whether we need to 722 * Get here when RCU is idle. Decide whether we need to
622 * move out of idle state, and return non-zero if so. 723 * move out of idle state, and return non-zero if so.
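
The new *_sched dynticks helpers above reuse the existing convention: rcu_dyntick_sched.dynticks is even while the CPU sits in dynticks-idle and odd while it is active, so comparing a saved snapshot against the current value tells the grace-period code whether it still needs a quiescent state from that CPU. A compilable model of that snapshot-and-compare test (struct and field names are stand-ins):

    #include <stdbool.h>
    #include <stdio.h>

    /* Even while the CPU is in dynticks-idle, odd while it is active;
     * bumped on every transition and on irq entry/exit. */
    struct dyntick_state {
        long dynticks;
        long snap;
    };

    static void save_snapshot(struct dyntick_state *s)
    {
        s->snap = s->dynticks;
    }

    /* true == we still have to wait for this CPU to pass a quiescent state. */
    static bool still_needed(const struct dyntick_state *s)
    {
        long curr = s->dynticks, snap = s->snap;

        /* Idle the whole time: counter unchanged and even. */
        if (curr == snap && (curr & 0x1) == 0)
            return false;

        /* Passed through (or left) dynticks idle since the snapshot. */
        if (curr - snap > 2 || (snap & 0x1) == 0)
            return false;

        return true;
    }

    int main(void)
    {
        struct dyntick_state s = { .dynticks = 4 };   /* even: dynticks idle */

        save_snapshot(&s);
        printf("%d ", still_needed(&s));  /* 0: idle the whole time        */

        s.dynticks = 5;                   /* left idle after the snapshot  */
        printf("%d ", still_needed(&s));  /* 0: was idle at snapshot time  */

        save_snapshot(&s);                /* active at snapshot time       */
        printf("%d\n", still_needed(&s)); /* 1: still have to wait         */
        return 0;
    }
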
@@ -819,6 +920,26 @@ void rcu_check_callbacks(int cpu, int user)
819 unsigned long flags; 920 unsigned long flags;
820 struct rcu_data *rdp = RCU_DATA_CPU(cpu); 921 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
821 922
923 /*
924 * If this CPU took its interrupt from user mode or from the
925 * idle loop, and this is not a nested interrupt, then
 926 * this CPU has to have exited all prior preempt-disable
927 * sections of code. So increment the counter to note this.
928 *
929 * The memory barrier is needed to handle the case where
930 * writes from a preempt-disable section of code get reordered
931 * into schedule() by this CPU's write buffer. So the memory
932 * barrier makes sure that the rcu_qsctr_inc() is seen by other
933 * CPUs to happen after any such write.
934 */
935
936 if (user ||
937 (idle_cpu(cpu) && !in_softirq() &&
938 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
939 smp_mb(); /* Guard against aggressive schedule(). */
940 rcu_qsctr_inc(cpu);
941 }
942
822 rcu_check_mb(cpu); 943 rcu_check_mb(cpu);
823 if (rcu_ctrlblk.completed == rdp->completed) 944 if (rcu_ctrlblk.completed == rdp->completed)
824 rcu_try_flip(); 945 rcu_try_flip();
@@ -869,6 +990,8 @@ void rcu_offline_cpu(int cpu)
869 struct rcu_head *list = NULL; 990 struct rcu_head *list = NULL;
870 unsigned long flags; 991 unsigned long flags;
871 struct rcu_data *rdp = RCU_DATA_CPU(cpu); 992 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
993 struct rcu_head *schedlist = NULL;
994 struct rcu_head **schedtail = &schedlist;
872 struct rcu_head **tail = &list; 995 struct rcu_head **tail = &list;
873 996
874 /* 997 /*
@@ -882,6 +1005,11 @@ void rcu_offline_cpu(int cpu)
882 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], 1005 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
883 list, tail); 1006 list, tail);
884 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); 1007 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
1008 rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
1009 schedlist, schedtail);
1010 rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
1011 schedlist, schedtail);
1012 rdp->rcu_sched_sleeping = 0;
885 spin_unlock_irqrestore(&rdp->lock, flags); 1013 spin_unlock_irqrestore(&rdp->lock, flags);
886 rdp->waitlistcount = 0; 1014 rdp->waitlistcount = 0;
887 1015
@@ -916,12 +1044,15 @@ void rcu_offline_cpu(int cpu)
916 * fix. 1044 * fix.
917 */ 1045 */
918 1046
919 local_irq_save(flags); 1047 local_irq_save(flags); /* disable preempt till we know what lock. */
920 rdp = RCU_DATA_ME(); 1048 rdp = RCU_DATA_ME();
921 spin_lock(&rdp->lock); 1049 spin_lock(&rdp->lock);
922 *rdp->nexttail = list; 1050 *rdp->nexttail = list;
923 if (list) 1051 if (list)
924 rdp->nexttail = tail; 1052 rdp->nexttail = tail;
1053 *rdp->nextschedtail = schedlist;
1054 if (schedlist)
1055 rdp->nextschedtail = schedtail;
925 spin_unlock_irqrestore(&rdp->lock, flags); 1056 spin_unlock_irqrestore(&rdp->lock, flags);
926} 1057}
927 1058
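
rcu_offline_cpu() above drains the dying CPU's new nextschedlist/waitschedlist the same way as its other callback lists: each list is a head pointer plus a pointer to the final ->next field, which makes both enqueue and wholesale splicing O(1). A self-contained sketch of that list shape:

    #include <stdio.h>

    struct cb {
        struct cb *next;
        int id;
    };

    /* Callback list kept as head plus a pointer to the last ->next field,
     * which is how the rcu_data lists above get O(1) enqueue and splice. */
    struct cb_list {
        struct cb *head;
        struct cb **tail;
    };

    static void list_init(struct cb_list *l)
    {
        l->head = NULL;
        l->tail = &l->head;
    }

    static void enqueue(struct cb_list *l, struct cb *c)
    {
        c->next = NULL;
        *l->tail = c;
        l->tail = &c->next;
    }

    /* Move everything from @src onto the end of @dst (cf. the offline path
     * handing a dead CPU's lists to a surviving CPU). */
    static void splice(struct cb_list *dst, struct cb_list *src)
    {
        if (!src->head)
            return;
        *dst->tail = src->head;
        dst->tail = src->tail;
        list_init(src);
    }

    int main(void)
    {
        struct cb_list a, b;
        struct cb c1 = { .id = 1 }, c2 = { .id = 2 }, c3 = { .id = 3 };
        struct cb *p;

        list_init(&a);
        list_init(&b);
        enqueue(&a, &c1);
        enqueue(&b, &c2);
        enqueue(&b, &c3);
        splice(&a, &b);

        for (p = a.head; p; p = p->next)
            printf("%d ", p->id);
        printf("\n");               /* 1 2 3 */
        return 0;
    }
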
@@ -936,10 +1067,25 @@ void rcu_offline_cpu(int cpu)
936void __cpuinit rcu_online_cpu(int cpu) 1067void __cpuinit rcu_online_cpu(int cpu)
937{ 1068{
938 unsigned long flags; 1069 unsigned long flags;
1070 struct rcu_data *rdp;
939 1071
940 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); 1072 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
941 cpu_set(cpu, rcu_cpu_online_map); 1073 cpu_set(cpu, rcu_cpu_online_map);
942 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); 1074 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
1075
1076 /*
1077 * The rcu_sched grace-period processing might have bypassed
1078 * this CPU, given that it was not in the rcu_cpu_online_map
1079 * when the grace-period scan started. This means that the
1080 * grace-period task might sleep. So make sure that if this
1081 * should happen, the first callback posted to this CPU will
1082 * wake up the grace-period task if need be.
1083 */
1084
1085 rdp = RCU_DATA_CPU(cpu);
1086 spin_lock_irqsave(&rdp->lock, flags);
1087 rdp->rcu_sched_sleeping = 1;
1088 spin_unlock_irqrestore(&rdp->lock, flags);
943} 1089}
944 1090
945static void rcu_process_callbacks(struct softirq_action *unused) 1091static void rcu_process_callbacks(struct softirq_action *unused)
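
The hunks below add a kthread-based grace-period engine for call_rcu_sched(): the poster wakes the thread only when rcu_ctrlblk.sched_sleep says it really went to sleep, and the thread announces rcu_sched_sleep_prep and re-checks for work before sleeping so a concurrently posted callback cannot be missed. A condensed pthread model of that handshake, using one flag instead of the three-state enum:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t schedlock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  sched_wq  = PTHREAD_COND_INITIALIZER;
    static bool gp_sleeping;        /* cf. rcu_ctrlblk.sched_sleep */
    static bool work_pending;

    /* Poster side: queue work and wake the grace-period thread only if it
     * actually went to sleep, the shape of call_rcu_sched()'s wake_gp path. */
    static void post_callback(void)
    {
        bool wake = false;

        pthread_mutex_lock(&schedlock);
        work_pending = true;
        if (gp_sleeping) {
            gp_sleeping = false;
            wake = true;
        }
        pthread_mutex_unlock(&schedlock);

        if (wake)
            pthread_cond_signal(&sched_wq);
    }

    /* Grace-period thread: re-check for work under the lock before sleeping,
     * so a concurrent post_callback() cannot be lost. */
    static void *gp_thread(void *arg)
    {
        pthread_mutex_lock(&schedlock);
        while (!work_pending) {
            gp_sleeping = true;
            pthread_cond_wait(&sched_wq, &schedlock);
        }
        work_pending = false;
        pthread_mutex_unlock(&schedlock);
        puts("grace period processed");
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, gp_thread, NULL);
        post_callback();
        pthread_join(t, NULL);
        return 0;
    }
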
@@ -982,31 +1128,196 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
982 *rdp->nexttail = head; 1128 *rdp->nexttail = head;
983 rdp->nexttail = &head->next; 1129 rdp->nexttail = &head->next;
984 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); 1130 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
985 spin_unlock(&rdp->lock); 1131 spin_unlock_irqrestore(&rdp->lock, flags);
986 local_irq_restore(flags);
987} 1132}
988EXPORT_SYMBOL_GPL(call_rcu); 1133EXPORT_SYMBOL_GPL(call_rcu);
989 1134
1135void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1136{
1137 unsigned long flags;
1138 struct rcu_data *rdp;
1139 int wake_gp = 0;
1140
1141 head->func = func;
1142 head->next = NULL;
1143 local_irq_save(flags);
1144 rdp = RCU_DATA_ME();
1145 spin_lock(&rdp->lock);
1146 *rdp->nextschedtail = head;
1147 rdp->nextschedtail = &head->next;
1148 if (rdp->rcu_sched_sleeping) {
1149
1150 /* Grace-period processing might be sleeping... */
1151
1152 rdp->rcu_sched_sleeping = 0;
1153 wake_gp = 1;
1154 }
1155 spin_unlock_irqrestore(&rdp->lock, flags);
1156 if (wake_gp) {
1157
1158 /* Wake up grace-period processing, unless someone beat us. */
1159
1160 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1161 if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
1162 wake_gp = 0;
1163 rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
1164 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1165 if (wake_gp)
1166 wake_up_interruptible(&rcu_ctrlblk.sched_wq);
1167 }
1168}
1169EXPORT_SYMBOL_GPL(call_rcu_sched);
1170
990/* 1171/*
991 * Wait until all currently running preempt_disable() code segments 1172 * Wait until all currently running preempt_disable() code segments
992 * (including hardware-irq-disable segments) complete. Note that 1173 * (including hardware-irq-disable segments) complete. Note that
993 * in -rt this does -not- necessarily result in all currently executing 1174 * in -rt this does -not- necessarily result in all currently executing
994 * interrupt -handlers- having completed. 1175 * interrupt -handlers- having completed.
995 */ 1176 */
996void __synchronize_sched(void) 1177synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
1178EXPORT_SYMBOL_GPL(__synchronize_sched);
1179
1180/*
1181 * kthread function that manages call_rcu_sched grace periods.
1182 */
1183static int rcu_sched_grace_period(void *arg)
997{ 1184{
998 cpumask_t oldmask; 1185 int couldsleep; /* might sleep after current pass. */
1186 int couldsleepnext = 0; /* might sleep after next pass. */
999 int cpu; 1187 int cpu;
1188 unsigned long flags;
1189 struct rcu_data *rdp;
1190 int ret;
1000 1191
1001 if (sched_getaffinity(0, &oldmask) < 0) 1192 /*
1002 oldmask = cpu_possible_map; 1193 * Each pass through the following loop handles one
1003 for_each_online_cpu(cpu) { 1194 * rcu_sched grace period cycle.
1004 sched_setaffinity(0, &cpumask_of_cpu(cpu)); 1195 */
1005 schedule(); 1196 do {
1006 } 1197 /* Save each CPU's current state. */
1007 sched_setaffinity(0, &oldmask); 1198
1199 for_each_online_cpu(cpu) {
1200 dyntick_save_progress_counter_sched(cpu);
1201 save_qsctr_sched(cpu);
1202 }
1203
1204 /*
1205 * Sleep for about an RCU grace-period's worth to
1206 * allow better batching and to consume less CPU.
1207 */
1208 schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);
1209
1210 /*
1211 * If there was nothing to do last time, prepare to
1212 * sleep at the end of the current grace period cycle.
1213 */
1214 couldsleep = couldsleepnext;
1215 couldsleepnext = 1;
1216 if (couldsleep) {
1217 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1218 rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
1219 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1220 }
1221
1222 /*
1223 * Wait on each CPU in turn to have either visited
1224 * a quiescent state or been in dynticks-idle mode.
1225 */
1226 for_each_online_cpu(cpu) {
1227 while (rcu_qsctr_inc_needed(cpu) &&
1228 rcu_qsctr_inc_needed_dyntick(cpu)) {
1229 /* resched_cpu(cpu); @@@ */
1230 schedule_timeout_interruptible(1);
1231 }
1232 }
1233
1234 /* Advance callbacks for each CPU. */
1235
1236 for_each_online_cpu(cpu) {
1237
1238 rdp = RCU_DATA_CPU(cpu);
1239 spin_lock_irqsave(&rdp->lock, flags);
1240
1241 /*
1242 * We are running on this CPU irq-disabled, so no
1243 * CPU can go offline until we re-enable irqs.
1244 * The current CPU might have already gone
1245 * offline (between the for_each_offline_cpu and
1246 * the spin_lock_irqsave), but in that case all its
1247 * callback lists will be empty, so no harm done.
1248 *
1249 * Advance the callbacks! We share normal RCU's
1250 * donelist, since callbacks are invoked the
1251 * same way in either case.
1252 */
1253 if (rdp->waitschedlist != NULL) {
1254 *rdp->donetail = rdp->waitschedlist;
1255 rdp->donetail = rdp->waitschedtail;
1256
1257 /*
1258 * Next rcu_check_callbacks() will
1259 * do the required raise_softirq().
1260 */
1261 }
1262 if (rdp->nextschedlist != NULL) {
1263 rdp->waitschedlist = rdp->nextschedlist;
1264 rdp->waitschedtail = rdp->nextschedtail;
1265 couldsleep = 0;
1266 couldsleepnext = 0;
1267 } else {
1268 rdp->waitschedlist = NULL;
1269 rdp->waitschedtail = &rdp->waitschedlist;
1270 }
1271 rdp->nextschedlist = NULL;
1272 rdp->nextschedtail = &rdp->nextschedlist;
1273
1274 /* Mark sleep intention. */
1275
1276 rdp->rcu_sched_sleeping = couldsleep;
1277
1278 spin_unlock_irqrestore(&rdp->lock, flags);
1279 }
1280
1281 /* If we saw callbacks on the last scan, go deal with them. */
1282
1283 if (!couldsleep)
1284 continue;
1285
1286 /* Attempt to block... */
1287
1288 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1289 if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {
1290
1291 /*
1292 * Someone posted a callback after we scanned.
1293 * Go take care of it.
1294 */
1295 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1296 couldsleepnext = 0;
1297 continue;
1298 }
1299
1300 /* Block until the next person posts a callback. */
1301
1302 rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
1303 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1304 ret = 0;
1305 __wait_event_interruptible(rcu_ctrlblk.sched_wq,
1306 rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
1307 ret);
1308
1309 /*
1310 * Signals would prevent us from sleeping, and we cannot
1311 * do much with them in any case. So flush them.
1312 */
1313 if (ret)
1314 flush_signals(current);
1315 couldsleepnext = 0;
1316
1317 } while (!kthread_should_stop());
1318
1319 return (0);
1008} 1320}
1009EXPORT_SYMBOL_GPL(__synchronize_sched);
1010 1321
1011/* 1322/*
1012 * Check to see if any future RCU-related work will need to be done 1323 * Check to see if any future RCU-related work will need to be done
@@ -1023,7 +1334,9 @@ int rcu_needs_cpu(int cpu)
1023 1334
1024 return (rdp->donelist != NULL || 1335 return (rdp->donelist != NULL ||
1025 !!rdp->waitlistcount || 1336 !!rdp->waitlistcount ||
1026 rdp->nextlist != NULL); 1337 rdp->nextlist != NULL ||
1338 rdp->nextschedlist != NULL ||
1339 rdp->waitschedlist != NULL);
1027} 1340}
1028 1341
1029int rcu_pending(int cpu) 1342int rcu_pending(int cpu)
@@ -1034,7 +1347,9 @@ int rcu_pending(int cpu)
1034 1347
1035 if (rdp->donelist != NULL || 1348 if (rdp->donelist != NULL ||
1036 !!rdp->waitlistcount || 1349 !!rdp->waitlistcount ||
1037 rdp->nextlist != NULL) 1350 rdp->nextlist != NULL ||
1351 rdp->nextschedlist != NULL ||
1352 rdp->waitschedlist != NULL)
1038 return 1; 1353 return 1;
1039 1354
1040 /* The RCU core needs an acknowledgement from this CPU. */ 1355 /* The RCU core needs an acknowledgement from this CPU. */
@@ -1101,6 +1416,11 @@ void __init __rcu_init(void)
1101 rdp->donetail = &rdp->donelist; 1416 rdp->donetail = &rdp->donelist;
1102 rdp->rcu_flipctr[0] = 0; 1417 rdp->rcu_flipctr[0] = 0;
1103 rdp->rcu_flipctr[1] = 0; 1418 rdp->rcu_flipctr[1] = 0;
1419 rdp->nextschedlist = NULL;
1420 rdp->nextschedtail = &rdp->nextschedlist;
1421 rdp->waitschedlist = NULL;
1422 rdp->waitschedtail = &rdp->waitschedlist;
1423 rdp->rcu_sched_sleeping = 0;
1104 } 1424 }
1105 register_cpu_notifier(&rcu_nb); 1425 register_cpu_notifier(&rcu_nb);
1106 1426
@@ -1123,11 +1443,15 @@ void __init __rcu_init(void)
1123} 1443}
1124 1444
1125/* 1445/*
1126 * Deprecated, use synchronize_rcu() or synchronize_sched() instead. 1446 * Late-boot-time RCU initialization that must wait until after scheduler
1447 * has been initialized.
1127 */ 1448 */
1128void synchronize_kernel(void) 1449void __init rcu_init_sched(void)
1129{ 1450{
1130 synchronize_rcu(); 1451 rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
1452 NULL,
1453 "rcu_sched_grace_period");
1454 WARN_ON(IS_ERR(rcu_sched_grace_period_task));
1131} 1455}
1132 1456
1133#ifdef CONFIG_RCU_TRACE 1457#ifdef CONFIG_RCU_TRACE
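For orientation, the new call_rcu_sched() interface added above takes the same arguments as call_rcu(), but its grace period waits for all in-flight preempt_disable()/irq-disabled sections rather than rcu_read_lock() sections. A minimal usage sketch, assuming a caller-defined struct foo and foo_release() callback (neither is part of this patch):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu;
};

static void foo_release(struct rcu_head *head)
{
	/* Runs after every preempt-disabled section in flight has completed. */
	kfree(container_of(head, struct foo, rcu));
}

static void foo_retire(struct foo *fp)
{
	/* Queue fp for freeing after an rcu_sched grace period. */
	call_rcu_sched(&fp->rcu, foo_release);
}

Blocking callers can instead use synchronize_sched(), which this patch now builds on top of call_rcu_sched() via the synchronize_rcu_xxx() helper shown above.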
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c
index 49ac4947af24..5edf82c34bbc 100644
--- a/kernel/rcupreempt_trace.c
+++ b/kernel/rcupreempt_trace.c
@@ -38,7 +38,6 @@
38#include <linux/moduleparam.h> 38#include <linux/moduleparam.h>
39#include <linux/percpu.h> 39#include <linux/percpu.h>
40#include <linux/notifier.h> 40#include <linux/notifier.h>
41#include <linux/rcupdate.h>
42#include <linux/cpu.h> 41#include <linux/cpu.h>
43#include <linux/mutex.h> 42#include <linux/mutex.h>
44#include <linux/rcupreempt_trace.h> 43#include <linux/rcupreempt_trace.h>
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 33acc424667e..90b5b123f7a1 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -57,7 +57,9 @@ static int stat_interval; /* Interval between stats, in seconds. */
57 /* Defaults to "only at end of test". */ 57 /* Defaults to "only at end of test". */
58static int verbose; /* Print more debug info. */ 58static int verbose; /* Print more debug info. */
59static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ 59static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
60static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/ 60static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
61static int stutter = 5; /* Start/stop testing interval (in sec) */
62static int irqreader = 1; /* RCU readers from irq (timers). */
61static char *torture_type = "rcu"; /* What RCU implementation to torture. */ 63static char *torture_type = "rcu"; /* What RCU implementation to torture. */
62 64
63module_param(nreaders, int, 0444); 65module_param(nreaders, int, 0444);
@@ -72,6 +74,10 @@ module_param(test_no_idle_hz, bool, 0444);
72MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); 74MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
73module_param(shuffle_interval, int, 0444); 75module_param(shuffle_interval, int, 0444);
74MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); 76MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
77module_param(stutter, int, 0444);
78MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
79module_param(irqreader, int, 0444);
80MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers");
75module_param(torture_type, charp, 0444); 81module_param(torture_type, charp, 0444);
76MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); 82MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
77 83
@@ -91,6 +97,7 @@ static struct task_struct **fakewriter_tasks;
91static struct task_struct **reader_tasks; 97static struct task_struct **reader_tasks;
92static struct task_struct *stats_task; 98static struct task_struct *stats_task;
93static struct task_struct *shuffler_task; 99static struct task_struct *shuffler_task;
100static struct task_struct *stutter_task;
94 101
95#define RCU_TORTURE_PIPE_LEN 10 102#define RCU_TORTURE_PIPE_LEN 10
96 103
@@ -117,8 +124,18 @@ static atomic_t n_rcu_torture_alloc_fail;
117static atomic_t n_rcu_torture_free; 124static atomic_t n_rcu_torture_free;
118static atomic_t n_rcu_torture_mberror; 125static atomic_t n_rcu_torture_mberror;
119static atomic_t n_rcu_torture_error; 126static atomic_t n_rcu_torture_error;
127static long n_rcu_torture_timers = 0;
120static struct list_head rcu_torture_removed; 128static struct list_head rcu_torture_removed;
121 129
130static int stutter_pause_test = 0;
131
132#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
133#define RCUTORTURE_RUNNABLE_INIT 1
134#else
135#define RCUTORTURE_RUNNABLE_INIT 0
136#endif
137int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
138
122/* 139/*
123 * Allocate an element from the rcu_tortures pool. 140 * Allocate an element from the rcu_tortures pool.
124 */ 141 */
@@ -179,6 +196,16 @@ rcu_random(struct rcu_random_state *rrsp)
179 return swahw32(rrsp->rrs_state); 196 return swahw32(rrsp->rrs_state);
180} 197}
181 198
199static void
200rcu_stutter_wait(void)
201{
202 while (stutter_pause_test || !rcutorture_runnable)
203 if (rcutorture_runnable)
204 schedule_timeout_interruptible(1);
205 else
206 schedule_timeout_interruptible(round_jiffies_relative(HZ));
207}
208
182/* 209/*
183 * Operations vector for selecting different types of tests. 210 * Operations vector for selecting different types of tests.
184 */ 211 */
@@ -192,7 +219,9 @@ struct rcu_torture_ops {
192 int (*completed)(void); 219 int (*completed)(void);
193 void (*deferredfree)(struct rcu_torture *p); 220 void (*deferredfree)(struct rcu_torture *p);
194 void (*sync)(void); 221 void (*sync)(void);
222 void (*cb_barrier)(void);
195 int (*stats)(char *page); 223 int (*stats)(char *page);
224 int irqcapable;
196 char *name; 225 char *name;
197}; 226};
198static struct rcu_torture_ops *cur_ops = NULL; 227static struct rcu_torture_ops *cur_ops = NULL;
@@ -265,7 +294,9 @@ static struct rcu_torture_ops rcu_ops = {
265 .completed = rcu_torture_completed, 294 .completed = rcu_torture_completed,
266 .deferredfree = rcu_torture_deferred_free, 295 .deferredfree = rcu_torture_deferred_free,
267 .sync = synchronize_rcu, 296 .sync = synchronize_rcu,
297 .cb_barrier = rcu_barrier,
268 .stats = NULL, 298 .stats = NULL,
299 .irqcapable = 1,
269 .name = "rcu" 300 .name = "rcu"
270}; 301};
271 302
@@ -304,7 +335,9 @@ static struct rcu_torture_ops rcu_sync_ops = {
304 .completed = rcu_torture_completed, 335 .completed = rcu_torture_completed,
305 .deferredfree = rcu_sync_torture_deferred_free, 336 .deferredfree = rcu_sync_torture_deferred_free,
306 .sync = synchronize_rcu, 337 .sync = synchronize_rcu,
338 .cb_barrier = NULL,
307 .stats = NULL, 339 .stats = NULL,
340 .irqcapable = 1,
308 .name = "rcu_sync" 341 .name = "rcu_sync"
309}; 342};
310 343
@@ -364,7 +397,9 @@ static struct rcu_torture_ops rcu_bh_ops = {
364 .completed = rcu_bh_torture_completed, 397 .completed = rcu_bh_torture_completed,
365 .deferredfree = rcu_bh_torture_deferred_free, 398 .deferredfree = rcu_bh_torture_deferred_free,
366 .sync = rcu_bh_torture_synchronize, 399 .sync = rcu_bh_torture_synchronize,
400 .cb_barrier = rcu_barrier_bh,
367 .stats = NULL, 401 .stats = NULL,
402 .irqcapable = 1,
368 .name = "rcu_bh" 403 .name = "rcu_bh"
369}; 404};
370 405
@@ -377,7 +412,9 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
377 .completed = rcu_bh_torture_completed, 412 .completed = rcu_bh_torture_completed,
378 .deferredfree = rcu_sync_torture_deferred_free, 413 .deferredfree = rcu_sync_torture_deferred_free,
379 .sync = rcu_bh_torture_synchronize, 414 .sync = rcu_bh_torture_synchronize,
415 .cb_barrier = NULL,
380 .stats = NULL, 416 .stats = NULL,
417 .irqcapable = 1,
381 .name = "rcu_bh_sync" 418 .name = "rcu_bh_sync"
382}; 419};
383 420
@@ -458,6 +495,7 @@ static struct rcu_torture_ops srcu_ops = {
458 .completed = srcu_torture_completed, 495 .completed = srcu_torture_completed,
459 .deferredfree = rcu_sync_torture_deferred_free, 496 .deferredfree = rcu_sync_torture_deferred_free,
460 .sync = srcu_torture_synchronize, 497 .sync = srcu_torture_synchronize,
498 .cb_barrier = NULL,
461 .stats = srcu_torture_stats, 499 .stats = srcu_torture_stats,
462 .name = "srcu" 500 .name = "srcu"
463}; 501};
@@ -482,6 +520,11 @@ static int sched_torture_completed(void)
482 return 0; 520 return 0;
483} 521}
484 522
523static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
524{
525 call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
526}
527
485static void sched_torture_synchronize(void) 528static void sched_torture_synchronize(void)
486{ 529{
487 synchronize_sched(); 530 synchronize_sched();
@@ -494,12 +537,28 @@ static struct rcu_torture_ops sched_ops = {
494 .readdelay = rcu_read_delay, /* just reuse rcu's version. */ 537 .readdelay = rcu_read_delay, /* just reuse rcu's version. */
495 .readunlock = sched_torture_read_unlock, 538 .readunlock = sched_torture_read_unlock,
496 .completed = sched_torture_completed, 539 .completed = sched_torture_completed,
497 .deferredfree = rcu_sync_torture_deferred_free, 540 .deferredfree = rcu_sched_torture_deferred_free,
498 .sync = sched_torture_synchronize, 541 .sync = sched_torture_synchronize,
542 .cb_barrier = rcu_barrier_sched,
499 .stats = NULL, 543 .stats = NULL,
544 .irqcapable = 1,
500 .name = "sched" 545 .name = "sched"
501}; 546};
502 547
548static struct rcu_torture_ops sched_ops_sync = {
549 .init = rcu_sync_torture_init,
550 .cleanup = NULL,
551 .readlock = sched_torture_read_lock,
552 .readdelay = rcu_read_delay, /* just reuse rcu's version. */
553 .readunlock = sched_torture_read_unlock,
554 .completed = sched_torture_completed,
555 .deferredfree = rcu_sync_torture_deferred_free,
556 .sync = sched_torture_synchronize,
557 .cb_barrier = NULL,
558 .stats = NULL,
559 .name = "sched_sync"
560};
561
503/* 562/*
504 * RCU torture writer kthread. Repeatedly substitutes a new structure 563 * RCU torture writer kthread. Repeatedly substitutes a new structure
505 * for that pointed to by rcu_torture_current, freeing the old structure 564 * for that pointed to by rcu_torture_current, freeing the old structure
@@ -537,6 +596,7 @@ rcu_torture_writer(void *arg)
537 } 596 }
538 rcu_torture_current_version++; 597 rcu_torture_current_version++;
539 oldbatch = cur_ops->completed(); 598 oldbatch = cur_ops->completed();
599 rcu_stutter_wait();
540 } while (!kthread_should_stop() && !fullstop); 600 } while (!kthread_should_stop() && !fullstop);
541 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); 601 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
542 while (!kthread_should_stop()) 602 while (!kthread_should_stop())
@@ -560,6 +620,7 @@ rcu_torture_fakewriter(void *arg)
560 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); 620 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
561 udelay(rcu_random(&rand) & 0x3ff); 621 udelay(rcu_random(&rand) & 0x3ff);
562 cur_ops->sync(); 622 cur_ops->sync();
623 rcu_stutter_wait();
563 } while (!kthread_should_stop() && !fullstop); 624 } while (!kthread_should_stop() && !fullstop);
564 625
565 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); 626 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
@@ -569,6 +630,52 @@ rcu_torture_fakewriter(void *arg)
569} 630}
570 631
571/* 632/*
633 * RCU torture reader from timer handler. Dereferences rcu_torture_current,
634 * incrementing the corresponding element of the pipeline array. The
635 * counter in the element should never be greater than 1, otherwise, the
636 * RCU implementation is broken.
637 */
638static void rcu_torture_timer(unsigned long unused)
639{
640 int idx;
641 int completed;
642 static DEFINE_RCU_RANDOM(rand);
643 static DEFINE_SPINLOCK(rand_lock);
644 struct rcu_torture *p;
645 int pipe_count;
646
647 idx = cur_ops->readlock();
648 completed = cur_ops->completed();
649 p = rcu_dereference(rcu_torture_current);
650 if (p == NULL) {
651 /* Leave because rcu_torture_writer is not yet underway */
652 cur_ops->readunlock(idx);
653 return;
654 }
655 if (p->rtort_mbtest == 0)
656 atomic_inc(&n_rcu_torture_mberror);
657 spin_lock(&rand_lock);
658 cur_ops->readdelay(&rand);
659 n_rcu_torture_timers++;
660 spin_unlock(&rand_lock);
661 preempt_disable();
662 pipe_count = p->rtort_pipe_count;
663 if (pipe_count > RCU_TORTURE_PIPE_LEN) {
664 /* Should not happen, but... */
665 pipe_count = RCU_TORTURE_PIPE_LEN;
666 }
667 ++__get_cpu_var(rcu_torture_count)[pipe_count];
668 completed = cur_ops->completed() - completed;
669 if (completed > RCU_TORTURE_PIPE_LEN) {
670 /* Should not happen, but... */
671 completed = RCU_TORTURE_PIPE_LEN;
672 }
673 ++__get_cpu_var(rcu_torture_batch)[completed];
674 preempt_enable();
675 cur_ops->readunlock(idx);
676}
677
678/*
572 * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current, 679 * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current,
573 * incrementing the corresponding element of the pipeline array. The 680 * incrementing the corresponding element of the pipeline array. The
574 * counter in the element should never be greater than 1, otherwise, the 681 * counter in the element should never be greater than 1, otherwise, the
@@ -582,11 +689,18 @@ rcu_torture_reader(void *arg)
582 DEFINE_RCU_RANDOM(rand); 689 DEFINE_RCU_RANDOM(rand);
583 struct rcu_torture *p; 690 struct rcu_torture *p;
584 int pipe_count; 691 int pipe_count;
692 struct timer_list t;
585 693
586 VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); 694 VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
587 set_user_nice(current, 19); 695 set_user_nice(current, 19);
696 if (irqreader && cur_ops->irqcapable)
697 setup_timer_on_stack(&t, rcu_torture_timer, 0);
588 698
589 do { 699 do {
700 if (irqreader && cur_ops->irqcapable) {
701 if (!timer_pending(&t))
702 mod_timer(&t, 1);
703 }
590 idx = cur_ops->readlock(); 704 idx = cur_ops->readlock();
591 completed = cur_ops->completed(); 705 completed = cur_ops->completed();
592 p = rcu_dereference(rcu_torture_current); 706 p = rcu_dereference(rcu_torture_current);
@@ -615,8 +729,11 @@ rcu_torture_reader(void *arg)
615 preempt_enable(); 729 preempt_enable();
616 cur_ops->readunlock(idx); 730 cur_ops->readunlock(idx);
617 schedule(); 731 schedule();
732 rcu_stutter_wait();
618 } while (!kthread_should_stop() && !fullstop); 733 } while (!kthread_should_stop() && !fullstop);
619 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); 734 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
735 if (irqreader && cur_ops->irqcapable)
736 del_timer_sync(&t);
620 while (!kthread_should_stop()) 737 while (!kthread_should_stop())
621 schedule_timeout_uninterruptible(1); 738 schedule_timeout_uninterruptible(1);
622 return 0; 739 return 0;
@@ -647,20 +764,22 @@ rcu_torture_printk(char *page)
647 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); 764 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
648 cnt += sprintf(&page[cnt], 765 cnt += sprintf(&page[cnt],
649 "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " 766 "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
650 "rtmbe: %d", 767 "rtmbe: %d nt: %ld",
651 rcu_torture_current, 768 rcu_torture_current,
652 rcu_torture_current_version, 769 rcu_torture_current_version,
653 list_empty(&rcu_torture_freelist), 770 list_empty(&rcu_torture_freelist),
654 atomic_read(&n_rcu_torture_alloc), 771 atomic_read(&n_rcu_torture_alloc),
655 atomic_read(&n_rcu_torture_alloc_fail), 772 atomic_read(&n_rcu_torture_alloc_fail),
656 atomic_read(&n_rcu_torture_free), 773 atomic_read(&n_rcu_torture_free),
657 atomic_read(&n_rcu_torture_mberror)); 774 atomic_read(&n_rcu_torture_mberror),
775 n_rcu_torture_timers);
658 if (atomic_read(&n_rcu_torture_mberror) != 0) 776 if (atomic_read(&n_rcu_torture_mberror) != 0)
659 cnt += sprintf(&page[cnt], " !!!"); 777 cnt += sprintf(&page[cnt], " !!!");
660 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); 778 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
661 if (i > 1) { 779 if (i > 1) {
662 cnt += sprintf(&page[cnt], "!!! "); 780 cnt += sprintf(&page[cnt], "!!! ");
663 atomic_inc(&n_rcu_torture_error); 781 atomic_inc(&n_rcu_torture_error);
782 WARN_ON_ONCE(1);
664 } 783 }
665 cnt += sprintf(&page[cnt], "Reader Pipe: "); 784 cnt += sprintf(&page[cnt], "Reader Pipe: ");
666 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) 785 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
@@ -785,15 +904,34 @@ rcu_torture_shuffle(void *arg)
785 return 0; 904 return 0;
786} 905}
787 906
907/* Cause the rcutorture test to "stutter", starting and stopping all
908 * threads periodically.
909 */
910static int
911rcu_torture_stutter(void *arg)
912{
913 VERBOSE_PRINTK_STRING("rcu_torture_stutter task started");
914 do {
915 schedule_timeout_interruptible(stutter * HZ);
916 stutter_pause_test = 1;
917 if (!kthread_should_stop())
918 schedule_timeout_interruptible(stutter * HZ);
919 stutter_pause_test = 0;
920 } while (!kthread_should_stop());
921 VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
922 return 0;
923}
924
788static inline void 925static inline void
789rcu_torture_print_module_parms(char *tag) 926rcu_torture_print_module_parms(char *tag)
790{ 927{
791 printk(KERN_ALERT "%s" TORTURE_FLAG 928 printk(KERN_ALERT "%s" TORTURE_FLAG
792 "--- %s: nreaders=%d nfakewriters=%d " 929 "--- %s: nreaders=%d nfakewriters=%d "
793 "stat_interval=%d verbose=%d test_no_idle_hz=%d " 930 "stat_interval=%d verbose=%d test_no_idle_hz=%d "
794 "shuffle_interval = %d\n", 931 "shuffle_interval=%d stutter=%d irqreader=%d\n",
795 torture_type, tag, nrealreaders, nfakewriters, 932 torture_type, tag, nrealreaders, nfakewriters,
796 stat_interval, verbose, test_no_idle_hz, shuffle_interval); 933 stat_interval, verbose, test_no_idle_hz, shuffle_interval,
934 stutter, irqreader);
797} 935}
798 936
799static void 937static void
@@ -802,6 +940,11 @@ rcu_torture_cleanup(void)
802 int i; 940 int i;
803 941
804 fullstop = 1; 942 fullstop = 1;
943 if (stutter_task) {
944 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
945 kthread_stop(stutter_task);
946 }
947 stutter_task = NULL;
805 if (shuffler_task) { 948 if (shuffler_task) {
806 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task"); 949 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
807 kthread_stop(shuffler_task); 950 kthread_stop(shuffler_task);
@@ -848,7 +991,9 @@ rcu_torture_cleanup(void)
848 stats_task = NULL; 991 stats_task = NULL;
849 992
850 /* Wait for all RCU callbacks to fire. */ 993 /* Wait for all RCU callbacks to fire. */
851 rcu_barrier(); 994
995 if (cur_ops->cb_barrier != NULL)
996 cur_ops->cb_barrier();
852 997
853 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ 998 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
854 999
@@ -868,7 +1013,7 @@ rcu_torture_init(void)
868 int firsterr = 0; 1013 int firsterr = 0;
869 static struct rcu_torture_ops *torture_ops[] = 1014 static struct rcu_torture_ops *torture_ops[] =
870 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, 1015 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
871 &srcu_ops, &sched_ops, }; 1016 &srcu_ops, &sched_ops, &sched_ops_sync, };
872 1017
873 /* Process args and tell the world that the torturer is on the job. */ 1018 /* Process args and tell the world that the torturer is on the job. */
874 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 1019 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
@@ -988,6 +1133,19 @@ rcu_torture_init(void)
988 goto unwind; 1133 goto unwind;
989 } 1134 }
990 } 1135 }
1136 if (stutter < 0)
1137 stutter = 0;
1138 if (stutter) {
1139 /* Create the stutter thread */
1140 stutter_task = kthread_run(rcu_torture_stutter, NULL,
1141 "rcu_torture_stutter");
1142 if (IS_ERR(stutter_task)) {
1143 firsterr = PTR_ERR(stutter_task);
1144 VERBOSE_PRINTK_ERRSTRING("Failed to create stutter");
1145 stutter_task = NULL;
1146 goto unwind;
1147 }
1148 }
991 return 0; 1149 return 0;
992 1150
993unwind: 1151unwind:
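The new irqreader path above drives readers from timer (irq) context using an on-stack timer. A rough, self-contained sketch of that idiom, assuming a simple kthread (my_timer_fn, my_thread_fn and the 1-jiffy re-arm period are illustrative, not from this patch):

#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/timer.h>

static void my_timer_fn(unsigned long unused)
{
	/* Runs in timer (softirq) context, like rcu_torture_timer() above. */
}

static int my_thread_fn(void *arg)
{
	struct timer_list t;

	setup_timer_on_stack(&t, my_timer_fn, 0);
	while (!kthread_should_stop()) {
		if (!timer_pending(&t))
			mod_timer(&t, jiffies + 1);	/* re-arm roughly every tick */
		schedule_timeout_interruptible(1);
	}
	del_timer_sync(&t);	/* the timer must be gone before the stack frame is */
	return 0;
}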
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 092e4c620af9..a56f629b057a 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -297,8 +297,8 @@ static int test_func(void *data)
297 * 297 *
298 * opcode:data 298 * opcode:data
299 */ 299 */
300static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf, 300static ssize_t sysfs_test_command(struct sys_device *dev, struct sysdev_attribute *attr,
301 size_t count) 301 const char *buf, size_t count)
302{ 302{
303 struct sched_param schedpar; 303 struct sched_param schedpar;
304 struct test_thread_data *td; 304 struct test_thread_data *td;
@@ -360,7 +360,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,
360 * @dev: thread to query 360 * @dev: thread to query
361 * @buf: char buffer to be filled with thread status info 361 * @buf: char buffer to be filled with thread status info
362 */ 362 */
363static ssize_t sysfs_test_status(struct sys_device *dev, char *buf) 363static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute *attr,
364 char *buf)
364{ 365{
365 struct test_thread_data *td; 366 struct test_thread_data *td;
366 struct task_struct *tsk; 367 struct task_struct *tsk;
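The signature changes in this file (and in the sched.c and clocksource.c hunks below) follow the tree-wide sysdev update: show/store callbacks now also receive a struct sysdev_attribute pointer. A minimal attribute written against the new prototypes might look like this sketch (the my_* names are illustrative only):

#include <linux/kernel.h>
#include <linux/sysdev.h>

static int my_value;

static ssize_t my_value_show(struct sys_device *dev,
			     struct sysdev_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", my_value);
}

static ssize_t my_value_store(struct sys_device *dev,
			      struct sysdev_attribute *attr,
			      const char *buf, size_t count)
{
	my_value = simple_strtol(buf, NULL, 0);
	return count;
}

static SYSDEV_ATTR(my_value, 0644, my_value_show, my_value_store);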
diff --git a/kernel/sched.c b/kernel/sched.c
index 99e6d850ecab..b1104ea5d255 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7737,11 +7737,13 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
7737} 7737}
7738 7738
7739#ifdef CONFIG_SCHED_MC 7739#ifdef CONFIG_SCHED_MC
7740static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page) 7740static ssize_t sched_mc_power_savings_show(struct sys_device *dev,
7741 struct sysdev_attribute *attr, char *page)
7741{ 7742{
7742 return sprintf(page, "%u\n", sched_mc_power_savings); 7743 return sprintf(page, "%u\n", sched_mc_power_savings);
7743} 7744}
7744static ssize_t sched_mc_power_savings_store(struct sys_device *dev, 7745static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
7746 struct sysdev_attribute *attr,
7745 const char *buf, size_t count) 7747 const char *buf, size_t count)
7746{ 7748{
7747 return sched_power_savings_store(buf, count, 0); 7749 return sched_power_savings_store(buf, count, 0);
@@ -7751,11 +7753,13 @@ static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
7751#endif 7753#endif
7752 7754
7753#ifdef CONFIG_SCHED_SMT 7755#ifdef CONFIG_SCHED_SMT
7754static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page) 7756static ssize_t sched_smt_power_savings_show(struct sys_device *dev,
7757 struct sysdev_attribute *attr, char *page)
7755{ 7758{
7756 return sprintf(page, "%u\n", sched_smt_power_savings); 7759 return sprintf(page, "%u\n", sched_smt_power_savings);
7757} 7760}
7758static ssize_t sched_smt_power_savings_store(struct sys_device *dev, 7761static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
7762 struct sysdev_attribute *attr,
7759 const char *buf, size_t count) 7763 const char *buf, size_t count)
7760{ 7764{
7761 return sched_power_savings_store(buf, count, 1); 7765 return sched_power_savings_store(buf, count, 1);
diff --git a/kernel/smp.c b/kernel/smp.c
new file mode 100644
index 000000000000..462c785ca1ee
--- /dev/null
+++ b/kernel/smp.c
@@ -0,0 +1,383 @@
1/*
2 * Generic helpers for smp ipi calls
3 *
4 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
5 *
6 */
7#include <linux/init.h>
8#include <linux/module.h>
9#include <linux/percpu.h>
10#include <linux/rcupdate.h>
11#include <linux/rculist.h>
12#include <linux/smp.h>
13
14static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
15static LIST_HEAD(call_function_queue);
16__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
17
18enum {
19 CSD_FLAG_WAIT = 0x01,
20 CSD_FLAG_ALLOC = 0x02,
21};
22
23struct call_function_data {
24 struct call_single_data csd;
25 spinlock_t lock;
26 unsigned int refs;
27 cpumask_t cpumask;
28 struct rcu_head rcu_head;
29};
30
31struct call_single_queue {
32 struct list_head list;
33 spinlock_t lock;
34};
35
36void __cpuinit init_call_single_data(void)
37{
38 int i;
39
40 for_each_possible_cpu(i) {
41 struct call_single_queue *q = &per_cpu(call_single_queue, i);
42
43 spin_lock_init(&q->lock);
44 INIT_LIST_HEAD(&q->list);
45 }
46}
47
48static void csd_flag_wait(struct call_single_data *data)
49{
50 /* Wait for response */
51 do {
52 /*
53 * We need to see the flags store in the IPI handler
54 */
55 smp_mb();
56 if (!(data->flags & CSD_FLAG_WAIT))
57 break;
58 cpu_relax();
59 } while (1);
60}
61
62/*
63 * Insert a previously allocated call_single_data element for execution
64 * on the given CPU. data must already have ->func, ->info, and ->flags set.
65 */
66static void generic_exec_single(int cpu, struct call_single_data *data)
67{
68 struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
69 int wait = data->flags & CSD_FLAG_WAIT, ipi;
70 unsigned long flags;
71
72 spin_lock_irqsave(&dst->lock, flags);
73 ipi = list_empty(&dst->list);
74 list_add_tail(&data->list, &dst->list);
75 spin_unlock_irqrestore(&dst->lock, flags);
76
77 if (ipi)
78 arch_send_call_function_single_ipi(cpu);
79
80 if (wait)
81 csd_flag_wait(data);
82}
83
84static void rcu_free_call_data(struct rcu_head *head)
85{
86 struct call_function_data *data;
87
88 data = container_of(head, struct call_function_data, rcu_head);
89
90 kfree(data);
91}
92
93/*
94 * Invoked by arch to handle an IPI for call function. Must be called with
95 * interrupts disabled.
96 */
97void generic_smp_call_function_interrupt(void)
98{
99 struct call_function_data *data;
100 int cpu = get_cpu();
101
102 /*
103 * It's ok to use list_for_each_rcu() here even though we may delete
104 * 'pos', since list_del_rcu() doesn't clear ->next
105 */
106 rcu_read_lock();
107 list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
108 int refs;
109
110 if (!cpu_isset(cpu, data->cpumask))
111 continue;
112
113 data->csd.func(data->csd.info);
114
115 spin_lock(&data->lock);
116 cpu_clear(cpu, data->cpumask);
117 WARN_ON(data->refs == 0);
118 data->refs--;
119 refs = data->refs;
120 spin_unlock(&data->lock);
121
122 if (refs)
123 continue;
124
125 spin_lock(&call_function_lock);
126 list_del_rcu(&data->csd.list);
127 spin_unlock(&call_function_lock);
128
129 if (data->csd.flags & CSD_FLAG_WAIT) {
130 /*
131 * serialize stores to data with the flag clear
132 * and wakeup
133 */
134 smp_wmb();
135 data->csd.flags &= ~CSD_FLAG_WAIT;
136 } else
137 call_rcu(&data->rcu_head, rcu_free_call_data);
138 }
139 rcu_read_unlock();
140
141 put_cpu();
142}
143
144/*
145 * Invoked by arch to handle an IPI for call function single. Must be called
146 * from the arch with interrupts disabled.
147 */
148void generic_smp_call_function_single_interrupt(void)
149{
150 struct call_single_queue *q = &__get_cpu_var(call_single_queue);
151 LIST_HEAD(list);
152
153 /*
154 * Need to see other stores to list head for checking whether
155 * list is empty without holding q->lock
156 */
157 smp_mb();
158 while (!list_empty(&q->list)) {
159 unsigned int data_flags;
160
161 spin_lock(&q->lock);
162 list_replace_init(&q->list, &list);
163 spin_unlock(&q->lock);
164
165 while (!list_empty(&list)) {
166 struct call_single_data *data;
167
168 data = list_entry(list.next, struct call_single_data,
169 list);
170 list_del(&data->list);
171
172 /*
173 * 'data' can be invalid after this call if
174 * flags == 0 (when called through
175 * generic_exec_single(), so save them away before
176 * making the call.
177 */
178 data_flags = data->flags;
179
180 data->func(data->info);
181
182 if (data_flags & CSD_FLAG_WAIT) {
183 smp_wmb();
184 data->flags &= ~CSD_FLAG_WAIT;
185 } else if (data_flags & CSD_FLAG_ALLOC)
186 kfree(data);
187 }
188 /*
189 * See comment on outer loop
190 */
191 smp_mb();
192 }
193}
194
195/*
196 * smp_call_function_single - Run a function on a specific CPU
197 * @func: The function to run. This must be fast and non-blocking.
198 * @info: An arbitrary pointer to pass to the function.
199 * @wait: If true, wait until function has completed on other CPUs.
200 *
201 * Returns 0 on success, else a negative status code. Note that @wait
202 * will be implicitly turned on in case of allocation failures, since
203 * we fall back to on-stack allocation.
204 */
205int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
206 int wait)
207{
208 struct call_single_data d;
209 unsigned long flags;
210 /* prevent preemption and reschedule on another processor */
211 int me = get_cpu();
212
213 /* Can deadlock when called with interrupts disabled */
214 WARN_ON(irqs_disabled());
215
216 if (cpu == me) {
217 local_irq_save(flags);
218 func(info);
219 local_irq_restore(flags);
220 } else {
221 struct call_single_data *data = NULL;
222
223 if (!wait) {
224 data = kmalloc(sizeof(*data), GFP_ATOMIC);
225 if (data)
226 data->flags = CSD_FLAG_ALLOC;
227 }
228 if (!data) {
229 data = &d;
230 data->flags = CSD_FLAG_WAIT;
231 }
232
233 data->func = func;
234 data->info = info;
235 generic_exec_single(cpu, data);
236 }
237
238 put_cpu();
239 return 0;
240}
241EXPORT_SYMBOL(smp_call_function_single);
242
243/**
244 * __smp_call_function_single(): Run a function on another CPU
245 * @cpu: The CPU to run on.
246 * @data: Pre-allocated and setup data structure
247 *
248 * Like smp_call_function_single(), but allow caller to pass in a pre-allocated
249 * data structure. Useful for embedding @data inside other structures, for
250 * instance.
251 *
252 */
253void __smp_call_function_single(int cpu, struct call_single_data *data)
254{
255 /* Can deadlock when called with interrupts disabled */
256 WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled());
257
258 generic_exec_single(cpu, data);
259}
260
261/**
262 * smp_call_function_mask(): Run a function on a set of other CPUs.
263 * @mask: The set of cpus to run on.
264 * @func: The function to run. This must be fast and non-blocking.
265 * @info: An arbitrary pointer to pass to the function.
266 * @wait: If true, wait (atomically) until function has completed on other CPUs.
267 *
268 * Returns 0 on success, else a negative status code.
269 *
270 * If @wait is true, then returns once @func has returned. Note that @wait
271 * will be implicitly turned on in case of allocation failures, since
272 * we fall back to on-stack allocation.
273 *
274 * You must not call this function with disabled interrupts or from a
275 * hardware interrupt handler or from a bottom half handler. Preemption
276 * must be disabled when calling this function.
277 */
278int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
279 int wait)
280{
281 struct call_function_data d;
282 struct call_function_data *data = NULL;
283 cpumask_t allbutself;
284 unsigned long flags;
285 int cpu, num_cpus;
286
287 /* Can deadlock when called with interrupts disabled */
288 WARN_ON(irqs_disabled());
289
290 cpu = smp_processor_id();
291 allbutself = cpu_online_map;
292 cpu_clear(cpu, allbutself);
293 cpus_and(mask, mask, allbutself);
294 num_cpus = cpus_weight(mask);
295
296 /*
297 * If zero CPUs, return. If just a single CPU, turn this request
 298 * into a targeted single call instead since it's faster.
299 */
300 if (!num_cpus)
301 return 0;
302 else if (num_cpus == 1) {
303 cpu = first_cpu(mask);
304 return smp_call_function_single(cpu, func, info, wait);
305 }
306
307 if (!wait) {
308 data = kmalloc(sizeof(*data), GFP_ATOMIC);
309 if (data)
310 data->csd.flags = CSD_FLAG_ALLOC;
311 }
312 if (!data) {
313 data = &d;
314 data->csd.flags = CSD_FLAG_WAIT;
315 wait = 1;
316 }
317
318 spin_lock_init(&data->lock);
319 data->csd.func = func;
320 data->csd.info = info;
321 data->refs = num_cpus;
322 data->cpumask = mask;
323
324 spin_lock_irqsave(&call_function_lock, flags);
325 list_add_tail_rcu(&data->csd.list, &call_function_queue);
326 spin_unlock_irqrestore(&call_function_lock, flags);
327
328 /* Send a message to all CPUs in the map */
329 arch_send_call_function_ipi(mask);
330
331 /* optionally wait for the CPUs to complete */
332 if (wait)
333 csd_flag_wait(&data->csd);
334
335 return 0;
336}
337EXPORT_SYMBOL(smp_call_function_mask);
338
339/**
340 * smp_call_function(): Run a function on all other CPUs.
341 * @func: The function to run. This must be fast and non-blocking.
342 * @info: An arbitrary pointer to pass to the function.
343 * @wait: If true, wait (atomically) until function has completed on other CPUs.
344 *
345 * Returns 0 on success, else a negative status code.
346 *
347 * If @wait is true, then returns once @func has returned; otherwise
348 * it returns just before the target cpu calls @func. In case of allocation
349 * failure, @wait will be implicitly turned on.
350 *
351 * You must not call this function with disabled interrupts or from a
352 * hardware interrupt handler or from a bottom half handler.
353 */
354int smp_call_function(void (*func)(void *), void *info, int wait)
355{
356 int ret;
357
358 preempt_disable();
359 ret = smp_call_function_mask(cpu_online_map, func, info, wait);
360 preempt_enable();
361 return ret;
362}
363EXPORT_SYMBOL(smp_call_function);
364
365void ipi_call_lock(void)
366{
367 spin_lock(&call_function_lock);
368}
369
370void ipi_call_unlock(void)
371{
372 spin_unlock(&call_function_lock);
373}
374
375void ipi_call_lock_irq(void)
376{
377 spin_lock_irq(&call_function_lock);
378}
379
380void ipi_call_unlock_irq(void)
381{
382 spin_unlock_irq(&call_function_lock);
383}
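The new generic helpers above keep the familiar entry points, but as the softirq.c and tick-broadcast.c hunks below show, the old 'retry' argument is gone. A minimal remote call under the new three-argument form, assuming a caller-supplied do_flush() handler (illustrative only):

#include <linux/smp.h>

static void do_flush(void *info)
{
	/* Runs on the target CPU with interrupts disabled (via IPI when remote). */
}

static void flush_one_cpu(int cpu)
{
	/* wait == 1: do not return until do_flush() has finished on 'cpu'. */
	smp_call_function_single(cpu, do_flush, NULL, 1);
}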
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 3e9e896fdc5b..81e2fe0f983a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -645,12 +645,12 @@ __init int spawn_ksoftirqd(void)
645/* 645/*
646 * Call a function on all processors 646 * Call a function on all processors
647 */ 647 */
648int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait) 648int on_each_cpu(void (*func) (void *info), void *info, int wait)
649{ 649{
650 int ret = 0; 650 int ret = 0;
651 651
652 preempt_disable(); 652 preempt_disable();
653 ret = smp_call_function(func, info, retry, wait); 653 ret = smp_call_function(func, info, wait);
654 local_irq_disable(); 654 local_irq_disable();
655 func(info); 655 func(info);
656 local_irq_enable(); 656 local_irq_enable();
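on_each_cpu() likewise loses its 'retry' parameter and now forwards straight to the three-argument smp_call_function(), then runs the function locally. A small sketch of an updated caller, assuming an illustrative count_cpu() helper:

#include <linux/smp.h>
#include <asm/atomic.h>

static void count_cpu(void *info)
{
	atomic_inc((atomic_t *)info);	/* executed once on every online CPU */
}

static int count_online_cpus_by_ipi(void)
{
	atomic_t n = ATOMIC_INIT(0);

	/* New signature: func, info, wait -- no 'retry' argument. */
	on_each_cpu(count_cpu, &n, 1);
	return atomic_read(&n);
}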
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5b9b467de070..0fea0ee12da9 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -59,6 +59,7 @@ cond_syscall(sys_epoll_create);
59cond_syscall(sys_epoll_ctl); 59cond_syscall(sys_epoll_ctl);
60cond_syscall(sys_epoll_wait); 60cond_syscall(sys_epoll_wait);
61cond_syscall(sys_epoll_pwait); 61cond_syscall(sys_epoll_pwait);
62cond_syscall(compat_sys_epoll_pwait);
62cond_syscall(sys_semget); 63cond_syscall(sys_semget);
63cond_syscall(sys_semop); 64cond_syscall(sys_semop);
64cond_syscall(sys_semtimedop); 65cond_syscall(sys_semtimedop);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ab59ac008caf..2a7b9d88706b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -83,6 +83,9 @@ extern int maps_protect;
83extern int sysctl_stat_interval; 83extern int sysctl_stat_interval;
84extern int latencytop_enabled; 84extern int latencytop_enabled;
85extern int sysctl_nr_open_min, sysctl_nr_open_max; 85extern int sysctl_nr_open_min, sysctl_nr_open_max;
86#ifdef CONFIG_RCU_TORTURE_TEST
87extern int rcutorture_runnable;
88#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
86 89
87/* Constants used for minimum and maximum */ 90/* Constants used for minimum and maximum */
88#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP) 91#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
@@ -108,7 +111,7 @@ static int min_percpu_pagelist_fract = 8;
108 111
109static int ngroups_max = NGROUPS_MAX; 112static int ngroups_max = NGROUPS_MAX;
110 113
111#ifdef CONFIG_KMOD 114#ifdef CONFIG_MODULES
112extern char modprobe_path[]; 115extern char modprobe_path[];
113#endif 116#endif
114#ifdef CONFIG_CHR_DEV_SG 117#ifdef CONFIG_CHR_DEV_SG
@@ -473,7 +476,7 @@ static struct ctl_table kern_table[] = {
473 .proc_handler = &ftrace_enable_sysctl, 476 .proc_handler = &ftrace_enable_sysctl,
474 }, 477 },
475#endif 478#endif
476#ifdef CONFIG_KMOD 479#ifdef CONFIG_MODULES
477 { 480 {
478 .ctl_name = KERN_MODPROBE, 481 .ctl_name = KERN_MODPROBE,
479 .procname = "modprobe", 482 .procname = "modprobe",
@@ -832,6 +835,16 @@ static struct ctl_table kern_table[] = {
832 .child = key_sysctls, 835 .child = key_sysctls,
833 }, 836 },
834#endif 837#endif
838#ifdef CONFIG_RCU_TORTURE_TEST
839 {
840 .ctl_name = CTL_UNNUMBERED,
841 .procname = "rcutorture_runnable",
842 .data = &rcutorture_runnable,
843 .maxlen = sizeof(int),
844 .mode = 0644,
845 .proc_handler = &proc_dointvec,
846 },
847#endif
835/* 848/*
836 * NOTE: do not add new entries to this table unless you have read 849 * NOTE: do not add new entries to this table unless you have read
837 * Documentation/sysctl/ctl_unnumbered.txt 850 * Documentation/sysctl/ctl_unnumbered.txt
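The new rcutorture_runnable entry sits in kern_table, so it appears to user space as /proc/sys/kernel/rcutorture_runnable; rcu_stutter_wait() in the rcutorture hunk above parks the test threads whenever it reads zero. A small user-space sketch for pausing and resuming a built-in rcutorture run (the helper name is illustrative):

#include <stdio.h>

/* Write 0 to pause the torture threads, 1 to let them run again. */
static int set_rcutorture_runnable(int runnable)
{
	FILE *f = fopen("/proc/sys/kernel/rcutorture_runnable", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", runnable);
	return fclose(f);
}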
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index dadde5361f32..b1c2da81b050 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -376,7 +376,8 @@ void clocksource_unregister(struct clocksource *cs)
376 * Provides sysfs interface for listing current clocksource. 376 * Provides sysfs interface for listing current clocksource.
377 */ 377 */
378static ssize_t 378static ssize_t
379sysfs_show_current_clocksources(struct sys_device *dev, char *buf) 379sysfs_show_current_clocksources(struct sys_device *dev,
380 struct sysdev_attribute *attr, char *buf)
380{ 381{
381 ssize_t count = 0; 382 ssize_t count = 0;
382 383
@@ -397,6 +398,7 @@ sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
 397 * clocksource selection. 398 * clocksource selection.
398 */ 399 */
399static ssize_t sysfs_override_clocksource(struct sys_device *dev, 400static ssize_t sysfs_override_clocksource(struct sys_device *dev,
401 struct sysdev_attribute *attr,
400 const char *buf, size_t count) 402 const char *buf, size_t count)
401{ 403{
402 struct clocksource *ovr = NULL; 404 struct clocksource *ovr = NULL;
@@ -449,7 +451,9 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
449 * Provides sysfs interface for listing registered clocksources 451 * Provides sysfs interface for listing registered clocksources
450 */ 452 */
451static ssize_t 453static ssize_t
452sysfs_show_available_clocksources(struct sys_device *dev, char *buf) 454sysfs_show_available_clocksources(struct sys_device *dev,
455 struct sysdev_attribute *attr,
456 char *buf)
453{ 457{
454 struct clocksource *src; 458 struct clocksource *src;
455 ssize_t count = 0; 459 ssize_t count = 0;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 67f80c261709..f48d0f09d32f 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -268,7 +268,7 @@ void tick_broadcast_on_off(unsigned long reason, int *oncpu)
268 "offline CPU #%d\n", *oncpu); 268 "offline CPU #%d\n", *oncpu);
269 else 269 else
270 smp_call_function_single(*oncpu, tick_do_broadcast_on_off, 270 smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
271 &reason, 1, 1); 271 &reason, 1);
272} 272}
273 273
274/* 274/*