Diffstat (limited to 'kernel')
-rw-r--r--   kernel/events/core.c          22
-rw-r--r--   kernel/exit.c                 31
-rw-r--r--   kernel/gcov/Kconfig            3
-rw-r--r--   kernel/irq/handle.c            6
-rw-r--r--   kernel/irq/irqdesc.c          14
-rw-r--r--   kernel/irq/manage.c           27
-rw-r--r--   kernel/irq/spurious.c         31
-rw-r--r--   kernel/kmod.c                 16
-rw-r--r--   kernel/lockdep.c               2
-rw-r--r--   kernel/rcutree.c             398
-rw-r--r--   kernel/rcutree.h              12
-rw-r--r--   kernel/rcutree_plugin.h      419
-rw-r--r--   kernel/rcutree_trace.c        32
-rw-r--r--   kernel/sched.c                33
-rw-r--r--   kernel/sched_rt.c              6
-rw-r--r--   kernel/signal.c                2
-rw-r--r--   kernel/smp.c                   5
-rw-r--r--   kernel/softirq.c               2
-rw-r--r--   kernel/sysctl.c                6
-rw-r--r--   kernel/time/clocksource.c     24
-rw-r--r--   kernel/trace/ftrace.c          9
-rw-r--r--   kernel/trace/trace_kprobe.c    8
-rw-r--r--   kernel/trace/trace_printk.c    5
23 files changed, 610 insertions, 503 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d863b3c057bb..9efe7108ccaf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7402,26 +7402,12 @@ static int __perf_cgroup_move(void *info) | |||
7402 | return 0; | 7402 | return 0; |
7403 | } | 7403 | } |
7404 | 7404 | ||
7405 | static void perf_cgroup_move(struct task_struct *task) | 7405 | static void |
7406 | perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task) | ||
7406 | { | 7407 | { |
7407 | task_function_call(task, __perf_cgroup_move, task); | 7408 | task_function_call(task, __perf_cgroup_move, task); |
7408 | } | 7409 | } |
7409 | 7410 | ||
7410 | static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | ||
7411 | struct cgroup *old_cgrp, struct task_struct *task, | ||
7412 | bool threadgroup) | ||
7413 | { | ||
7414 | perf_cgroup_move(task); | ||
7415 | if (threadgroup) { | ||
7416 | struct task_struct *c; | ||
7417 | rcu_read_lock(); | ||
7418 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { | ||
7419 | perf_cgroup_move(c); | ||
7420 | } | ||
7421 | rcu_read_unlock(); | ||
7422 | } | ||
7423 | } | ||
7424 | |||
7425 | static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | 7411 | static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, |
7426 | struct cgroup *old_cgrp, struct task_struct *task) | 7412 | struct cgroup *old_cgrp, struct task_struct *task) |
7427 | { | 7413 | { |
@@ -7433,7 +7419,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
7433 | if (!(task->flags & PF_EXITING)) | 7419 | if (!(task->flags & PF_EXITING)) |
7434 | return; | 7420 | return; |
7435 | 7421 | ||
7436 | perf_cgroup_move(task); | 7422 | perf_cgroup_attach_task(cgrp, task); |
7437 | } | 7423 | } |
7438 | 7424 | ||
7439 | struct cgroup_subsys perf_subsys = { | 7425 | struct cgroup_subsys perf_subsys = { |
@@ -7442,6 +7428,6 @@ struct cgroup_subsys perf_subsys = { | |||
7442 | .create = perf_cgroup_create, | 7428 | .create = perf_cgroup_create, |
7443 | .destroy = perf_cgroup_destroy, | 7429 | .destroy = perf_cgroup_destroy, |
7444 | .exit = perf_cgroup_exit, | 7430 | .exit = perf_cgroup_exit, |
7445 | .attach = perf_cgroup_attach, | 7431 | .attach_task = perf_cgroup_attach_task, |
7446 | }; | 7432 | }; |
7447 | #endif /* CONFIG_CGROUP_PERF */ | 7433 | #endif /* CONFIG_CGROUP_PERF */ |
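For context, the hunk above switches perf from the bulk .attach callback to the cgroup core's per-task .attach_task hook, so the subsystem no longer walks the thread group itself. A minimal sketch of a subsystem using that hook; the demo_* names are invented and other required fields are omitted:

    /* Illustrative only, not part of this patch. */
    static void demo_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
    {
            /* Called once for every task being moved into cgrp;
             * the cgroup core handles thread-group iteration. */
    }

    struct cgroup_subsys demo_subsys = {
            .name        = "demo",
            .attach_task = demo_attach_task,
    };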
diff --git a/kernel/exit.c b/kernel/exit.c
index 20a406471525..f2b321bae440 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -561,29 +561,28 @@ void exit_files(struct task_struct *tsk) | |||
561 | 561 | ||
562 | #ifdef CONFIG_MM_OWNER | 562 | #ifdef CONFIG_MM_OWNER |
563 | /* | 563 | /* |
564 | * Task p is exiting and it owned mm, lets find a new owner for it | 564 | * A task is exiting. If it owned this mm, find a new owner for the mm. |
565 | */ | 565 | */ |
566 | static inline int | ||
567 | mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) | ||
568 | { | ||
569 | /* | ||
570 | * If there are other users of the mm and the owner (us) is exiting | ||
571 | * we need to find a new owner to take on the responsibility. | ||
572 | */ | ||
573 | if (atomic_read(&mm->mm_users) <= 1) | ||
574 | return 0; | ||
575 | if (mm->owner != p) | ||
576 | return 0; | ||
577 | return 1; | ||
578 | } | ||
579 | |||
580 | void mm_update_next_owner(struct mm_struct *mm) | 566 | void mm_update_next_owner(struct mm_struct *mm) |
581 | { | 567 | { |
582 | struct task_struct *c, *g, *p = current; | 568 | struct task_struct *c, *g, *p = current; |
583 | 569 | ||
584 | retry: | 570 | retry: |
585 | if (!mm_need_new_owner(mm, p)) | 571 | /* |
572 | * If the exiting or execing task is not the owner, it's | ||
573 | * someone else's problem. | ||
574 | */ | ||
575 | if (mm->owner != p) | ||
586 | return; | 576 | return; |
577 | /* | ||
578 | * The current owner is exiting/execing and there are no other | ||
579 | * candidates. Do not leave the mm pointing to a possibly | ||
580 | * freed task structure. | ||
581 | */ | ||
582 | if (atomic_read(&mm->mm_users) <= 1) { | ||
583 | mm->owner = NULL; | ||
584 | return; | ||
585 | } | ||
587 | 586 | ||
588 | read_lock(&tasklist_lock); | 587 | read_lock(&tasklist_lock); |
589 | /* | 588 | /* |
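One consequence of the rewritten check above: mm->owner can now legitimately be NULL once the last user exits with no candidate left to inherit the mm, so readers have to tolerate that. A rough sketch only; inspect_owner() is an invented placeholder:

    struct task_struct *owner;

    rcu_read_lock();
    owner = mm->owner;              /* may now be NULL */
    if (owner)
            inspect_owner(owner);   /* invented placeholder */
    rcu_read_unlock();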
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index b8cadf70b1fb..5bf924d80b5c 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -2,7 +2,8 @@ menu "GCOV-based kernel profiling" | |||
2 | 2 | ||
3 | config GCOV_KERNEL | 3 | config GCOV_KERNEL |
4 | bool "Enable gcov-based kernel profiling" | 4 | bool "Enable gcov-based kernel profiling" |
5 | depends on DEBUG_FS && CONSTRUCTORS | 5 | depends on DEBUG_FS |
6 | select CONSTRUCTORS | ||
6 | default n | 7 | default n |
7 | ---help--- | 8 | ---help--- |
8 | This option enables gcov-based code profiling (e.g. for code coverage | 9 | This option enables gcov-based code profiling (e.g. for code coverage |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 90cb55f6d7eb..470d08c82bbe 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -133,12 +133,6 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) | |||
133 | switch (res) { | 133 | switch (res) { |
134 | case IRQ_WAKE_THREAD: | 134 | case IRQ_WAKE_THREAD: |
135 | /* | 135 | /* |
136 | * Set result to handled so the spurious check | ||
137 | * does not trigger. | ||
138 | */ | ||
139 | res = IRQ_HANDLED; | ||
140 | |||
141 | /* | ||
142 | * Catch drivers which return WAKE_THREAD but | 136 | * Catch drivers which return WAKE_THREAD but |
143 | * did not set up a thread function | 137 | * did not set up a thread function |
144 | */ | 138 | */ |
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 886e80347b32..4c60a50e66b2 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -257,13 +257,11 @@ int __init early_irq_init(void) | |||
257 | count = ARRAY_SIZE(irq_desc); | 257 | count = ARRAY_SIZE(irq_desc); |
258 | 258 | ||
259 | for (i = 0; i < count; i++) { | 259 | for (i = 0; i < count; i++) { |
260 | desc[i].irq_data.irq = i; | ||
261 | desc[i].irq_data.chip = &no_irq_chip; | ||
262 | desc[i].kstat_irqs = alloc_percpu(unsigned int); | 260 | desc[i].kstat_irqs = alloc_percpu(unsigned int); |
263 | irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); | 261 | alloc_masks(&desc[i], GFP_KERNEL, node); |
264 | alloc_masks(desc + i, GFP_KERNEL, node); | 262 | raw_spin_lock_init(&desc[i].lock); |
265 | desc_smp_init(desc + i, node); | ||
266 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); | 263 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); |
264 | desc_set_defaults(i, &desc[i], node); | ||
267 | } | 265 | } |
268 | return arch_early_irq_init(); | 266 | return arch_early_irq_init(); |
269 | } | 267 | } |
@@ -346,6 +344,12 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) | |||
346 | if (!cnt) | 344 | if (!cnt) |
347 | return -EINVAL; | 345 | return -EINVAL; |
348 | 346 | ||
347 | if (irq >= 0) { | ||
348 | if (from > irq) | ||
349 | return -EINVAL; | ||
350 | from = irq; | ||
351 | } | ||
352 | |||
349 | mutex_lock(&sparse_irq_lock); | 353 | mutex_lock(&sparse_irq_lock); |
350 | 354 | ||
351 | start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS, | 355 | start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS, |
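The added check above makes irq_alloc_descs() reject a search base above an explicitly requested IRQ and pins the search to that IRQ. A hedged usage sketch, with error handling abbreviated:

    /* Ask for 4 consecutive descriptors starting exactly at IRQ 32;
     * a 'from' larger than 'irq' now fails with -EINVAL. */
    int base = irq_alloc_descs(32, 32, 4, numa_node_id());

    if (base < 0)
            return base;            /* negative errno on failure */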
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index f7ce0021e1c4..0a7840aeb0fb 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -491,6 +491,9 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on) | |||
491 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags); | 491 | struct irq_desc *desc = irq_get_desc_buslock(irq, &flags); |
492 | int ret = 0; | 492 | int ret = 0; |
493 | 493 | ||
494 | if (!desc) | ||
495 | return -EINVAL; | ||
496 | |||
494 | /* wakeup-capable irqs can be shared between drivers that | 497 | /* wakeup-capable irqs can be shared between drivers that |
495 | * don't need to have the same sleep mode behaviors. | 498 | * don't need to have the same sleep mode behaviors. |
496 | */ | 499 | */ |
@@ -723,13 +726,16 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } | |||
723 | * context. So we need to disable bh here to avoid deadlocks and other | 726 | * context. So we need to disable bh here to avoid deadlocks and other |
724 | * side effects. | 727 | * side effects. |
725 | */ | 728 | */ |
726 | static void | 729 | static irqreturn_t |
727 | irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) | 730 | irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) |
728 | { | 731 | { |
732 | irqreturn_t ret; | ||
733 | |||
729 | local_bh_disable(); | 734 | local_bh_disable(); |
730 | action->thread_fn(action->irq, action->dev_id); | 735 | ret = action->thread_fn(action->irq, action->dev_id); |
731 | irq_finalize_oneshot(desc, action, false); | 736 | irq_finalize_oneshot(desc, action, false); |
732 | local_bh_enable(); | 737 | local_bh_enable(); |
738 | return ret; | ||
733 | } | 739 | } |
734 | 740 | ||
735 | /* | 741 | /* |
@@ -737,10 +743,14 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) | |||
737 | * preemtible - many of them need to sleep and wait for slow busses to | 743 | * preemtible - many of them need to sleep and wait for slow busses to |
738 | * complete. | 744 | * complete. |
739 | */ | 745 | */ |
740 | static void irq_thread_fn(struct irq_desc *desc, struct irqaction *action) | 746 | static irqreturn_t irq_thread_fn(struct irq_desc *desc, |
747 | struct irqaction *action) | ||
741 | { | 748 | { |
742 | action->thread_fn(action->irq, action->dev_id); | 749 | irqreturn_t ret; |
750 | |||
751 | ret = action->thread_fn(action->irq, action->dev_id); | ||
743 | irq_finalize_oneshot(desc, action, false); | 752 | irq_finalize_oneshot(desc, action, false); |
753 | return ret; | ||
744 | } | 754 | } |
745 | 755 | ||
746 | /* | 756 | /* |
@@ -753,7 +763,8 @@ static int irq_thread(void *data) | |||
753 | }; | 763 | }; |
754 | struct irqaction *action = data; | 764 | struct irqaction *action = data; |
755 | struct irq_desc *desc = irq_to_desc(action->irq); | 765 | struct irq_desc *desc = irq_to_desc(action->irq); |
756 | void (*handler_fn)(struct irq_desc *desc, struct irqaction *action); | 766 | irqreturn_t (*handler_fn)(struct irq_desc *desc, |
767 | struct irqaction *action); | ||
757 | int wake; | 768 | int wake; |
758 | 769 | ||
759 | if (force_irqthreads & test_bit(IRQTF_FORCED_THREAD, | 770 | if (force_irqthreads & test_bit(IRQTF_FORCED_THREAD, |
@@ -783,8 +794,12 @@ static int irq_thread(void *data) | |||
783 | desc->istate |= IRQS_PENDING; | 794 | desc->istate |= IRQS_PENDING; |
784 | raw_spin_unlock_irq(&desc->lock); | 795 | raw_spin_unlock_irq(&desc->lock); |
785 | } else { | 796 | } else { |
797 | irqreturn_t action_ret; | ||
798 | |||
786 | raw_spin_unlock_irq(&desc->lock); | 799 | raw_spin_unlock_irq(&desc->lock); |
787 | handler_fn(desc, action); | 800 | action_ret = handler_fn(desc, action); |
801 | if (!noirqdebug) | ||
802 | note_interrupt(action->irq, desc, action_ret); | ||
788 | } | 803 | } |
789 | 804 | ||
790 | wake = atomic_dec_and_test(&desc->threads_active); | 805 | wake = atomic_dec_and_test(&desc->threads_active); |
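With the changes above, a threaded handler's return value is no longer discarded: it is passed to note_interrupt() (unless noirqdebug), so the spurious-IRQ detector now sees what the thread actually did. A hedged sketch of a threaded handler written with that in mind; the demo_* names are invented:

    static irqreturn_t demo_thread_fn(int irq, void *dev_id)
    {
            struct demo_dev *dev = dev_id;          /* invented device struct */

            if (!demo_process_events(dev))          /* invented helper */
                    return IRQ_NONE;                /* counted as unhandled */
            return IRQ_HANDLED;
    }

    /* Registered as usual; the hard handler returns IRQ_WAKE_THREAD. */
    int err = request_threaded_irq(irq, demo_quick_check, demo_thread_fn,
                                   IRQF_ONESHOT, "demo", dev);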
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index dfbd550401b2..aa57d5da18c1 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -167,6 +167,13 @@ out: | |||
167 | jiffies + POLL_SPURIOUS_IRQ_INTERVAL); | 167 | jiffies + POLL_SPURIOUS_IRQ_INTERVAL); |
168 | } | 168 | } |
169 | 169 | ||
170 | static inline int bad_action_ret(irqreturn_t action_ret) | ||
171 | { | ||
172 | if (likely(action_ret <= (IRQ_HANDLED | IRQ_WAKE_THREAD))) | ||
173 | return 0; | ||
174 | return 1; | ||
175 | } | ||
176 | |||
170 | /* | 177 | /* |
171 | * If 99,900 of the previous 100,000 interrupts have not been handled | 178 | * If 99,900 of the previous 100,000 interrupts have not been handled |
172 | * then assume that the IRQ is stuck in some manner. Drop a diagnostic | 179 | * then assume that the IRQ is stuck in some manner. Drop a diagnostic |
@@ -182,7 +189,7 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc, | |||
182 | struct irqaction *action; | 189 | struct irqaction *action; |
183 | unsigned long flags; | 190 | unsigned long flags; |
184 | 191 | ||
185 | if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) { | 192 | if (bad_action_ret(action_ret)) { |
186 | printk(KERN_ERR "irq event %d: bogus return value %x\n", | 193 | printk(KERN_ERR "irq event %d: bogus return value %x\n", |
187 | irq, action_ret); | 194 | irq, action_ret); |
188 | } else { | 195 | } else { |
@@ -201,10 +208,11 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc, | |||
201 | raw_spin_lock_irqsave(&desc->lock, flags); | 208 | raw_spin_lock_irqsave(&desc->lock, flags); |
202 | action = desc->action; | 209 | action = desc->action; |
203 | while (action) { | 210 | while (action) { |
204 | printk(KERN_ERR "[<%p>]", action->handler); | 211 | printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler); |
205 | print_symbol(" (%s)", | 212 | if (action->thread_fn) |
206 | (unsigned long)action->handler); | 213 | printk(KERN_CONT " threaded [<%p>] %pf", |
207 | printk("\n"); | 214 | action->thread_fn, action->thread_fn); |
215 | printk(KERN_CONT "\n"); | ||
208 | action = action->next; | 216 | action = action->next; |
209 | } | 217 | } |
210 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 218 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
@@ -262,7 +270,16 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
262 | if (desc->istate & IRQS_POLL_INPROGRESS) | 270 | if (desc->istate & IRQS_POLL_INPROGRESS) |
263 | return; | 271 | return; |
264 | 272 | ||
265 | if (unlikely(action_ret != IRQ_HANDLED)) { | 273 | /* we get here again via the threaded handler */ |
274 | if (action_ret == IRQ_WAKE_THREAD) | ||
275 | return; | ||
276 | |||
277 | if (bad_action_ret(action_ret)) { | ||
278 | report_bad_irq(irq, desc, action_ret); | ||
279 | return; | ||
280 | } | ||
281 | |||
282 | if (unlikely(action_ret == IRQ_NONE)) { | ||
266 | /* | 283 | /* |
267 | * If we are seeing only the odd spurious IRQ caused by | 284 | * If we are seeing only the odd spurious IRQ caused by |
268 | * bus asynchronicity then don't eventually trigger an error, | 285 | * bus asynchronicity then don't eventually trigger an error, |
@@ -274,8 +291,6 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
274 | else | 291 | else |
275 | desc->irqs_unhandled++; | 292 | desc->irqs_unhandled++; |
276 | desc->last_unhandled = jiffies; | 293 | desc->last_unhandled = jiffies; |
277 | if (unlikely(action_ret != IRQ_NONE)) | ||
278 | report_bad_irq(irq, desc, action_ret); | ||
279 | } | 294 | } |
280 | 295 | ||
281 | if (unlikely(try_misrouted_irq(irq, desc, action_ret))) { | 296 | if (unlikely(try_misrouted_irq(irq, desc, action_ret))) { |
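For illustration, this is the kind of driver bug that bad_action_ret() above is meant to catch; note_interrupt() now reports such values and keeps them out of the unhandled-interrupt bookkeeping. The demo_* helper is invented:

    static irqreturn_t buggy_handler(int irq, void *dev_id)
    {
            int err = demo_read_status(dev_id);     /* invented helper */

            return err;     /* bug: leaks an errno/int instead of an IRQ_* value */
    }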
diff --git a/kernel/kmod.c b/kernel/kmod.c
index ad6a81c58b44..47613dfb7b28 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -156,12 +156,6 @@ static int ____call_usermodehelper(void *data) | |||
156 | */ | 156 | */ |
157 | set_user_nice(current, 0); | 157 | set_user_nice(current, 0); |
158 | 158 | ||
159 | if (sub_info->init) { | ||
160 | retval = sub_info->init(sub_info); | ||
161 | if (retval) | ||
162 | goto fail; | ||
163 | } | ||
164 | |||
165 | retval = -ENOMEM; | 159 | retval = -ENOMEM; |
166 | new = prepare_kernel_cred(current); | 160 | new = prepare_kernel_cred(current); |
167 | if (!new) | 161 | if (!new) |
@@ -173,6 +167,14 @@ static int ____call_usermodehelper(void *data) | |||
173 | new->cap_inheritable); | 167 | new->cap_inheritable); |
174 | spin_unlock(&umh_sysctl_lock); | 168 | spin_unlock(&umh_sysctl_lock); |
175 | 169 | ||
170 | if (sub_info->init) { | ||
171 | retval = sub_info->init(sub_info, new); | ||
172 | if (retval) { | ||
173 | abort_creds(new); | ||
174 | goto fail; | ||
175 | } | ||
176 | } | ||
177 | |||
176 | commit_creds(new); | 178 | commit_creds(new); |
177 | 179 | ||
178 | retval = kernel_execve(sub_info->path, | 180 | retval = kernel_execve(sub_info->path, |
@@ -388,7 +390,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup); | |||
388 | * context in which call_usermodehelper_exec is called. | 390 | * context in which call_usermodehelper_exec is called. |
389 | */ | 391 | */ |
390 | void call_usermodehelper_setfns(struct subprocess_info *info, | 392 | void call_usermodehelper_setfns(struct subprocess_info *info, |
391 | int (*init)(struct subprocess_info *info), | 393 | int (*init)(struct subprocess_info *info, struct cred *new), |
392 | void (*cleanup)(struct subprocess_info *info), | 394 | void (*cleanup)(struct subprocess_info *info), |
393 | void *data) | 395 | void *data) |
394 | { | 396 | { |
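The reordering above runs the init() hook after the helper's credentials are prepared but before they are committed, and the hook now receives them. A hedged sketch of a caller under the new signature; the demo_* names are invented:

    static int demo_umh_init(struct subprocess_info *info, struct cred *new)
    {
            /* Adjust 'new' here before it is committed; returning
             * nonzero aborts the helper (the creds are discarded). */
            return 0;
    }

    static void demo_umh_cleanup(struct subprocess_info *info)
    {
    }

    /* Wired up as before: */
    call_usermodehelper_setfns(info, demo_umh_init, demo_umh_cleanup, data);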
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 63437d065ac8..298c9276dfdb 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3426,7 +3426,7 @@ int lock_is_held(struct lockdep_map *lock) | |||
3426 | int ret = 0; | 3426 | int ret = 0; |
3427 | 3427 | ||
3428 | if (unlikely(current->lockdep_recursion)) | 3428 | if (unlikely(current->lockdep_recursion)) |
3429 | return ret; | 3429 | return 1; /* avoid false negative lockdep_assert_held() */ |
3430 | 3430 | ||
3431 | raw_local_irq_save(flags); | 3431 | raw_local_irq_save(flags); |
3432 | check_flags(flags); | 3432 | check_flags(flags); |
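The one-liner above matters because lockdep_assert_held() warns whenever lock_is_held() reports false; returning 0 while lockdep is recursing into itself would therefore fire bogus warnings in paths like the sketch below (demo code, not from this patch):

    struct demo_obj {
            spinlock_t lock;
            int counter;
    };

    static void demo_update(struct demo_obj *obj)
    {
            lockdep_assert_held(&obj->lock);        /* warns if lock_is_held() == 0 */
            obj->counter++;
    }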
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 89419ff92e99..7e59ffb3d0ba 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -87,6 +87,8 @@ static struct rcu_state *rcu_state; | |||
87 | int rcu_scheduler_active __read_mostly; | 87 | int rcu_scheduler_active __read_mostly; |
88 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | 88 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); |
89 | 89 | ||
90 | #ifdef CONFIG_RCU_BOOST | ||
91 | |||
90 | /* | 92 | /* |
91 | * Control variables for per-CPU and per-rcu_node kthreads. These | 93 | * Control variables for per-CPU and per-rcu_node kthreads. These |
92 | * handle all flavors of RCU. | 94 | * handle all flavors of RCU. |
@@ -98,8 +100,11 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |||
98 | DEFINE_PER_CPU(char, rcu_cpu_has_work); | 100 | DEFINE_PER_CPU(char, rcu_cpu_has_work); |
99 | static char rcu_kthreads_spawnable; | 101 | static char rcu_kthreads_spawnable; |
100 | 102 | ||
103 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
104 | |||
101 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | 105 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); |
102 | static void invoke_rcu_cpu_kthread(void); | 106 | static void invoke_rcu_core(void); |
107 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | ||
103 | 108 | ||
104 | #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ | 109 | #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ |
105 | 110 | ||
@@ -1088,14 +1093,8 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
1088 | int need_report = 0; | 1093 | int need_report = 0; |
1089 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 1094 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
1090 | struct rcu_node *rnp; | 1095 | struct rcu_node *rnp; |
1091 | struct task_struct *t; | ||
1092 | 1096 | ||
1093 | /* Stop the CPU's kthread. */ | 1097 | rcu_stop_cpu_kthread(cpu); |
1094 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
1095 | if (t != NULL) { | ||
1096 | per_cpu(rcu_cpu_kthread_task, cpu) = NULL; | ||
1097 | kthread_stop(t); | ||
1098 | } | ||
1099 | 1098 | ||
1100 | /* Exclude any attempts to start a new grace period. */ | 1099 | /* Exclude any attempts to start a new grace period. */ |
1101 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1100 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
@@ -1231,7 +1230,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1231 | 1230 | ||
1232 | /* Re-raise the RCU softirq if there are callbacks remaining. */ | 1231 | /* Re-raise the RCU softirq if there are callbacks remaining. */ |
1233 | if (cpu_has_callbacks_ready_to_invoke(rdp)) | 1232 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
1234 | invoke_rcu_cpu_kthread(); | 1233 | invoke_rcu_core(); |
1235 | } | 1234 | } |
1236 | 1235 | ||
1237 | /* | 1236 | /* |
@@ -1277,7 +1276,7 @@ void rcu_check_callbacks(int cpu, int user) | |||
1277 | } | 1276 | } |
1278 | rcu_preempt_check_callbacks(cpu); | 1277 | rcu_preempt_check_callbacks(cpu); |
1279 | if (rcu_pending(cpu)) | 1278 | if (rcu_pending(cpu)) |
1280 | invoke_rcu_cpu_kthread(); | 1279 | invoke_rcu_core(); |
1281 | } | 1280 | } |
1282 | 1281 | ||
1283 | #ifdef CONFIG_SMP | 1282 | #ifdef CONFIG_SMP |
@@ -1442,13 +1441,14 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1442 | } | 1441 | } |
1443 | 1442 | ||
1444 | /* If there are callbacks ready, invoke them. */ | 1443 | /* If there are callbacks ready, invoke them. */ |
1445 | rcu_do_batch(rsp, rdp); | 1444 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
1445 | invoke_rcu_callbacks(rsp, rdp); | ||
1446 | } | 1446 | } |
1447 | 1447 | ||
1448 | /* | 1448 | /* |
1449 | * Do softirq processing for the current CPU. | 1449 | * Do softirq processing for the current CPU. |
1450 | */ | 1450 | */ |
1451 | static void rcu_process_callbacks(void) | 1451 | static void rcu_process_callbacks(struct softirq_action *unused) |
1452 | { | 1452 | { |
1453 | __rcu_process_callbacks(&rcu_sched_state, | 1453 | __rcu_process_callbacks(&rcu_sched_state, |
1454 | &__get_cpu_var(rcu_sched_data)); | 1454 | &__get_cpu_var(rcu_sched_data)); |
@@ -1465,342 +1465,20 @@ static void rcu_process_callbacks(void) | |||
1465 | * the current CPU with interrupts disabled, the rcu_cpu_kthread_task | 1465 | * the current CPU with interrupts disabled, the rcu_cpu_kthread_task |
1466 | * cannot disappear out from under us. | 1466 | * cannot disappear out from under us. |
1467 | */ | 1467 | */ |
1468 | static void invoke_rcu_cpu_kthread(void) | 1468 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
1469 | { | ||
1470 | unsigned long flags; | ||
1471 | |||
1472 | local_irq_save(flags); | ||
1473 | __this_cpu_write(rcu_cpu_has_work, 1); | ||
1474 | if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { | ||
1475 | local_irq_restore(flags); | ||
1476 | return; | ||
1477 | } | ||
1478 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); | ||
1479 | local_irq_restore(flags); | ||
1480 | } | ||
1481 | |||
1482 | /* | ||
1483 | * Wake up the specified per-rcu_node-structure kthread. | ||
1484 | * Because the per-rcu_node kthreads are immortal, we don't need | ||
1485 | * to do anything to keep them alive. | ||
1486 | */ | ||
1487 | static void invoke_rcu_node_kthread(struct rcu_node *rnp) | ||
1488 | { | ||
1489 | struct task_struct *t; | ||
1490 | |||
1491 | t = rnp->node_kthread_task; | ||
1492 | if (t != NULL) | ||
1493 | wake_up_process(t); | ||
1494 | } | ||
1495 | |||
1496 | /* | ||
1497 | * Set the specified CPU's kthread to run RT or not, as specified by | ||
1498 | * the to_rt argument. The CPU-hotplug locks are held, so the task | ||
1499 | * is not going away. | ||
1500 | */ | ||
1501 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt) | ||
1502 | { | ||
1503 | int policy; | ||
1504 | struct sched_param sp; | ||
1505 | struct task_struct *t; | ||
1506 | |||
1507 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
1508 | if (t == NULL) | ||
1509 | return; | ||
1510 | if (to_rt) { | ||
1511 | policy = SCHED_FIFO; | ||
1512 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
1513 | } else { | ||
1514 | policy = SCHED_NORMAL; | ||
1515 | sp.sched_priority = 0; | ||
1516 | } | ||
1517 | sched_setscheduler_nocheck(t, policy, &sp); | ||
1518 | } | ||
1519 | |||
1520 | /* | ||
1521 | * Timer handler to initiate the waking up of per-CPU kthreads that | ||
1522 | * have yielded the CPU due to excess numbers of RCU callbacks. | ||
1523 | * We wake up the per-rcu_node kthread, which in turn will wake up | ||
1524 | * the booster kthread. | ||
1525 | */ | ||
1526 | static void rcu_cpu_kthread_timer(unsigned long arg) | ||
1527 | { | ||
1528 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); | ||
1529 | struct rcu_node *rnp = rdp->mynode; | ||
1530 | |||
1531 | atomic_or(rdp->grpmask, &rnp->wakemask); | ||
1532 | invoke_rcu_node_kthread(rnp); | ||
1533 | } | ||
1534 | |||
1535 | /* | ||
1536 | * Drop to non-real-time priority and yield, but only after posting a | ||
1537 | * timer that will cause us to regain our real-time priority if we | ||
1538 | * remain preempted. Either way, we restore our real-time priority | ||
1539 | * before returning. | ||
1540 | */ | ||
1541 | static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | ||
1542 | { | ||
1543 | struct sched_param sp; | ||
1544 | struct timer_list yield_timer; | ||
1545 | |||
1546 | setup_timer_on_stack(&yield_timer, f, arg); | ||
1547 | mod_timer(&yield_timer, jiffies + 2); | ||
1548 | sp.sched_priority = 0; | ||
1549 | sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); | ||
1550 | set_user_nice(current, 19); | ||
1551 | schedule(); | ||
1552 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
1553 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | ||
1554 | del_timer(&yield_timer); | ||
1555 | } | ||
1556 | |||
1557 | /* | ||
1558 | * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. | ||
1559 | * This can happen while the corresponding CPU is either coming online | ||
1560 | * or going offline. We cannot wait until the CPU is fully online | ||
1561 | * before starting the kthread, because the various notifier functions | ||
1562 | * can wait for RCU grace periods. So we park rcu_cpu_kthread() until | ||
1563 | * the corresponding CPU is online. | ||
1564 | * | ||
1565 | * Return 1 if the kthread needs to stop, 0 otherwise. | ||
1566 | * | ||
1567 | * Caller must disable bh. This function can momentarily enable it. | ||
1568 | */ | ||
1569 | static int rcu_cpu_kthread_should_stop(int cpu) | ||
1570 | { | ||
1571 | while (cpu_is_offline(cpu) || | ||
1572 | !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || | ||
1573 | smp_processor_id() != cpu) { | ||
1574 | if (kthread_should_stop()) | ||
1575 | return 1; | ||
1576 | per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | ||
1577 | per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); | ||
1578 | local_bh_enable(); | ||
1579 | schedule_timeout_uninterruptible(1); | ||
1580 | if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) | ||
1581 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); | ||
1582 | local_bh_disable(); | ||
1583 | } | ||
1584 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | ||
1585 | return 0; | ||
1586 | } | ||
1587 | |||
1588 | /* | ||
1589 | * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | ||
1590 | * earlier RCU softirq. | ||
1591 | */ | ||
1592 | static int rcu_cpu_kthread(void *arg) | ||
1593 | { | ||
1594 | int cpu = (int)(long)arg; | ||
1595 | unsigned long flags; | ||
1596 | int spincnt = 0; | ||
1597 | unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); | ||
1598 | char work; | ||
1599 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); | ||
1600 | |||
1601 | for (;;) { | ||
1602 | *statusp = RCU_KTHREAD_WAITING; | ||
1603 | rcu_wait(*workp != 0 || kthread_should_stop()); | ||
1604 | local_bh_disable(); | ||
1605 | if (rcu_cpu_kthread_should_stop(cpu)) { | ||
1606 | local_bh_enable(); | ||
1607 | break; | ||
1608 | } | ||
1609 | *statusp = RCU_KTHREAD_RUNNING; | ||
1610 | per_cpu(rcu_cpu_kthread_loops, cpu)++; | ||
1611 | local_irq_save(flags); | ||
1612 | work = *workp; | ||
1613 | *workp = 0; | ||
1614 | local_irq_restore(flags); | ||
1615 | if (work) | ||
1616 | rcu_process_callbacks(); | ||
1617 | local_bh_enable(); | ||
1618 | if (*workp != 0) | ||
1619 | spincnt++; | ||
1620 | else | ||
1621 | spincnt = 0; | ||
1622 | if (spincnt > 10) { | ||
1623 | *statusp = RCU_KTHREAD_YIELDING; | ||
1624 | rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); | ||
1625 | spincnt = 0; | ||
1626 | } | ||
1627 | } | ||
1628 | *statusp = RCU_KTHREAD_STOPPED; | ||
1629 | return 0; | ||
1630 | } | ||
1631 | |||
1632 | /* | ||
1633 | * Spawn a per-CPU kthread, setting up affinity and priority. | ||
1634 | * Because the CPU hotplug lock is held, no other CPU will be attempting | ||
1635 | * to manipulate rcu_cpu_kthread_task. There might be another CPU | ||
1636 | * attempting to access it during boot, but the locking in kthread_bind() | ||
1637 | * will enforce sufficient ordering. | ||
1638 | */ | ||
1639 | static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) | ||
1640 | { | 1469 | { |
1641 | struct sched_param sp; | 1470 | if (likely(!rsp->boost)) { |
1642 | struct task_struct *t; | 1471 | rcu_do_batch(rsp, rdp); |
1643 | |||
1644 | if (!rcu_kthreads_spawnable || | ||
1645 | per_cpu(rcu_cpu_kthread_task, cpu) != NULL) | ||
1646 | return 0; | ||
1647 | t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); | ||
1648 | if (IS_ERR(t)) | ||
1649 | return PTR_ERR(t); | ||
1650 | kthread_bind(t, cpu); | ||
1651 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | ||
1652 | WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); | ||
1653 | per_cpu(rcu_cpu_kthread_task, cpu) = t; | ||
1654 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
1655 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
1656 | return 0; | ||
1657 | } | ||
1658 | |||
1659 | /* | ||
1660 | * Per-rcu_node kthread, which is in charge of waking up the per-CPU | ||
1661 | * kthreads when needed. We ignore requests to wake up kthreads | ||
1662 | * for offline CPUs, which is OK because force_quiescent_state() | ||
1663 | * takes care of this case. | ||
1664 | */ | ||
1665 | static int rcu_node_kthread(void *arg) | ||
1666 | { | ||
1667 | int cpu; | ||
1668 | unsigned long flags; | ||
1669 | unsigned long mask; | ||
1670 | struct rcu_node *rnp = (struct rcu_node *)arg; | ||
1671 | struct sched_param sp; | ||
1672 | struct task_struct *t; | ||
1673 | |||
1674 | for (;;) { | ||
1675 | rnp->node_kthread_status = RCU_KTHREAD_WAITING; | ||
1676 | rcu_wait(atomic_read(&rnp->wakemask) != 0); | ||
1677 | rnp->node_kthread_status = RCU_KTHREAD_RUNNING; | ||
1678 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
1679 | mask = atomic_xchg(&rnp->wakemask, 0); | ||
1680 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ | ||
1681 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { | ||
1682 | if ((mask & 0x1) == 0) | ||
1683 | continue; | ||
1684 | preempt_disable(); | ||
1685 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
1686 | if (!cpu_online(cpu) || t == NULL) { | ||
1687 | preempt_enable(); | ||
1688 | continue; | ||
1689 | } | ||
1690 | per_cpu(rcu_cpu_has_work, cpu) = 1; | ||
1691 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
1692 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
1693 | preempt_enable(); | ||
1694 | } | ||
1695 | } | ||
1696 | /* NOTREACHED */ | ||
1697 | rnp->node_kthread_status = RCU_KTHREAD_STOPPED; | ||
1698 | return 0; | ||
1699 | } | ||
1700 | |||
1701 | /* | ||
1702 | * Set the per-rcu_node kthread's affinity to cover all CPUs that are | ||
1703 | * served by the rcu_node in question. The CPU hotplug lock is still | ||
1704 | * held, so the value of rnp->qsmaskinit will be stable. | ||
1705 | * | ||
1706 | * We don't include outgoingcpu in the affinity set, use -1 if there is | ||
1707 | * no outgoing CPU. If there are no CPUs left in the affinity set, | ||
1708 | * this function allows the kthread to execute on any CPU. | ||
1709 | */ | ||
1710 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | ||
1711 | { | ||
1712 | cpumask_var_t cm; | ||
1713 | int cpu; | ||
1714 | unsigned long mask = rnp->qsmaskinit; | ||
1715 | |||
1716 | if (rnp->node_kthread_task == NULL) | ||
1717 | return; | ||
1718 | if (!alloc_cpumask_var(&cm, GFP_KERNEL)) | ||
1719 | return; | 1472 | return; |
1720 | cpumask_clear(cm); | ||
1721 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) | ||
1722 | if ((mask & 0x1) && cpu != outgoingcpu) | ||
1723 | cpumask_set_cpu(cpu, cm); | ||
1724 | if (cpumask_weight(cm) == 0) { | ||
1725 | cpumask_setall(cm); | ||
1726 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) | ||
1727 | cpumask_clear_cpu(cpu, cm); | ||
1728 | WARN_ON_ONCE(cpumask_weight(cm) == 0); | ||
1729 | } | 1473 | } |
1730 | set_cpus_allowed_ptr(rnp->node_kthread_task, cm); | 1474 | invoke_rcu_callbacks_kthread(); |
1731 | rcu_boost_kthread_setaffinity(rnp, cm); | ||
1732 | free_cpumask_var(cm); | ||
1733 | } | 1475 | } |
1734 | 1476 | ||
1735 | /* | 1477 | static void invoke_rcu_core(void) |
1736 | * Spawn a per-rcu_node kthread, setting priority and affinity. | ||
1737 | * Called during boot before online/offline can happen, or, if | ||
1738 | * during runtime, with the main CPU-hotplug locks held. So only | ||
1739 | * one of these can be executing at a time. | ||
1740 | */ | ||
1741 | static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | ||
1742 | struct rcu_node *rnp) | ||
1743 | { | 1478 | { |
1744 | unsigned long flags; | 1479 | raise_softirq(RCU_SOFTIRQ); |
1745 | int rnp_index = rnp - &rsp->node[0]; | ||
1746 | struct sched_param sp; | ||
1747 | struct task_struct *t; | ||
1748 | |||
1749 | if (!rcu_kthreads_spawnable || | ||
1750 | rnp->qsmaskinit == 0) | ||
1751 | return 0; | ||
1752 | if (rnp->node_kthread_task == NULL) { | ||
1753 | t = kthread_create(rcu_node_kthread, (void *)rnp, | ||
1754 | "rcun%d", rnp_index); | ||
1755 | if (IS_ERR(t)) | ||
1756 | return PTR_ERR(t); | ||
1757 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
1758 | rnp->node_kthread_task = t; | ||
1759 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1760 | sp.sched_priority = 99; | ||
1761 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
1762 | } | ||
1763 | return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); | ||
1764 | } | 1480 | } |
1765 | 1481 | ||
1766 | static void rcu_wake_one_boost_kthread(struct rcu_node *rnp); | ||
1767 | |||
1768 | /* | ||
1769 | * Spawn all kthreads -- called as soon as the scheduler is running. | ||
1770 | */ | ||
1771 | static int __init rcu_spawn_kthreads(void) | ||
1772 | { | ||
1773 | int cpu; | ||
1774 | struct rcu_node *rnp; | ||
1775 | struct task_struct *t; | ||
1776 | |||
1777 | rcu_kthreads_spawnable = 1; | ||
1778 | for_each_possible_cpu(cpu) { | ||
1779 | per_cpu(rcu_cpu_has_work, cpu) = 0; | ||
1780 | if (cpu_online(cpu)) { | ||
1781 | (void)rcu_spawn_one_cpu_kthread(cpu); | ||
1782 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
1783 | if (t) | ||
1784 | wake_up_process(t); | ||
1785 | } | ||
1786 | } | ||
1787 | rnp = rcu_get_root(rcu_state); | ||
1788 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | ||
1789 | if (rnp->node_kthread_task) | ||
1790 | wake_up_process(rnp->node_kthread_task); | ||
1791 | if (NUM_RCU_NODES > 1) { | ||
1792 | rcu_for_each_leaf_node(rcu_state, rnp) { | ||
1793 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | ||
1794 | t = rnp->node_kthread_task; | ||
1795 | if (t) | ||
1796 | wake_up_process(t); | ||
1797 | rcu_wake_one_boost_kthread(rnp); | ||
1798 | } | ||
1799 | } | ||
1800 | return 0; | ||
1801 | } | ||
1802 | early_initcall(rcu_spawn_kthreads); | ||
1803 | |||
1804 | static void | 1482 | static void |
1805 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | 1483 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), |
1806 | struct rcu_state *rsp) | 1484 | struct rcu_state *rsp) |
@@ -2207,44 +1885,6 @@ static void __cpuinit rcu_prepare_cpu(int cpu) | |||
2207 | rcu_preempt_init_percpu_data(cpu); | 1885 | rcu_preempt_init_percpu_data(cpu); |
2208 | } | 1886 | } |
2209 | 1887 | ||
2210 | static void __cpuinit rcu_prepare_kthreads(int cpu) | ||
2211 | { | ||
2212 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | ||
2213 | struct rcu_node *rnp = rdp->mynode; | ||
2214 | |||
2215 | /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ | ||
2216 | if (rcu_kthreads_spawnable) { | ||
2217 | (void)rcu_spawn_one_cpu_kthread(cpu); | ||
2218 | if (rnp->node_kthread_task == NULL) | ||
2219 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | ||
2220 | } | ||
2221 | } | ||
2222 | |||
2223 | /* | ||
2224 | * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state, | ||
2225 | * but the RCU threads are woken on demand, and if demand is low this | ||
2226 | * could be a while triggering the hung task watchdog. | ||
2227 | * | ||
2228 | * In order to avoid this, poke all tasks once the CPU is fully | ||
2229 | * up and running. | ||
2230 | */ | ||
2231 | static void __cpuinit rcu_online_kthreads(int cpu) | ||
2232 | { | ||
2233 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | ||
2234 | struct rcu_node *rnp = rdp->mynode; | ||
2235 | struct task_struct *t; | ||
2236 | |||
2237 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
2238 | if (t) | ||
2239 | wake_up_process(t); | ||
2240 | |||
2241 | t = rnp->node_kthread_task; | ||
2242 | if (t) | ||
2243 | wake_up_process(t); | ||
2244 | |||
2245 | rcu_wake_one_boost_kthread(rnp); | ||
2246 | } | ||
2247 | |||
2248 | /* | 1888 | /* |
2249 | * Handle CPU online/offline notification events. | 1889 | * Handle CPU online/offline notification events. |
2250 | */ | 1890 | */ |
@@ -2262,7 +1902,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
2262 | rcu_prepare_kthreads(cpu); | 1902 | rcu_prepare_kthreads(cpu); |
2263 | break; | 1903 | break; |
2264 | case CPU_ONLINE: | 1904 | case CPU_ONLINE: |
2265 | rcu_online_kthreads(cpu); | ||
2266 | case CPU_DOWN_FAILED: | 1905 | case CPU_DOWN_FAILED: |
2267 | rcu_node_kthread_setaffinity(rnp, -1); | 1906 | rcu_node_kthread_setaffinity(rnp, -1); |
2268 | rcu_cpu_kthread_setrt(cpu, 1); | 1907 | rcu_cpu_kthread_setrt(cpu, 1); |
@@ -2410,6 +2049,7 @@ void __init rcu_init(void) | |||
2410 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); | 2049 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); |
2411 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); | 2050 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
2412 | __rcu_init_preempt(); | 2051 | __rcu_init_preempt(); |
2052 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | ||
2413 | 2053 | ||
2414 | /* | 2054 | /* |
2415 | * We don't need protection against CPU-hotplug here because | 2055 | * We don't need protection against CPU-hotplug here because |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 7b9a08b4aaea..01b2ccda26fb 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -369,6 +369,7 @@ struct rcu_state { | |||
369 | /* period because */ | 369 | /* period because */ |
370 | /* force_quiescent_state() */ | 370 | /* force_quiescent_state() */ |
371 | /* was running. */ | 371 | /* was running. */ |
372 | u8 boost; /* Subject to priority boost. */ | ||
372 | unsigned long gpnum; /* Current gp number. */ | 373 | unsigned long gpnum; /* Current gp number. */ |
373 | unsigned long completed; /* # of last completed gp. */ | 374 | unsigned long completed; /* # of last completed gp. */ |
374 | 375 | ||
@@ -426,6 +427,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); | |||
426 | #ifdef CONFIG_HOTPLUG_CPU | 427 | #ifdef CONFIG_HOTPLUG_CPU |
427 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | 428 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, |
428 | unsigned long flags); | 429 | unsigned long flags); |
430 | static void rcu_stop_cpu_kthread(int cpu); | ||
429 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 431 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
430 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); | 432 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); |
431 | static void rcu_print_task_stall(struct rcu_node *rnp); | 433 | static void rcu_print_task_stall(struct rcu_node *rnp); |
@@ -450,11 +452,19 @@ static void rcu_preempt_send_cbs_to_online(void); | |||
450 | static void __init __rcu_init_preempt(void); | 452 | static void __init __rcu_init_preempt(void); |
451 | static void rcu_needs_cpu_flush(void); | 453 | static void rcu_needs_cpu_flush(void); |
452 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 454 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
455 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | ||
456 | static void invoke_rcu_callbacks_kthread(void); | ||
457 | #ifdef CONFIG_RCU_BOOST | ||
458 | static void rcu_preempt_do_callbacks(void); | ||
453 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, | 459 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, |
454 | cpumask_var_t cm); | 460 | cpumask_var_t cm); |
455 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | ||
456 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | 461 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
457 | struct rcu_node *rnp, | 462 | struct rcu_node *rnp, |
458 | int rnp_index); | 463 | int rnp_index); |
464 | static void invoke_rcu_node_kthread(struct rcu_node *rnp); | ||
465 | static void rcu_yield(void (*f)(unsigned long), unsigned long arg); | ||
466 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
467 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt); | ||
468 | static void __cpuinit rcu_prepare_kthreads(int cpu); | ||
459 | 469 | ||
460 | #endif /* #ifndef RCU_TREE_NONCORE */ | 470 | #endif /* #ifndef RCU_TREE_NONCORE */ |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c8bff3099a89..14dc7dd00902 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -602,6 +602,15 @@ static void rcu_preempt_process_callbacks(void) | |||
602 | &__get_cpu_var(rcu_preempt_data)); | 602 | &__get_cpu_var(rcu_preempt_data)); |
603 | } | 603 | } |
604 | 604 | ||
605 | #ifdef CONFIG_RCU_BOOST | ||
606 | |||
607 | static void rcu_preempt_do_callbacks(void) | ||
608 | { | ||
609 | rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); | ||
610 | } | ||
611 | |||
612 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
613 | |||
605 | /* | 614 | /* |
606 | * Queue a preemptible-RCU callback for invocation after a grace period. | 615 | * Queue a preemptible-RCU callback for invocation after a grace period. |
607 | */ | 616 | */ |
@@ -1249,6 +1258,23 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |||
1249 | } | 1258 | } |
1250 | 1259 | ||
1251 | /* | 1260 | /* |
1261 | * Wake up the per-CPU kthread to invoke RCU callbacks. | ||
1262 | */ | ||
1263 | static void invoke_rcu_callbacks_kthread(void) | ||
1264 | { | ||
1265 | unsigned long flags; | ||
1266 | |||
1267 | local_irq_save(flags); | ||
1268 | __this_cpu_write(rcu_cpu_has_work, 1); | ||
1269 | if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { | ||
1270 | local_irq_restore(flags); | ||
1271 | return; | ||
1272 | } | ||
1273 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); | ||
1274 | local_irq_restore(flags); | ||
1275 | } | ||
1276 | |||
1277 | /* | ||
1252 | * Set the affinity of the boost kthread. The CPU-hotplug locks are | 1278 | * Set the affinity of the boost kthread. The CPU-hotplug locks are |
1253 | * held, so no one should be messing with the existence of the boost | 1279 | * held, so no one should be messing with the existence of the boost |
1254 | * kthread. | 1280 | * kthread. |
@@ -1288,6 +1314,7 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1288 | 1314 | ||
1289 | if (&rcu_preempt_state != rsp) | 1315 | if (&rcu_preempt_state != rsp) |
1290 | return 0; | 1316 | return 0; |
1317 | rsp->boost = 1; | ||
1291 | if (rnp->boost_kthread_task != NULL) | 1318 | if (rnp->boost_kthread_task != NULL) |
1292 | return 0; | 1319 | return 0; |
1293 | t = kthread_create(rcu_boost_kthread, (void *)rnp, | 1320 | t = kthread_create(rcu_boost_kthread, (void *)rnp, |
@@ -1299,13 +1326,372 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1299 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1326 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1300 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1327 | sp.sched_priority = RCU_KTHREAD_PRIO; |
1301 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1328 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1329 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | ||
1302 | return 0; | 1330 | return 0; |
1303 | } | 1331 | } |
1304 | 1332 | ||
1305 | static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) | 1333 | #ifdef CONFIG_HOTPLUG_CPU |
1334 | |||
1335 | /* | ||
1336 | * Stop the RCU's per-CPU kthread when its CPU goes offline,. | ||
1337 | */ | ||
1338 | static void rcu_stop_cpu_kthread(int cpu) | ||
1306 | { | 1339 | { |
1307 | if (rnp->boost_kthread_task) | 1340 | struct task_struct *t; |
1308 | wake_up_process(rnp->boost_kthread_task); | 1341 | |
1342 | /* Stop the CPU's kthread. */ | ||
1343 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
1344 | if (t != NULL) { | ||
1345 | per_cpu(rcu_cpu_kthread_task, cpu) = NULL; | ||
1346 | kthread_stop(t); | ||
1347 | } | ||
1348 | } | ||
1349 | |||
1350 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
1351 | |||
1352 | static void rcu_kthread_do_work(void) | ||
1353 | { | ||
1354 | rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); | ||
1355 | rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); | ||
1356 | rcu_preempt_do_callbacks(); | ||
1357 | } | ||
1358 | |||
1359 | /* | ||
1360 | * Wake up the specified per-rcu_node-structure kthread. | ||
1361 | * Because the per-rcu_node kthreads are immortal, we don't need | ||
1362 | * to do anything to keep them alive. | ||
1363 | */ | ||
1364 | static void invoke_rcu_node_kthread(struct rcu_node *rnp) | ||
1365 | { | ||
1366 | struct task_struct *t; | ||
1367 | |||
1368 | t = rnp->node_kthread_task; | ||
1369 | if (t != NULL) | ||
1370 | wake_up_process(t); | ||
1371 | } | ||
1372 | |||
1373 | /* | ||
1374 | * Set the specified CPU's kthread to run RT or not, as specified by | ||
1375 | * the to_rt argument. The CPU-hotplug locks are held, so the task | ||
1376 | * is not going away. | ||
1377 | */ | ||
1378 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt) | ||
1379 | { | ||
1380 | int policy; | ||
1381 | struct sched_param sp; | ||
1382 | struct task_struct *t; | ||
1383 | |||
1384 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
1385 | if (t == NULL) | ||
1386 | return; | ||
1387 | if (to_rt) { | ||
1388 | policy = SCHED_FIFO; | ||
1389 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
1390 | } else { | ||
1391 | policy = SCHED_NORMAL; | ||
1392 | sp.sched_priority = 0; | ||
1393 | } | ||
1394 | sched_setscheduler_nocheck(t, policy, &sp); | ||
1395 | } | ||
1396 | |||
1397 | /* | ||
1398 | * Timer handler to initiate the waking up of per-CPU kthreads that | ||
1399 | * have yielded the CPU due to excess numbers of RCU callbacks. | ||
1400 | * We wake up the per-rcu_node kthread, which in turn will wake up | ||
1401 | * the booster kthread. | ||
1402 | */ | ||
1403 | static void rcu_cpu_kthread_timer(unsigned long arg) | ||
1404 | { | ||
1405 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); | ||
1406 | struct rcu_node *rnp = rdp->mynode; | ||
1407 | |||
1408 | atomic_or(rdp->grpmask, &rnp->wakemask); | ||
1409 | invoke_rcu_node_kthread(rnp); | ||
1410 | } | ||
1411 | |||
1412 | /* | ||
1413 | * Drop to non-real-time priority and yield, but only after posting a | ||
1414 | * timer that will cause us to regain our real-time priority if we | ||
1415 | * remain preempted. Either way, we restore our real-time priority | ||
1416 | * before returning. | ||
1417 | */ | ||
1418 | static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | ||
1419 | { | ||
1420 | struct sched_param sp; | ||
1421 | struct timer_list yield_timer; | ||
1422 | |||
1423 | setup_timer_on_stack(&yield_timer, f, arg); | ||
1424 | mod_timer(&yield_timer, jiffies + 2); | ||
1425 | sp.sched_priority = 0; | ||
1426 | sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); | ||
1427 | set_user_nice(current, 19); | ||
1428 | schedule(); | ||
1429 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
1430 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | ||
1431 | del_timer(&yield_timer); | ||
1432 | } | ||
1433 | |||
1434 | /* | ||
1435 | * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. | ||
1436 | * This can happen while the corresponding CPU is either coming online | ||
1437 | * or going offline. We cannot wait until the CPU is fully online | ||
1438 | * before starting the kthread, because the various notifier functions | ||
1439 | * can wait for RCU grace periods. So we park rcu_cpu_kthread() until | ||
1440 | * the corresponding CPU is online. | ||
1441 | * | ||
1442 | * Return 1 if the kthread needs to stop, 0 otherwise. | ||
1443 | * | ||
1444 | * Caller must disable bh. This function can momentarily enable it. | ||
1445 | */ | ||
1446 | static int rcu_cpu_kthread_should_stop(int cpu) | ||
1447 | { | ||
1448 | while (cpu_is_offline(cpu) || | ||
1449 | !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || | ||
1450 | smp_processor_id() != cpu) { | ||
1451 | if (kthread_should_stop()) | ||
1452 | return 1; | ||
1453 | per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | ||
1454 | per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); | ||
1455 | local_bh_enable(); | ||
1456 | schedule_timeout_uninterruptible(1); | ||
1457 | if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) | ||
1458 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); | ||
1459 | local_bh_disable(); | ||
1460 | } | ||
1461 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | ||
1462 | return 0; | ||
1463 | } | ||
1464 | |||
1465 | /* | ||
1466 | * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | ||
1467 | * earlier RCU softirq. | ||
1468 | */ | ||
1469 | static int rcu_cpu_kthread(void *arg) | ||
1470 | { | ||
1471 | int cpu = (int)(long)arg; | ||
1472 | unsigned long flags; | ||
1473 | int spincnt = 0; | ||
1474 | unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); | ||
1475 | char work; | ||
1476 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); | ||
1477 | |||
1478 | for (;;) { | ||
1479 | *statusp = RCU_KTHREAD_WAITING; | ||
1480 | rcu_wait(*workp != 0 || kthread_should_stop()); | ||
1481 | local_bh_disable(); | ||
1482 | if (rcu_cpu_kthread_should_stop(cpu)) { | ||
1483 | local_bh_enable(); | ||
1484 | break; | ||
1485 | } | ||
1486 | *statusp = RCU_KTHREAD_RUNNING; | ||
1487 | per_cpu(rcu_cpu_kthread_loops, cpu)++; | ||
1488 | local_irq_save(flags); | ||
1489 | work = *workp; | ||
1490 | *workp = 0; | ||
1491 | local_irq_restore(flags); | ||
1492 | if (work) | ||
1493 | rcu_kthread_do_work(); | ||
1494 | local_bh_enable(); | ||
1495 | if (*workp != 0) | ||
1496 | spincnt++; | ||
1497 | else | ||
1498 | spincnt = 0; | ||
1499 | if (spincnt > 10) { | ||
1500 | *statusp = RCU_KTHREAD_YIELDING; | ||
1501 | rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); | ||
1502 | spincnt = 0; | ||
1503 | } | ||
1504 | } | ||
1505 | *statusp = RCU_KTHREAD_STOPPED; | ||
1506 | return 0; | ||
1507 | } | ||
1508 | |||
1509 | /* | ||
1510 | * Spawn a per-CPU kthread, setting up affinity and priority. | ||
1511 | * Because the CPU hotplug lock is held, no other CPU will be attempting | ||
1512 | * to manipulate rcu_cpu_kthread_task. There might be another CPU | ||
1513 | * attempting to access it during boot, but the locking in kthread_bind() | ||
1514 | * will enforce sufficient ordering. | ||
1515 | * | ||
1516 | * Please note that we cannot simply refuse to wake up the per-CPU | ||
1517 | * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, | ||
1518 | * which can result in softlockup complaints if the task ends up being | ||
1519 | * idle for more than a couple of minutes. | ||
1520 | * | ||
1521 | * However, please note also that we cannot bind the per-CPU kthread to its | ||
1522 | * CPU until that CPU is fully online. We also cannot wait until the | ||
1523 | * CPU is fully online before we create its per-CPU kthread, as this would | ||
1524 | * deadlock the system when CPU notifiers tried waiting for grace | ||
1525 | * periods. So we bind the per-CPU kthread to its CPU only if the CPU | ||
1526 | * is online. If its CPU is not yet fully online, then the code in | ||
1527 | * rcu_cpu_kthread() will wait until it is fully online, and then do | ||
1528 | * the binding. | ||
1529 | */ | ||
1530 | static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) | ||
1531 | { | ||
1532 | struct sched_param sp; | ||
1533 | struct task_struct *t; | ||
1534 | |||
1535 | if (!rcu_kthreads_spawnable || | ||
1536 | per_cpu(rcu_cpu_kthread_task, cpu) != NULL) | ||
1537 | return 0; | ||
1538 | t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); | ||
1539 | if (IS_ERR(t)) | ||
1540 | return PTR_ERR(t); | ||
1541 | if (cpu_online(cpu)) | ||
1542 | kthread_bind(t, cpu); | ||
1543 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | ||
1544 | WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); | ||
1545 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
1546 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
1547 | per_cpu(rcu_cpu_kthread_task, cpu) = t; | ||
1548 | wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ | ||
1549 | return 0; | ||
1550 | } | ||
1551 | |||
1552 | /* | ||
1553 | * Per-rcu_node kthread, which is in charge of waking up the per-CPU | ||
1554 | * kthreads when needed. We ignore requests to wake up kthreads | ||
1555 | * for offline CPUs, which is OK because force_quiescent_state() | ||
1556 | * takes care of this case. | ||
1557 | */ | ||
1558 | static int rcu_node_kthread(void *arg) | ||
1559 | { | ||
1560 | int cpu; | ||
1561 | unsigned long flags; | ||
1562 | unsigned long mask; | ||
1563 | struct rcu_node *rnp = (struct rcu_node *)arg; | ||
1564 | struct sched_param sp; | ||
1565 | struct task_struct *t; | ||
1566 | |||
1567 | for (;;) { | ||
1568 | rnp->node_kthread_status = RCU_KTHREAD_WAITING; | ||
1569 | rcu_wait(atomic_read(&rnp->wakemask) != 0); | ||
1570 | rnp->node_kthread_status = RCU_KTHREAD_RUNNING; | ||
1571 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
1572 | mask = atomic_xchg(&rnp->wakemask, 0); | ||
1573 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ | ||
1574 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { | ||
1575 | if ((mask & 0x1) == 0) | ||
1576 | continue; | ||
1577 | preempt_disable(); | ||
1578 | t = per_cpu(rcu_cpu_kthread_task, cpu); | ||
1579 | if (!cpu_online(cpu) || t == NULL) { | ||
1580 | preempt_enable(); | ||
1581 | continue; | ||
1582 | } | ||
1583 | per_cpu(rcu_cpu_has_work, cpu) = 1; | ||
1584 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
1585 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
1586 | preempt_enable(); | ||
1587 | } | ||
1588 | } | ||
1589 | /* NOTREACHED */ | ||
1590 | rnp->node_kthread_status = RCU_KTHREAD_STOPPED; | ||
1591 | return 0; | ||
1592 | } | ||
1593 | |||
1594 | /* | ||
1595 | * Set the per-rcu_node kthread's affinity to cover all CPUs that are | ||
1596 | * served by the rcu_node in question. The CPU hotplug lock is still | ||
1597 | * held, so the value of rnp->qsmaskinit will be stable. | ||
1598 | * | ||
1599 | * We don't include outgoingcpu in the affinity set, use -1 if there is | ||
1600 | * no outgoing CPU. If there are no CPUs left in the affinity set, | ||
1601 | * this function allows the kthread to execute on any CPU. | ||
1602 | */ | ||
1603 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | ||
1604 | { | ||
1605 | cpumask_var_t cm; | ||
1606 | int cpu; | ||
1607 | unsigned long mask = rnp->qsmaskinit; | ||
1608 | |||
1609 | if (rnp->node_kthread_task == NULL) | ||
1610 | return; | ||
1611 | if (!alloc_cpumask_var(&cm, GFP_KERNEL)) | ||
1612 | return; | ||
1613 | cpumask_clear(cm); | ||
1614 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) | ||
1615 | if ((mask & 0x1) && cpu != outgoingcpu) | ||
1616 | cpumask_set_cpu(cpu, cm); | ||
1617 | if (cpumask_weight(cm) == 0) { | ||
1618 | cpumask_setall(cm); | ||
1619 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) | ||
1620 | cpumask_clear_cpu(cpu, cm); | ||
1621 | WARN_ON_ONCE(cpumask_weight(cm) == 0); | ||
1622 | } | ||
1623 | set_cpus_allowed_ptr(rnp->node_kthread_task, cm); | ||
1624 | rcu_boost_kthread_setaffinity(rnp, cm); | ||
1625 | free_cpumask_var(cm); | ||
1626 | } | ||
1627 | |||
1628 | /* | ||
1629 | * Spawn a per-rcu_node kthread, setting priority and affinity. | ||
1630 | * Called during boot before online/offline can happen, or, if | ||
1631 | * during runtime, with the main CPU-hotplug locks held. So only | ||
1632 | * one of these can be executing at a time. | ||
1633 | */ | ||
1634 | static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | ||
1635 | struct rcu_node *rnp) | ||
1636 | { | ||
1637 | unsigned long flags; | ||
1638 | int rnp_index = rnp - &rsp->node[0]; | ||
1639 | struct sched_param sp; | ||
1640 | struct task_struct *t; | ||
1641 | |||
1642 | if (!rcu_kthreads_spawnable || | ||
1643 | rnp->qsmaskinit == 0) | ||
1644 | return 0; | ||
1645 | if (rnp->node_kthread_task == NULL) { | ||
1646 | t = kthread_create(rcu_node_kthread, (void *)rnp, | ||
1647 | "rcun%d", rnp_index); | ||
1648 | if (IS_ERR(t)) | ||
1649 | return PTR_ERR(t); | ||
1650 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
1651 | rnp->node_kthread_task = t; | ||
1652 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1653 | sp.sched_priority = 99; | ||
1654 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
1655 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | ||
1656 | } | ||
1657 | return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); | ||
1658 | } | ||
1659 | |||
1660 | /* | ||
1661 | * Spawn all kthreads -- called as soon as the scheduler is running. | ||
1662 | */ | ||
1663 | static int __init rcu_spawn_kthreads(void) | ||
1664 | { | ||
1665 | int cpu; | ||
1666 | struct rcu_node *rnp; | ||
1667 | |||
1668 | rcu_kthreads_spawnable = 1; | ||
1669 | for_each_possible_cpu(cpu) { | ||
1670 | per_cpu(rcu_cpu_has_work, cpu) = 0; | ||
1671 | if (cpu_online(cpu)) | ||
1672 | (void)rcu_spawn_one_cpu_kthread(cpu); | ||
1673 | } | ||
1674 | rnp = rcu_get_root(rcu_state); | ||
1675 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | ||
1676 | if (NUM_RCU_NODES > 1) { | ||
1677 | rcu_for_each_leaf_node(rcu_state, rnp) | ||
1678 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | ||
1679 | } | ||
1680 | return 0; | ||
1681 | } | ||
1682 | early_initcall(rcu_spawn_kthreads); | ||
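
early_initcall() registers rcu_spawn_kthreads() to run from do_pre_smp_initcalls(), before the secondary CPUs come up, so the loop above normally sees only the boot CPU online; CPUs that appear later are handled by rcu_prepare_kthreads() below. The registration idiom itself, with a hypothetical function name:

#include <linux/init.h>

static int __init my_spawn_everything(void)
{
	/* runs once, early in boot, before SMP bring-up and ordinary initcalls */
	return 0;
}
early_initcall(my_spawn_everything);
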
1683 | |||
1684 | static void __cpuinit rcu_prepare_kthreads(int cpu) | ||
1685 | { | ||
1686 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | ||
1687 | struct rcu_node *rnp = rdp->mynode; | ||
1688 | |||
1689 | /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ | ||
1690 | if (rcu_kthreads_spawnable) { | ||
1691 | (void)rcu_spawn_one_cpu_kthread(cpu); | ||
1692 | if (rnp->node_kthread_task == NULL) | ||
1693 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | ||
1694 | } | ||
1309 | } | 1695 | } |
1310 | 1696 | ||
1311 | #else /* #ifdef CONFIG_RCU_BOOST */ | 1697 | #else /* #ifdef CONFIG_RCU_BOOST */ |
@@ -1315,23 +1701,32 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |||
1315 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1701 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1316 | } | 1702 | } |
1317 | 1703 | ||
1318 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, | 1704 | static void invoke_rcu_callbacks_kthread(void) |
1319 | cpumask_var_t cm) | ||
1320 | { | 1705 | { |
1706 | WARN_ON_ONCE(1); | ||
1321 | } | 1707 | } |
1322 | 1708 | ||
1323 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | 1709 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) |
1324 | { | 1710 | { |
1325 | } | 1711 | } |
1326 | 1712 | ||
1327 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | 1713 | #ifdef CONFIG_HOTPLUG_CPU |
1328 | struct rcu_node *rnp, | 1714 | |
1329 | int rnp_index) | 1715 | static void rcu_stop_cpu_kthread(int cpu) |
1716 | { | ||
1717 | } | ||
1718 | |||
1719 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
1720 | |||
1721 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | ||
1722 | { | ||
1723 | } | ||
1724 | |||
1725 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt) | ||
1330 | { | 1726 | { |
1331 | return 0; | ||
1332 | } | 1727 | } |
1333 | 1728 | ||
1334 | static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) | 1729 | static void __cpuinit rcu_prepare_kthreads(int cpu) |
1335 | { | 1730 | { |
1336 | } | 1731 | } |
1337 | 1732 | ||
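
The empty definitions above are the usual config-stub idiom: with CONFIG_RCU_BOOST off, the same symbols still exist, so callers in rcutree.c need no #ifdef clutter. The idiom in generic form, for a hypothetical CONFIG_FOO feature:

#ifdef CONFIG_FOO
void foo_kick(int cpu);			/* real implementation built elsewhere */
#else
static inline void foo_kick(int cpu)
{
	/* feature compiled out: deliberately do nothing */
}
#endif
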
@@ -1509,7 +1904,7 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); | |||
1509 | * | 1904 | * |
1510 | * Because it is not legal to invoke rcu_process_callbacks() with irqs | 1905 | * Because it is not legal to invoke rcu_process_callbacks() with irqs |
1511 | * disabled, we do one pass of force_quiescent_state(), then do a | 1906 | * disabled, we do one pass of force_quiescent_state(), then do a |
1512 | * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked | 1907 | * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked |
1513 | * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. | 1908 | * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. |
1514 | */ | 1909 | */ |
1515 | int rcu_needs_cpu(int cpu) | 1910 | int rcu_needs_cpu(int cpu) |
@@ -1560,7 +1955,7 @@ int rcu_needs_cpu(int cpu) | |||
1560 | 1955 | ||
1561 | /* If RCU callbacks are still pending, RCU still needs this CPU. */ | 1956 | /* If RCU callbacks are still pending, RCU still needs this CPU. */ |
1562 | if (c) | 1957 | if (c) |
1563 | invoke_rcu_cpu_kthread(); | 1958 | invoke_rcu_core(); |
1564 | return c; | 1959 | return c; |
1565 | } | 1960 | } |
1566 | 1961 | ||
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 9678cc3650f5..4e144876dc68 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -46,6 +46,8 @@ | |||
46 | #define RCU_TREE_NONCORE | 46 | #define RCU_TREE_NONCORE |
47 | #include "rcutree.h" | 47 | #include "rcutree.h" |
48 | 48 | ||
49 | #ifdef CONFIG_RCU_BOOST | ||
50 | |||
49 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | 51 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
50 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu); | 52 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu); |
51 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | 53 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
@@ -58,6 +60,8 @@ static char convert_kthread_status(unsigned int kthread_status) | |||
58 | return "SRWOY"[kthread_status]; | 60 | return "SRWOY"[kthread_status]; |
59 | } | 61 | } |
60 | 62 | ||
63 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
64 | |||
61 | static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | 65 | static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) |
62 | { | 66 | { |
63 | if (!rdp->beenonline) | 67 | if (!rdp->beenonline) |
@@ -76,7 +80,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
76 | rdp->dynticks_fqs); | 80 | rdp->dynticks_fqs); |
77 | #endif /* #ifdef CONFIG_NO_HZ */ | 81 | #endif /* #ifdef CONFIG_NO_HZ */ |
78 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); | 82 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); |
79 | seq_printf(m, " ql=%ld qs=%c%c%c%c kt=%d/%c/%d ktl=%x b=%ld", | 83 | seq_printf(m, " ql=%ld qs=%c%c%c%c", |
80 | rdp->qlen, | 84 | rdp->qlen, |
81 | ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != | 85 | ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != |
82 | rdp->nxttail[RCU_NEXT_TAIL]], | 86 | rdp->nxttail[RCU_NEXT_TAIL]], |
@@ -84,13 +88,16 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
84 | rdp->nxttail[RCU_NEXT_READY_TAIL]], | 88 | rdp->nxttail[RCU_NEXT_READY_TAIL]], |
85 | ".W"[rdp->nxttail[RCU_DONE_TAIL] != | 89 | ".W"[rdp->nxttail[RCU_DONE_TAIL] != |
86 | rdp->nxttail[RCU_WAIT_TAIL]], | 90 | rdp->nxttail[RCU_WAIT_TAIL]], |
87 | ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]], | 91 | ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); |
92 | #ifdef CONFIG_RCU_BOOST | ||
93 | seq_printf(m, " kt=%d/%c/%d ktl=%x", | ||
88 | per_cpu(rcu_cpu_has_work, rdp->cpu), | 94 | per_cpu(rcu_cpu_has_work, rdp->cpu), |
89 | convert_kthread_status(per_cpu(rcu_cpu_kthread_status, | 95 | convert_kthread_status(per_cpu(rcu_cpu_kthread_status, |
90 | rdp->cpu)), | 96 | rdp->cpu)), |
91 | per_cpu(rcu_cpu_kthread_cpu, rdp->cpu), | 97 | per_cpu(rcu_cpu_kthread_cpu, rdp->cpu), |
92 | per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff, | 98 | per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff); |
93 | rdp->blimit); | 99 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
100 | seq_printf(m, " b=%ld", rdp->blimit); | ||
94 | seq_printf(m, " ci=%lu co=%lu ca=%lu\n", | 101 | seq_printf(m, " ci=%lu co=%lu ca=%lu\n", |
95 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); | 102 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); |
96 | } | 103 | } |
@@ -147,18 +154,21 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
147 | rdp->dynticks_fqs); | 154 | rdp->dynticks_fqs); |
148 | #endif /* #ifdef CONFIG_NO_HZ */ | 155 | #endif /* #ifdef CONFIG_NO_HZ */ |
149 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); | 156 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); |
150 | seq_printf(m, ",%ld,\"%c%c%c%c\",%d,\"%c\",%ld", rdp->qlen, | 157 | seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen, |
151 | ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != | 158 | ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != |
152 | rdp->nxttail[RCU_NEXT_TAIL]], | 159 | rdp->nxttail[RCU_NEXT_TAIL]], |
153 | ".R"[rdp->nxttail[RCU_WAIT_TAIL] != | 160 | ".R"[rdp->nxttail[RCU_WAIT_TAIL] != |
154 | rdp->nxttail[RCU_NEXT_READY_TAIL]], | 161 | rdp->nxttail[RCU_NEXT_READY_TAIL]], |
155 | ".W"[rdp->nxttail[RCU_DONE_TAIL] != | 162 | ".W"[rdp->nxttail[RCU_DONE_TAIL] != |
156 | rdp->nxttail[RCU_WAIT_TAIL]], | 163 | rdp->nxttail[RCU_WAIT_TAIL]], |
157 | ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]], | 164 | ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); |
165 | #ifdef CONFIG_RCU_BOOST | ||
166 | seq_printf(m, ",%d,\"%c\"", | ||
158 | per_cpu(rcu_cpu_has_work, rdp->cpu), | 167 | per_cpu(rcu_cpu_has_work, rdp->cpu), |
159 | convert_kthread_status(per_cpu(rcu_cpu_kthread_status, | 168 | convert_kthread_status(per_cpu(rcu_cpu_kthread_status, |
160 | rdp->cpu)), | 169 | rdp->cpu))); |
161 | rdp->blimit); | 170 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
171 | seq_printf(m, ",%ld", rdp->blimit); | ||
162 | seq_printf(m, ",%lu,%lu,%lu\n", | 172 | seq_printf(m, ",%lu,%lu,%lu\n", |
163 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); | 173 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); |
164 | } | 174 | } |
@@ -169,7 +179,11 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) | |||
169 | #ifdef CONFIG_NO_HZ | 179 | #ifdef CONFIG_NO_HZ |
170 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); | 180 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); |
171 | #endif /* #ifdef CONFIG_NO_HZ */ | 181 | #endif /* #ifdef CONFIG_NO_HZ */ |
172 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); | 182 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\""); |
183 | #ifdef CONFIG_RCU_BOOST | ||
184 | seq_puts(m, "\"kt\",\"ktl\""); | ||
185 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
186 | seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n"); | ||
173 | #ifdef CONFIG_TREE_PREEMPT_RCU | 187 | #ifdef CONFIG_TREE_PREEMPT_RCU |
174 | seq_puts(m, "\"rcu_preempt:\"\n"); | 188 | seq_puts(m, "\"rcu_preempt:\"\n"); |
175 | PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); | 189 | PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); |
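
The rcutree_trace.c hunks all follow one pattern: each seq_printf() is split so the kthread columns can be compiled out together with CONFIG_RCU_BOOST while the rest of the output line stays intact. A short sketch of that pattern, with hypothetical field and config names:

#include <linux/seq_file.h>

struct my_stats {
	long qlen;
	long blimit;
#ifdef CONFIG_MY_EXTRA
	int kthread_state;
#endif
};

static void print_one(struct seq_file *m, struct my_stats *s)
{
	seq_printf(m, "q=%ld", s->qlen);
#ifdef CONFIG_MY_EXTRA
	seq_printf(m, " kt=%d", s->kthread_state);	/* present only with the feature */
#endif
	seq_printf(m, " b=%ld\n", s->blimit);
}
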
diff --git a/kernel/sched.c b/kernel/sched.c index cbb3a0eee58e..3f2e502d609b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -605,10 +605,10 @@ static inline int cpu_of(struct rq *rq) | |||
605 | /* | 605 | /* |
606 | * Return the group to which this tasks belongs. | 606 | * Return the group to which this tasks belongs. |
607 | * | 607 | * |
608 | * We use task_subsys_state_check() and extend the RCU verification | 608 | * We use task_subsys_state_check() and extend the RCU verification with |
609 | * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach() | 609 | * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each |
610 | * holds that lock for each task it moves into the cgroup. Therefore | 610 | * task it moves into the cgroup. Therefore by holding either of those locks, |
611 | * by holding that lock, we pin the task to the current cgroup. | 611 | * we pin the task to the current cgroup. |
612 | */ | 612 | */ |
613 | static inline struct task_group *task_group(struct task_struct *p) | 613 | static inline struct task_group *task_group(struct task_struct *p) |
614 | { | 614 | { |
@@ -616,7 +616,8 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
616 | struct cgroup_subsys_state *css; | 616 | struct cgroup_subsys_state *css; |
617 | 617 | ||
618 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | 618 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, |
619 | lockdep_is_held(&p->pi_lock)); | 619 | lockdep_is_held(&p->pi_lock) || |
620 | lockdep_is_held(&task_rq(p)->lock)); | ||
620 | tg = container_of(css, struct task_group, css); | 621 | tg = container_of(css, struct task_group, css); |
621 | 622 | ||
622 | return autogroup_task_group(p, tg); | 623 | return autogroup_task_group(p, tg); |
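
The extra lockdep_is_held(&task_rq(p)->lock) term widens the set of contexts the RCU-lockdep check will accept, matching the updated comment. The same idea expressed with the generic rcu_dereference_check() API is sketched below; the structure and lock names are hypothetical:

#include <linux/lockdep.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct foo;

struct mydev {
	struct foo __rcu *foo;
	spinlock_t lock_a;
	spinlock_t lock_b;
};

static struct foo *get_foo(struct mydev *dev)
{
	/* legal under rcu_read_lock(), or while holding either pinning lock */
	return rcu_dereference_check(dev->foo,
				     lockdep_is_held(&dev->lock_a) ||
				     lockdep_is_held(&dev->lock_b));
}
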
@@ -2200,6 +2201,16 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
2200 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); | 2201 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); |
2201 | 2202 | ||
2202 | #ifdef CONFIG_LOCKDEP | 2203 | #ifdef CONFIG_LOCKDEP |
2204 | /* | ||
2205 | * The caller should hold either p->pi_lock or rq->lock, when changing | ||
2206 | * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. | ||
2207 | * | ||
2208 | * sched_move_task() holds both and thus holding either pins the cgroup, | ||
2209 | * see set_task_rq(). | ||
2210 | * | ||
2211 | * Furthermore, all task_rq users should acquire both locks, see | ||
2212 | * task_rq_lock(). | ||
2213 | */ | ||
2203 | WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || | 2214 | WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || |
2204 | lockdep_is_held(&task_rq(p)->lock))); | 2215 | lockdep_is_held(&task_rq(p)->lock))); |
2205 | #endif | 2216 | #endif |
@@ -2447,6 +2458,10 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) | |||
2447 | } | 2458 | } |
2448 | rcu_read_unlock(); | 2459 | rcu_read_unlock(); |
2449 | } | 2460 | } |
2461 | |||
2462 | if (wake_flags & WF_MIGRATED) | ||
2463 | schedstat_inc(p, se.statistics.nr_wakeups_migrate); | ||
2464 | |||
2450 | #endif /* CONFIG_SMP */ | 2465 | #endif /* CONFIG_SMP */ |
2451 | 2466 | ||
2452 | schedstat_inc(rq, ttwu_count); | 2467 | schedstat_inc(rq, ttwu_count); |
@@ -2455,9 +2470,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) | |||
2455 | if (wake_flags & WF_SYNC) | 2470 | if (wake_flags & WF_SYNC) |
2456 | schedstat_inc(p, se.statistics.nr_wakeups_sync); | 2471 | schedstat_inc(p, se.statistics.nr_wakeups_sync); |
2457 | 2472 | ||
2458 | if (cpu != task_cpu(p)) | ||
2459 | schedstat_inc(p, se.statistics.nr_wakeups_migrate); | ||
2460 | |||
2461 | #endif /* CONFIG_SCHEDSTATS */ | 2473 | #endif /* CONFIG_SCHEDSTATS */ |
2462 | } | 2474 | } |
2463 | 2475 | ||
@@ -2600,6 +2612,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) | |||
2600 | 2612 | ||
2601 | #if defined(CONFIG_SMP) | 2613 | #if defined(CONFIG_SMP) |
2602 | if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { | 2614 | if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { |
2615 | sched_clock_cpu(cpu); /* sync clocks x-cpu */ | ||
2603 | ttwu_queue_remote(p, cpu); | 2616 | ttwu_queue_remote(p, cpu); |
2604 | return; | 2617 | return; |
2605 | } | 2618 | } |
@@ -2674,8 +2687,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
2674 | p->sched_class->task_waking(p); | 2687 | p->sched_class->task_waking(p); |
2675 | 2688 | ||
2676 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | 2689 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
2677 | if (task_cpu(p) != cpu) | 2690 | if (task_cpu(p) != cpu) { |
2691 | wake_flags |= WF_MIGRATED; | ||
2678 | set_task_cpu(p, cpu); | 2692 | set_task_cpu(p, cpu); |
2693 | } | ||
2679 | #endif /* CONFIG_SMP */ | 2694 | #endif /* CONFIG_SMP */ |
2680 | 2695 | ||
2681 | ttwu_queue(p, cpu); | 2696 | ttwu_queue(p, cpu); |
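
Together with the ttwu_stat() hunk earlier in this file, the wakeup path now records the migration decision in a wake flag at the moment it is made, instead of re-deriving it later from task_cpu(), which may already have changed. A plain-C sketch of that flag-at-decision-time idea; the names and the flag value are illustrative only:

#define MY_WF_MIGRATED	0x4		/* illustrative flag bit */

static int pick_cpu(int cur_cpu, int best_cpu, int *wake_flags)
{
	if (best_cpu != cur_cpu)
		*wake_flags |= MY_WF_MIGRATED;	/* remember the decision now */
	return best_cpu;
}

static void account(int wake_flags, unsigned long *nr_migrated)
{
	if (wake_flags & MY_WF_MIGRATED)	/* not a later "cpu != task_cpu(p)" check */
		(*nr_migrated)++;
}
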
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 88725c939e0b..10d018212bab 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -1096,7 +1096,7 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag | |||
1096 | * to move current somewhere else, making room for our non-migratable | 1096 | * to move current somewhere else, making room for our non-migratable |
1097 | * task. | 1097 | * task. |
1098 | */ | 1098 | */ |
1099 | if (p->prio == rq->curr->prio && !need_resched()) | 1099 | if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr)) |
1100 | check_preempt_equal_prio(rq, p); | 1100 | check_preempt_equal_prio(rq, p); |
1101 | #endif | 1101 | #endif |
1102 | } | 1102 | } |
@@ -1239,6 +1239,10 @@ static int find_lowest_rq(struct task_struct *task) | |||
1239 | int this_cpu = smp_processor_id(); | 1239 | int this_cpu = smp_processor_id(); |
1240 | int cpu = task_cpu(task); | 1240 | int cpu = task_cpu(task); |
1241 | 1241 | ||
1242 | /* Make sure the mask is initialized first */ | ||
1243 | if (unlikely(!lowest_mask)) | ||
1244 | return -1; | ||
1245 | |||
1242 | if (task->rt.nr_cpus_allowed == 1) | 1246 | if (task->rt.nr_cpus_allowed == 1) |
1243 | return -1; /* No other targets possible */ | 1247 | return -1; /* No other targets possible */ |
1244 | 1248 | ||
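
The new check keeps find_lowest_rq() from dereferencing the per-CPU lowest_mask before it has been set up. The same "bail out until initialized" guard in a standalone plain-C form, with hypothetical names:

static unsigned long *my_mask;		/* allocated by a later init step */

static int my_find_target(void)
{
	if (!my_mask)			/* called too early: no valid answer yet */
		return -1;
	/* ... normal search over my_mask ... */
	return 0;
}
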
diff --git a/kernel/signal.c b/kernel/signal.c index 86c32b884f8e..ff7678603328 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -2365,7 +2365,7 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) | |||
2365 | /** | 2365 | /** |
2366 | * sys_rt_sigprocmask - change the list of currently blocked signals | 2366 | * sys_rt_sigprocmask - change the list of currently blocked signals |
2367 | * @how: whether to add, remove, or set signals | 2367 | * @how: whether to add, remove, or set signals |
2368 | * @set: stores pending signals | 2368 | * @nset: stores pending signals |
2369 | * @oset: previous value of signal mask if non-null | 2369 | * @oset: previous value of signal mask if non-null |
2370 | * @sigsetsize: size of sigset_t type | 2370 | * @sigsetsize: size of sigset_t type |
2371 | */ | 2371 | */ |
diff --git a/kernel/smp.c b/kernel/smp.c index 73a195193558..fb67dfa8394e 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -74,7 +74,7 @@ static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { | |||
74 | .notifier_call = hotplug_cfd, | 74 | .notifier_call = hotplug_cfd, |
75 | }; | 75 | }; |
76 | 76 | ||
77 | static int __cpuinit init_call_single_data(void) | 77 | void __init call_function_init(void) |
78 | { | 78 | { |
79 | void *cpu = (void *)(long)smp_processor_id(); | 79 | void *cpu = (void *)(long)smp_processor_id(); |
80 | int i; | 80 | int i; |
@@ -88,10 +88,7 @@ static int __cpuinit init_call_single_data(void) | |||
88 | 88 | ||
89 | hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); | 89 | hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); |
90 | register_cpu_notifier(&hotplug_cfd_notifier); | 90 | register_cpu_notifier(&hotplug_cfd_notifier); |
91 | |||
92 | return 0; | ||
93 | } | 91 | } |
94 | early_initcall(init_call_single_data); | ||
95 | 92 | ||
96 | /* | 93 | /* |
97 | * csd_lock/csd_unlock used to serialize access to per-cpu csd resources | 94 | * csd_lock/csd_unlock used to serialize access to per-cpu csd resources |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 13960170cad4..40cf63ddd4b3 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | |||
58 | 58 | ||
59 | char *softirq_to_name[NR_SOFTIRQS] = { | 59 | char *softirq_to_name[NR_SOFTIRQS] = { |
60 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", | 60 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", |
61 | "TASKLET", "SCHED", "HRTIMER" | 61 | "TASKLET", "SCHED", "HRTIMER", "RCU" |
62 | }; | 62 | }; |
63 | 63 | ||
64 | /* | 64 | /* |
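
The added "RCU" string has to sit at the same index as RCU_SOFTIRQ, since softirq_to_name[] is indexed by the softirq enum. As a reminder (sketched from memory, not part of this patch), that enum in include/linux/interrupt.h has the matching order:

enum {
	HI_SOFTIRQ = 0,
	TIMER_SOFTIRQ,
	NET_TX_SOFTIRQ,
	NET_RX_SOFTIRQ,
	BLOCK_SOFTIRQ,
	BLOCK_IOPOLL_SOFTIRQ,
	TASKLET_SOFTIRQ,
	SCHED_SOFTIRQ,
	HRTIMER_SOFTIRQ,
	RCU_SOFTIRQ,		/* the entry the name table above was missing */
	NR_SOFTIRQS
};
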
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4fc92445a29c..f175d98bd355 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -938,6 +938,12 @@ static struct ctl_table kern_table[] = { | |||
938 | }, | 938 | }, |
939 | #endif | 939 | #endif |
940 | #ifdef CONFIG_PERF_EVENTS | 940 | #ifdef CONFIG_PERF_EVENTS |
941 | /* | ||
942 | * User-space scripts rely on the existence of this file | ||
943 | * as a feature check for perf_events being enabled. | ||
944 | * | ||
945 | * So it's an ABI, do not remove! | ||
946 | */ | ||
941 | { | 947 | { |
942 | .procname = "perf_event_paranoid", | 948 | .procname = "perf_event_paranoid", |
943 | .data = &sysctl_perf_event_paranoid, | 949 | .data = &sysctl_perf_event_paranoid, |
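
The new comment documents why the entry is ABI: user space probes for the file to detect perf_events support. A minimal user-space sketch of such a feature check (plain C, hypothetical program, not part of the patch):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	if (access("/proc/sys/kernel/perf_event_paranoid", F_OK) == 0)
		puts("perf_events available");
	else
		puts("perf_events not available");
	return 0;
}
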
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1c95fd677328..e0980f0d9a0a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -185,7 +185,6 @@ static struct clocksource *watchdog; | |||
185 | static struct timer_list watchdog_timer; | 185 | static struct timer_list watchdog_timer; |
186 | static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); | 186 | static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); |
187 | static DEFINE_SPINLOCK(watchdog_lock); | 187 | static DEFINE_SPINLOCK(watchdog_lock); |
188 | static cycle_t watchdog_last; | ||
189 | static int watchdog_running; | 188 | static int watchdog_running; |
190 | 189 | ||
191 | static int clocksource_watchdog_kthread(void *data); | 190 | static int clocksource_watchdog_kthread(void *data); |
@@ -254,11 +253,6 @@ static void clocksource_watchdog(unsigned long data) | |||
254 | if (!watchdog_running) | 253 | if (!watchdog_running) |
255 | goto out; | 254 | goto out; |
256 | 255 | ||
257 | wdnow = watchdog->read(watchdog); | ||
258 | wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, | ||
259 | watchdog->mult, watchdog->shift); | ||
260 | watchdog_last = wdnow; | ||
261 | |||
262 | list_for_each_entry(cs, &watchdog_list, wd_list) { | 256 | list_for_each_entry(cs, &watchdog_list, wd_list) { |
263 | 257 | ||
264 | /* Clocksource already marked unstable? */ | 258 | /* Clocksource already marked unstable? */ |
@@ -268,19 +262,28 @@ static void clocksource_watchdog(unsigned long data) | |||
268 | continue; | 262 | continue; |
269 | } | 263 | } |
270 | 264 | ||
265 | local_irq_disable(); | ||
271 | csnow = cs->read(cs); | 266 | csnow = cs->read(cs); |
267 | wdnow = watchdog->read(watchdog); | ||
268 | local_irq_enable(); | ||
272 | 269 | ||
273 | /* Clocksource initialized ? */ | 270 | /* Clocksource initialized ? */ |
274 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { | 271 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { |
275 | cs->flags |= CLOCK_SOURCE_WATCHDOG; | 272 | cs->flags |= CLOCK_SOURCE_WATCHDOG; |
276 | cs->wd_last = csnow; | 273 | cs->wd_last = wdnow; |
274 | cs->cs_last = csnow; | ||
277 | continue; | 275 | continue; |
278 | } | 276 | } |
279 | 277 | ||
280 | /* Check the deviation from the watchdog clocksource. */ | 278 | wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask, |
281 | cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & | 279 | watchdog->mult, watchdog->shift); |
280 | |||
281 | cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) & | ||
282 | cs->mask, cs->mult, cs->shift); | 282 | cs->mask, cs->mult, cs->shift); |
283 | cs->wd_last = csnow; | 283 | cs->cs_last = csnow; |
284 | cs->wd_last = wdnow; | ||
285 | |||
286 | /* Check the deviation from the watchdog clocksource. */ | ||
284 | if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { | 287 | if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { |
285 | clocksource_unstable(cs, cs_nsec - wd_nsec); | 288 | clocksource_unstable(cs, cs_nsec - wd_nsec); |
286 | continue; | 289 | continue; |
@@ -318,7 +321,6 @@ static inline void clocksource_start_watchdog(void) | |||
318 | return; | 321 | return; |
319 | init_timer(&watchdog_timer); | 322 | init_timer(&watchdog_timer); |
320 | watchdog_timer.function = clocksource_watchdog; | 323 | watchdog_timer.function = clocksource_watchdog; |
321 | watchdog_last = watchdog->read(watchdog); | ||
322 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; | 324 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; |
323 | add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); | 325 | add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); |
324 | watchdog_running = 1; | 326 | watchdog_running = 1; |
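
The watchdog now keeps a per-clocksource pair (cs_last, wd_last) read back-to-back with interrupts off, and converts both deltas with the usual (cycles * mult) >> shift form of clocksource_cyc2ns(). A standalone plain-C sketch of that arithmetic, including the mask that makes the delta wrap-safe; the mult/shift numbers are made up for illustration:

#include <stdint.h>
#include <stdio.h>

static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;	/* same form as clocksource_cyc2ns() */
}

int main(void)
{
	uint64_t mask = 0xffffffffULL;			/* 32-bit counter */
	uint64_t last = 0xfffffff0ULL, now = 0x10ULL;	/* counter wrapped */
	uint64_t delta = (now - last) & mask;		/* 0x20 cycles despite the wrap */

	/* mult/shift chosen so one cycle equals 10 ns: (delta * (10 << 20)) >> 20 */
	printf("%llu ns\n", (unsigned long long)cyc2ns(delta, 10 << 20, 20));
	return 0;
}
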
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1ee417fcbfa5..908038f57440 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -2740,7 +2740,7 @@ static int ftrace_process_regex(struct ftrace_hash *hash, | |||
2740 | { | 2740 | { |
2741 | char *func, *command, *next = buff; | 2741 | char *func, *command, *next = buff; |
2742 | struct ftrace_func_command *p; | 2742 | struct ftrace_func_command *p; |
2743 | int ret; | 2743 | int ret = -EINVAL; |
2744 | 2744 | ||
2745 | func = strsep(&next, ":"); | 2745 | func = strsep(&next, ":"); |
2746 | 2746 | ||
@@ -3330,6 +3330,7 @@ static int ftrace_process_locs(struct module *mod, | |||
3330 | { | 3330 | { |
3331 | unsigned long *p; | 3331 | unsigned long *p; |
3332 | unsigned long addr; | 3332 | unsigned long addr; |
3333 | unsigned long flags; | ||
3333 | 3334 | ||
3334 | mutex_lock(&ftrace_lock); | 3335 | mutex_lock(&ftrace_lock); |
3335 | p = start; | 3336 | p = start; |
@@ -3346,7 +3347,13 @@ static int ftrace_process_locs(struct module *mod, | |||
3346 | ftrace_record_ip(addr); | 3347 | ftrace_record_ip(addr); |
3347 | } | 3348 | } |
3348 | 3349 | ||
3350 | /* | ||
3351 | * Disable interrupts to prevent interrupts from executing | ||
3352 | * code that is being modified. | ||
3353 | */ | ||
3354 | local_irq_save(flags); | ||
3349 | ftrace_update_code(mod); | 3355 | ftrace_update_code(mod); |
3356 | local_irq_restore(flags); | ||
3350 | mutex_unlock(&ftrace_lock); | 3357 | mutex_unlock(&ftrace_lock); |
3351 | 3358 | ||
3352 | return 0; | 3359 | return 0; |
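
Two independent fixes land in this file: ftrace_process_regex() now defaults ret to -EINVAL so an unrecognized command cannot return an uninitialized value, and ftrace_update_code() is wrapped in local_irq_save()/local_irq_restore() while the code is patched. The irq-protection idiom in isolation; the patching helper is a hypothetical stand-in:

#include <linux/irqflags.h>

void my_patch_kernel_text(void);	/* hypothetical stand-in for ftrace_update_code() */

static void patch_safely(void)
{
	unsigned long flags;

	local_irq_save(flags);		/* keep interrupt handlers off the code being modified */
	my_patch_kernel_text();
	local_irq_restore(flags);
}
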
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f925c45f0afa..27d13b36b8be 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1870,8 +1870,12 @@ fs_initcall(init_kprobe_trace); | |||
1870 | 1870 | ||
1871 | #ifdef CONFIG_FTRACE_STARTUP_TEST | 1871 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
1872 | 1872 | ||
1873 | static int kprobe_trace_selftest_target(int a1, int a2, int a3, | 1873 | /* |
1874 | int a4, int a5, int a6) | 1874 | * The "__used" keeps gcc from removing the function symbol |
1875 | * from the kallsyms table. | ||
1876 | */ | ||
1877 | static __used int kprobe_trace_selftest_target(int a1, int a2, int a3, | ||
1878 | int a4, int a5, int a6) | ||
1875 | { | 1879 | { |
1876 | return a1 + a2 + a3 + a4 + a5 + a6; | 1880 | return a1 + a2 + a3 + a4 + a5 + a6; |
1877 | } | 1881 | } |
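
For reference, __used is roughly the wrapper below (from the gcc variants of include/linux/compiler*.h); it keeps the compiler from discarding a symbol that nothing references directly, so the self-test can still resolve it through kallsyms.

#define __used	__attribute__((__used__))

static __used int keep_me(void)		/* emitted even though never called directly */
{
	return 0;
}
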
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index dff763b7baf1..1f06468a10d7 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c | |||
@@ -240,13 +240,10 @@ static const char **find_next(void *v, loff_t *pos) | |||
240 | const char **fmt = v; | 240 | const char **fmt = v; |
241 | int start_index; | 241 | int start_index; |
242 | 242 | ||
243 | if (!fmt) | ||
244 | fmt = __start___trace_bprintk_fmt + *pos; | ||
245 | |||
246 | start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt; | 243 | start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt; |
247 | 244 | ||
248 | if (*pos < start_index) | 245 | if (*pos < start_index) |
249 | return fmt; | 246 | return __start___trace_bprintk_fmt + *pos; |
250 | 247 | ||
251 | return find_next_mod_format(start_index, v, fmt, pos); | 248 | return find_next_mod_format(start_index, v, fmt, pos); |
252 | } | 249 | } |
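
The simplification relies on the __start/__stop linker symbols bounding a contiguous array of format pointers, so any position below start_index can be returned by direct indexing and everything else is handed to the module lookup. A condensed sketch with hypothetical section names:

#include <linux/types.h>

extern const char *__start_myfmt[];	/* hypothetical linker-section bounds */
extern const char *__stop_myfmt[];

static const char **find_in_core(loff_t *pos)
{
	long n = __stop_myfmt - __start_myfmt;	/* entries in the built-in section */

	if (*pos < n)
		return __start_myfmt + *pos;	/* direct index into the section */
	return NULL;				/* past the end: defer to module formats */
}
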