1 files changed, 407 insertions, 12 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c8bff3099a8..14dc7dd0090 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -602,6 +602,15 @@ static void rcu_preempt_process_callbacks(void)
                                &__get_cpu_var(rcu_preempt_data));
 }
+#ifdef CONFIG_RCU_BOOST
+static void rcu_preempt_do_callbacks(void)
+{
+        rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
+}
+#endif /* #ifdef CONFIG_RCU_BOOST */
 /*
 * Queue a preemptible-RCU callback for invocation after a grace period.
 */
@@ -1249,6 +1258,23 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 }
 /*
+ * Wake up the per-CPU kthread to invoke RCU callbacks.
+ */
+static void invoke_rcu_callbacks_kthread(void)
+{
+        unsigned long flags;
+        local_irq_save(flags);
+        __this_cpu_write(rcu_cpu_has_work, 1);
+        if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) {
+                local_irq_restore(flags);
+                return;
+        }
+        wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
+        local_irq_restore(flags);
+}
+/*
 * Set the affinity of the boost kthread.  The CPU-hotplug locks are
 * held, so no one should be messing with the existence of the boost
 * kthread.
@@ -1288,6 +1314,7 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
        if (&rcu_preempt_state != rsp)
                return 0;
+        rsp->boost = 1;
        if (rnp->boost_kthread_task != NULL)
                return 0;
        t = kthread_create(rcu_boost_kthread, (void *)rnp,
@@ -1299,13 +1326,372 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
        sp.sched_priority = RCU_KTHREAD_PRIO;
        sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+        wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
        return 0;
 }
-static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Stop the RCU's per-CPU kthread when its CPU goes offline.
+ */
+static void rcu_stop_cpu_kthread(int cpu)
 {
-        if (rnp->boost_kthread_task)
+        struct task_struct *t;
-                wake_up_process(rnp->boost_kthread_task);
+        /* Stop the CPU's kthread. */
+        t = per_cpu(rcu_cpu_kthread_task, cpu);
+        if (t != NULL) {
+                per_cpu(rcu_cpu_kthread_task, cpu) = NULL;
+                kthread_stop(t);
+        }
+}
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+static void rcu_kthread_do_work(void)
+{
+        rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
+        rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
+        rcu_preempt_do_callbacks();
+}
+/*
+ * Wake up the specified per-rcu_node-structure kthread.
+ * Because the per-rcu_node kthreads are immortal, we don't need
+ * to do anything to keep them alive.
+ */
+static void invoke_rcu_node_kthread(struct rcu_node *rnp)
+{
+        struct task_struct *t;
+        t = rnp->node_kthread_task;
+        if (t != NULL)
+                wake_up_process(t);
+}
+/*
+ * Set the specified CPU's kthread to run RT or not, as specified by
+ * the to_rt argument.  The CPU-hotplug locks are held, so the task
+ * is not going away.
+ */
+static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
+{
+        int policy;
+        struct sched_param sp;
+        struct task_struct *t;
+        t = per_cpu(rcu_cpu_kthread_task, cpu);
+        if (t == NULL)
+                return;
+        if (to_rt) {
+                policy = SCHED_FIFO;
+                sp.sched_priority = RCU_KTHREAD_PRIO;
+        } else {
+                policy = SCHED_NORMAL;
+                sp.sched_priority = 0;
+        }
+        sched_setscheduler_nocheck(t, policy, &sp);
+}
+/*
+ * Timer handler to initiate the waking up of per-CPU kthreads that
+ * have yielded the CPU due to excess numbers of RCU callbacks.
+ * We wake up the per-rcu_node kthread, which in turn will wake up
+ * the booster kthread.
+ */
+static void rcu_cpu_kthread_timer(unsigned long arg)
+{
+        struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
+        struct rcu_node *rnp = rdp->mynode;
+        atomic_or(rdp->grpmask, &rnp->wakemask);
+        invoke_rcu_node_kthread(rnp);
+}
+/*
+ * Drop to non-real-time priority and yield, but only after posting a
+ * timer that will cause us to regain our real-time priority if we
+ * remain preempted.  Either way, we restore our real-time priority
+ * before returning.
+ */
+static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
+{
+        struct sched_param sp;
+        struct timer_list yield_timer;
+        setup_timer_on_stack(&yield_timer, f, arg);
+        mod_timer(&yield_timer, jiffies + 2);
+        sp.sched_priority = 0;
+        sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
+        set_user_nice(current, 19);
+        schedule();
+        sp.sched_priority = RCU_KTHREAD_PRIO;
+        sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+        del_timer(&yield_timer);
+}
+/*
+ * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU.
+ * This can happen while the corresponding CPU is either coming online
+ * or going offline.  We cannot wait until the CPU is fully online
+ * before starting the kthread, because the various notifier functions
+ * can wait for RCU grace periods.  So we park rcu_cpu_kthread() until
+ * the corresponding CPU is online.
+ *
+ * Return 1 if the kthread needs to stop, 0 otherwise.
+ *
+ * Caller must disable bh.  This function can momentarily enable it.
+ */
+static int rcu_cpu_kthread_should_stop(int cpu)
+{
+        while (cpu_is_offline(cpu) ||
+               !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) ||
+               smp_processor_id() != cpu) {
+                if (kthread_should_stop())
+                        return 1;
+                per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+                per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id();
+                local_bh_enable();
+                schedule_timeout_uninterruptible(1);
+                if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)))
+                        set_cpus_allowed_ptr(current, cpumask_of(cpu));
+                local_bh_disable();
+        }
+        per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
+        return 0;
+}
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the
+ * earlier RCU softirq.
+ */
+static int rcu_cpu_kthread(void *arg)
+{
+        int cpu = (int)(long)arg;
+        unsigned long flags;
+        int spincnt = 0;
+        unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
+        char work;
+        char *workp = &per_cpu(rcu_cpu_has_work, cpu);
+        for (;;) {
+                *statusp = RCU_KTHREAD_WAITING;
+                rcu_wait(*workp != 0 || kthread_should_stop());
+                local_bh_disable();
+                if (rcu_cpu_kthread_should_stop(cpu)) {
+                        local_bh_enable();
+                        break;
+                }
+                *statusp = RCU_KTHREAD_RUNNING;
+                per_cpu(rcu_cpu_kthread_loops, cpu)++;
+                local_irq_save(flags);
+                work = *workp;
+                *workp = 0;
+                local_irq_restore(flags);
+                if (work)
+                        rcu_kthread_do_work();
+                local_bh_enable();
+                if (*workp != 0)
+                        spincnt++;
+                else
+                        spincnt = 0;
+                if (spincnt > 10) {
+                        *statusp = RCU_KTHREAD_YIELDING;
+                        rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
+                        spincnt = 0;
+                }
+        }
+        *statusp = RCU_KTHREAD_STOPPED;
+        return 0;
+}
+/*
+ * Spawn a per-CPU kthread, setting up affinity and priority.
+ * Because the CPU hotplug lock is held, no other CPU will be attempting
+ * to manipulate rcu_cpu_kthread_task.  There might be another CPU
+ * attempting to access it during boot, but the locking in kthread_bind()
+ * will enforce sufficient ordering.
+ *
+ * Please note that we cannot simply refuse to wake up the per-CPU
+ * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state,
+ * which can result in softlockup complaints if the task ends up being
+ * idle for more than a couple of minutes.
+ *
+ * However, please note also that we cannot bind the per-CPU kthread to its
+ * CPU until that CPU is fully online.  We also cannot wait until the
+ * CPU is fully online before we create its per-CPU kthread, as this would
+ * deadlock the system when CPU notifiers tried waiting for grace
+ * periods.  So we bind the per-CPU kthread to its CPU only if the CPU
+ * is online.  If its CPU is not yet fully online, then the code in
+ * rcu_cpu_kthread() will wait until it is fully online, and then do
+ * the binding.
+ */
+static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
+{
+        struct sched_param sp;
+        struct task_struct *t;
+        if (!rcu_kthreads_spawnable ||
+            per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
+                return 0;
+        t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu);
+        if (IS_ERR(t))
+                return PTR_ERR(t);
+        if (cpu_online(cpu))
+                kthread_bind(t, cpu);
+        per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
+        WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
+        sp.sched_priority = RCU_KTHREAD_PRIO;
+        sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+        per_cpu(rcu_cpu_kthread_task, cpu) = t;
+        wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */
+        return 0;
+}
+/*
+ * Per-rcu_node kthread, which is in charge of waking up the per-CPU
+ * kthreads when needed.  We ignore requests to wake up kthreads
+ * for offline CPUs, which is OK because force_quiescent_state()
+ * takes care of this case.
+ */
+static int rcu_node_kthread(void *arg)
+{
+        int cpu;
+        unsigned long flags;
+        unsigned long mask;
+        struct rcu_node *rnp = (struct rcu_node *)arg;
+        struct sched_param sp;
+        struct task_struct *t;
+        for (;;) {
+                rnp->node_kthread_status = RCU_KTHREAD_WAITING;
+                rcu_wait(atomic_read(&rnp->wakemask) != 0);
+                rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
+                raw_spin_lock_irqsave(&rnp->lock, flags);
+                mask = atomic_xchg(&rnp->wakemask, 0);
+                rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
+                for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
+                        if ((mask & 0x1) == 0)
+                                continue;
+                        preempt_disable();
+                        t = per_cpu(rcu_cpu_kthread_task, cpu);
+                        if (!cpu_online(cpu) || t == NULL) {
+                                preempt_enable();
+                                continue;
+                        }
+                        per_cpu(rcu_cpu_has_work, cpu) = 1;
+                        sp.sched_priority = RCU_KTHREAD_PRIO;
+                        sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+                        preempt_enable();
+                }
+        }
+        /* NOTREACHED */
+        rnp->node_kthread_status = RCU_KTHREAD_STOPPED;
+        return 0;
+}
+/*
+ * Set the per-rcu_node kthread's affinity to cover all CPUs that are
+ * served by the rcu_node in question.  The CPU hotplug lock is still
+ * held, so the value of rnp->qsmaskinit will be stable.
+ *
+ * We don't include outgoingcpu in the affinity set; use -1 if there is
+ * no outgoing CPU.  If there are no CPUs left in the affinity set, the
+ * kthread may run on any CPU outside this rcu_node's grplo..grphi range.
+ */
+static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
+{
+        cpumask_var_t cm;
+        int cpu;
+        unsigned long mask = rnp->qsmaskinit;
+        if (rnp->node_kthread_task == NULL)
+                return;
+        if (!alloc_cpumask_var(&cm, GFP_KERNEL))
+                return;
+        cpumask_clear(cm);
+        for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
+                if ((mask & 0x1) && cpu != outgoingcpu)
+                        cpumask_set_cpu(cpu, cm);
+        if (cpumask_weight(cm) == 0) {
+                cpumask_setall(cm);
+                for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
+                        cpumask_clear_cpu(cpu, cm);
+                WARN_ON_ONCE(cpumask_weight(cm) == 0);
+        }
+        set_cpus_allowed_ptr(rnp->node_kthread_task, cm);
+        rcu_boost_kthread_setaffinity(rnp, cm);
+        free_cpumask_var(cm);
+}
+/*
+ * Spawn a per-rcu_node kthread, setting priority and affinity.
+ * Called during boot before online/offline can happen, or, if
+ * during runtime, with the main CPU-hotplug locks held.  So only
+ * one of these can be executing at a time.
+ */
+static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
+                                                struct rcu_node *rnp)
+{
+        unsigned long flags;
+        int rnp_index = rnp - &rsp->node[0];
+        struct sched_param sp;
+        struct task_struct *t;
+        if (!rcu_kthreads_spawnable ||
+            rnp->qsmaskinit == 0)
+                return 0;
+        if (rnp->node_kthread_task == NULL) {
+                t = kthread_create(rcu_node_kthread, (void *)rnp,
+                                   "rcun%d", rnp_index);
+                if (IS_ERR(t))
+                        return PTR_ERR(t);
+                raw_spin_lock_irqsave(&rnp->lock, flags);
+                rnp->node_kthread_task = t;
+                raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                sp.sched_priority = 99;
+                sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+                wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
+        }
+        return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
+}
+/*
+ * Spawn all kthreads -- called as soon as the scheduler is running.
+ */
+static int __init rcu_spawn_kthreads(void)
+{
+        int cpu;
+        struct rcu_node *rnp;
+        rcu_kthreads_spawnable = 1;
+        for_each_possible_cpu(cpu) {
+                per_cpu(rcu_cpu_has_work, cpu) = 0;
+                if (cpu_online(cpu))
+                        (void)rcu_spawn_one_cpu_kthread(cpu);
+        }
+        rnp = rcu_get_root(rcu_state);
+        (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+        if (NUM_RCU_NODES > 1) {
+                rcu_for_each_leaf_node(rcu_state, rnp)
+                        (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+        }
+        return 0;
+}
+early_initcall(rcu_spawn_kthreads);
+static void __cpuinit rcu_prepare_kthreads(int cpu)
+{
+        struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
+        struct rcu_node *rnp = rdp->mynode;
+        /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
+        if (rcu_kthreads_spawnable) {
+                (void)rcu_spawn_one_cpu_kthread(cpu);
+                if (rnp->node_kthread_task == NULL)
+                        (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+        }
 }
 #else /* #ifdef CONFIG_RCU_BOOST */
@@ -1315,23 +1701,32 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
-static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
+static void invoke_rcu_callbacks_kthread(void)
-                                          cpumask_var_t cm)
 {
+        WARN_ON_ONCE(1);
 }
 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
 {
 }
-static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+#ifdef CONFIG_HOTPLUG_CPU
-                                                 struct rcu_node *rnp,
-                                                 int rnp_index)
+static void rcu_stop_cpu_kthread(int cpu)
+{
+}
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
+{
+}
+static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
 {
-        return 0;
 }
-static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
+static void __cpuinit rcu_prepare_kthreads(int cpu)
 {
 }
@@ -1509,7 +1904,7 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
 *
 * Because it is not legal to invoke rcu_process_callbacks() with irqs
 * disabled, we do one pass of force_quiescent_state(), then do a
- * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked
+ * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
 * later.  The per-cpu rcu_dyntick_drain variable controls the sequencing.
 */
 int rcu_needs_cpu(int cpu)
@@ -1560,7 +1955,7 @@ int rcu_needs_cpu(int cpu)
        /* If RCU callbacks are still pending, RCU still needs this CPU. */
        if (c)
-                invoke_rcu_cpu_kthread();
+                invoke_rcu_core();
        return c;
 }

diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c8bff3099a8..14dc7dd0090 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -602,6 +602,15 @@ static void rcu_preempt_process_callbacks(void)
602	&__get_cpu_var(rcu_preempt_data));	602	&__get_cpu_var(rcu_preempt_data));
603	}	603	}
604		604
		605	#ifdef CONFIG_RCU_BOOST
		606
		607	static void rcu_preempt_do_callbacks(void)
		608	{
		609	rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
		610	}
		611
		612	#endif /* #ifdef CONFIG_RCU_BOOST */
		613
605	/*	614	/*
606	* Queue a preemptible-RCU callback for invocation after a grace period.	615	* Queue a preemptible-RCU callback for invocation after a grace period.
607	*/	616	*/
@@ -1249,6 +1258,23 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1249	}	1258	}
1250		1259
1251	/*	1260	/*
		1261	* Wake up the per-CPU kthread to invoke RCU callbacks.
		1262	*/
		1263	static void invoke_rcu_callbacks_kthread(void)
		1264	{
		1265	unsigned long flags;
		1266
		1267	local_irq_save(flags);
		1268	__this_cpu_write(rcu_cpu_has_work, 1);
		1269	if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) {
		1270	local_irq_restore(flags);
		1271	return;
		1272	}
		1273	wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
		1274	local_irq_restore(flags);
		1275	}
		1276
		1277	/*
1252	* Set the affinity of the boost kthread. The CPU-hotplug locks are	1278	* Set the affinity of the boost kthread. The CPU-hotplug locks are
1253	* held, so no one should be messing with the existence of the boost	1279	* held, so no one should be messing with the existence of the boost
1254	* kthread.	1280	* kthread.
@@ -1288,6 +1314,7 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1288		1314
1289	if (&rcu_preempt_state != rsp)	1315	if (&rcu_preempt_state != rsp)
1290	return 0;	1316	return 0;
		1317	rsp->boost = 1;
1291	if (rnp->boost_kthread_task != NULL)	1318	if (rnp->boost_kthread_task != NULL)
1292	return 0;	1319	return 0;
1293	t = kthread_create(rcu_boost_kthread, (void *)rnp,	1320	t = kthread_create(rcu_boost_kthread, (void *)rnp,
@@ -1299,13 +1326,372 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1299	raw_spin_unlock_irqrestore(&rnp->lock, flags);	1326	raw_spin_unlock_irqrestore(&rnp->lock, flags);
1300	sp.sched_priority = RCU_KTHREAD_PRIO;	1327	sp.sched_priority = RCU_KTHREAD_PRIO;
1301	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);	1328	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
		1329	wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
1302	return 0;	1330	return 0;
1303	}	1331	}
1304		1332
1305	static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)	1333	#ifdef CONFIG_HOTPLUG_CPU
		1334
		1335	/*
		1336	* Stop the RCU's per-CPU kthread when its CPU goes offline.
		1337	*/
		1338	static void rcu_stop_cpu_kthread(int cpu)
1306	{	1339	{
1307	if (rnp->boost_kthread_task)	1340	struct task_struct *t;
1308	wake_up_process(rnp->boost_kthread_task);	1341
		1342	/* Stop the CPU's kthread. */
		1343	t = per_cpu(rcu_cpu_kthread_task, cpu);
		1344	if (t != NULL) {
		1345	per_cpu(rcu_cpu_kthread_task, cpu) = NULL;
		1346	kthread_stop(t);
		1347	}
		1348	}
		1349
		1350	#endif /* #ifdef CONFIG_HOTPLUG_CPU */
		1351
		1352	static void rcu_kthread_do_work(void)
		1353	{
		1354	rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
		1355	rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
		1356	rcu_preempt_do_callbacks();
		1357	}
		1358
		1359	/*
		1360	* Wake up the specified per-rcu_node-structure kthread.
		1361	* Because the per-rcu_node kthreads are immortal, we don't need
		1362	* to do anything to keep them alive.
		1363	*/
		1364	static void invoke_rcu_node_kthread(struct rcu_node *rnp)
		1365	{
		1366	struct task_struct *t;
		1367
		1368	t = rnp->node_kthread_task;
		1369	if (t != NULL)
		1370	wake_up_process(t);
		1371	}
		1372
		1373	/*
		1374	* Set the specified CPU's kthread to run RT or not, as specified by
		1375	* the to_rt argument. The CPU-hotplug locks are held, so the task
		1376	* is not going away.
		1377	*/
		1378	static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
		1379	{
		1380	int policy;
		1381	struct sched_param sp;
		1382	struct task_struct *t;
		1383
		1384	t = per_cpu(rcu_cpu_kthread_task, cpu);
		1385	if (t == NULL)
		1386	return;
		1387	if (to_rt) {
		1388	policy = SCHED_FIFO;
		1389	sp.sched_priority = RCU_KTHREAD_PRIO;
		1390	} else {
		1391	policy = SCHED_NORMAL;
		1392	sp.sched_priority = 0;
		1393	}
		1394	sched_setscheduler_nocheck(t, policy, &sp);
		1395	}
		1396
		1397	/*
		1398	* Timer handler to initiate the waking up of per-CPU kthreads that
		1399	* have yielded the CPU due to excess numbers of RCU callbacks.
		1400	* We wake up the per-rcu_node kthread, which in turn will wake up
		1401	* the booster kthread.
		1402	*/
		1403	static void rcu_cpu_kthread_timer(unsigned long arg)
		1404	{
		1405	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
		1406	struct rcu_node *rnp = rdp->mynode;
		1407
		1408	atomic_or(rdp->grpmask, &rnp->wakemask);
		1409	invoke_rcu_node_kthread(rnp);
		1410	}
		1411
		1412	/*
		1413	* Drop to non-real-time priority and yield, but only after posting a
		1414	* timer that will cause us to regain our real-time priority if we
		1415	* remain preempted. Either way, we restore our real-time priority
		1416	* before returning.
		1417	*/
		1418	static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
		1419	{
		1420	struct sched_param sp;
		1421	struct timer_list yield_timer;
		1422
		1423	setup_timer_on_stack(&yield_timer, f, arg);
		1424	mod_timer(&yield_timer, jiffies + 2);
		1425	sp.sched_priority = 0;
		1426	sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
		1427	set_user_nice(current, 19);
		1428	schedule();
		1429	sp.sched_priority = RCU_KTHREAD_PRIO;
		1430	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
		1431	del_timer(&yield_timer);
		1432	}
		1433
		1434	/*
		1435	* Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU.
		1436	* This can happen while the corresponding CPU is either coming online
		1437	* or going offline. We cannot wait until the CPU is fully online
		1438	* before starting the kthread, because the various notifier functions
		1439	* can wait for RCU grace periods. So we park rcu_cpu_kthread() until
		1440	* the corresponding CPU is online.
		1441	*
		1442	* Return 1 if the kthread needs to stop, 0 otherwise.
		1443	*
		1444	* Caller must disable bh. This function can momentarily enable it.
		1445	*/
		1446	static int rcu_cpu_kthread_should_stop(int cpu)
		1447	{
		1448	while (cpu_is_offline(cpu) ||
		1449	!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) ||
		1450	smp_processor_id() != cpu) {
		1451	if (kthread_should_stop())
		1452	return 1;
		1453	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
		1454	per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id();
		1455	local_bh_enable();
		1456	schedule_timeout_uninterruptible(1);
		1457	if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)))
		1458	set_cpus_allowed_ptr(current, cpumask_of(cpu));
		1459	local_bh_disable();
		1460	}
		1461	per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
		1462	return 0;
		1463	}
		1464
		1465	/*
		1466	* Per-CPU kernel thread that invokes RCU callbacks. This replaces the
		1467	* earlier RCU softirq.
		1468	*/
		1469	static int rcu_cpu_kthread(void *arg)
		1470	{
		1471	int cpu = (int)(long)arg;
		1472	unsigned long flags;
		1473	int spincnt = 0;
		1474	unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
		1475	char work;
		1476	char *workp = &per_cpu(rcu_cpu_has_work, cpu);
		1477
		1478	for (;;) {
		1479	*statusp = RCU_KTHREAD_WAITING;
		1480	rcu_wait(*workp != 0 || kthread_should_stop());
		1481	local_bh_disable();
		1482	if (rcu_cpu_kthread_should_stop(cpu)) {
		1483	local_bh_enable();
		1484	break;
		1485	}
		1486	*statusp = RCU_KTHREAD_RUNNING;
		1487	per_cpu(rcu_cpu_kthread_loops, cpu)++;
		1488	local_irq_save(flags);
		1489	work = *workp;
		1490	*workp = 0;
		1491	local_irq_restore(flags);
		1492	if (work)
		1493	rcu_kthread_do_work();
		1494	local_bh_enable();
		1495	if (*workp != 0)
		1496	spincnt++;
		1497	else
		1498	spincnt = 0;
		1499	if (spincnt > 10) {
		1500	*statusp = RCU_KTHREAD_YIELDING;
		1501	rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
		1502	spincnt = 0;
		1503	}
		1504	}
		1505	*statusp = RCU_KTHREAD_STOPPED;
		1506	return 0;
		1507	}
		1508
		1509	/*
		1510	* Spawn a per-CPU kthread, setting up affinity and priority.
		1511	* Because the CPU hotplug lock is held, no other CPU will be attempting
		1512	* to manipulate rcu_cpu_kthread_task. There might be another CPU
		1513	* attempting to access it during boot, but the locking in kthread_bind()
		1514	* will enforce sufficient ordering.
		1515	*
		1516	* Please note that we cannot simply refuse to wake up the per-CPU
		1517	* kthread because kthreads are created in TASK_UNINTERRUPTIBLE state,
		1518	* which can result in softlockup complaints if the task ends up being
		1519	* idle for more than a couple of minutes.
		1520	*
		1521	* However, please note also that we cannot bind the per-CPU kthread to its
		1522	* CPU until that CPU is fully online. We also cannot wait until the
		1523	* CPU is fully online before we create its per-CPU kthread, as this would
		1524	* deadlock the system when CPU notifiers tried waiting for grace
		1525	* periods. So we bind the per-CPU kthread to its CPU only if the CPU
		1526	* is online. If its CPU is not yet fully online, then the code in
		1527	* rcu_cpu_kthread() will wait until it is fully online, and then do
		1528	* the binding.
		1529	*/
		1530	static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
		1531	{
		1532	struct sched_param sp;
		1533	struct task_struct *t;
		1534
		1535	if (!rcu_kthreads_spawnable ||
		1536	per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
		1537	return 0;
		1538	t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu);
		1539	if (IS_ERR(t))
		1540	return PTR_ERR(t);
		1541	if (cpu_online(cpu))
		1542	kthread_bind(t, cpu);
		1543	per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
		1544	WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
		1545	sp.sched_priority = RCU_KTHREAD_PRIO;
		1546	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
		1547	per_cpu(rcu_cpu_kthread_task, cpu) = t;
		1548	wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */
		1549	return 0;
		1550	}
		1551
		1552	/*
		1553	* Per-rcu_node kthread, which is in charge of waking up the per-CPU
		1554	* kthreads when needed. We ignore requests to wake up kthreads
		1555	* for offline CPUs, which is OK because force_quiescent_state()
		1556	* takes care of this case.
		1557	*/
		1558	static int rcu_node_kthread(void *arg)
		1559	{
		1560	int cpu;
		1561	unsigned long flags;
		1562	unsigned long mask;
		1563	struct rcu_node *rnp = (struct rcu_node *)arg;
		1564	struct sched_param sp;
		1565	struct task_struct *t;
		1566
		1567	for (;;) {
		1568	rnp->node_kthread_status = RCU_KTHREAD_WAITING;
		1569	rcu_wait(atomic_read(&rnp->wakemask) != 0);
		1570	rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
		1571	raw_spin_lock_irqsave(&rnp->lock, flags);
		1572	mask = atomic_xchg(&rnp->wakemask, 0);
		1573	rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
		1574	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
		1575	if ((mask & 0x1) == 0)
		1576	continue;
		1577	preempt_disable();
		1578	t = per_cpu(rcu_cpu_kthread_task, cpu);
		1579	if (!cpu_online(cpu) || t == NULL) {
		1580	preempt_enable();
		1581	continue;
		1582	}
		1583	per_cpu(rcu_cpu_has_work, cpu) = 1;
		1584	sp.sched_priority = RCU_KTHREAD_PRIO;
		1585	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
		1586	preempt_enable();
		1587	}
		1588	}
		1589	/* NOTREACHED */
		1590	rnp->node_kthread_status = RCU_KTHREAD_STOPPED;
		1591	return 0;
		1592	}
		1593
		1594	/*
		1595	* Set the per-rcu_node kthread's affinity to cover all CPUs that are
		1596	* served by the rcu_node in question. The CPU hotplug lock is still
		1597	* held, so the value of rnp->qsmaskinit will be stable.
		1598	*
		1599	* We don't include outgoingcpu in the affinity set; use -1 if there is
		1600	* no outgoing CPU. If there are no CPUs left in the affinity set, the
		1601	* kthread may run on any CPU outside this rcu_node's grplo..grphi range.
		1602	*/
		1603	static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
		1604	{
		1605	cpumask_var_t cm;
		1606	int cpu;
		1607	unsigned long mask = rnp->qsmaskinit;
		1608
		1609	if (rnp->node_kthread_task == NULL)
		1610	return;
		1611	if (!alloc_cpumask_var(&cm, GFP_KERNEL))
		1612	return;
		1613	cpumask_clear(cm);
		1614	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
		1615	if ((mask & 0x1) && cpu != outgoingcpu)
		1616	cpumask_set_cpu(cpu, cm);
		1617	if (cpumask_weight(cm) == 0) {
		1618	cpumask_setall(cm);
		1619	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
		1620	cpumask_clear_cpu(cpu, cm);
		1621	WARN_ON_ONCE(cpumask_weight(cm) == 0);
		1622	}
		1623	set_cpus_allowed_ptr(rnp->node_kthread_task, cm);
		1624	rcu_boost_kthread_setaffinity(rnp, cm);
		1625	free_cpumask_var(cm);
		1626	}
		1627
		1628	/*
		1629	* Spawn a per-rcu_node kthread, setting priority and affinity.
		1630	* Called during boot before online/offline can happen, or, if
		1631	* during runtime, with the main CPU-hotplug locks held. So only
		1632	* one of these can be executing at a time.
		1633	*/
		1634	static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
		1635	struct rcu_node *rnp)
		1636	{
		1637	unsigned long flags;
		1638	int rnp_index = rnp - &rsp->node[0];
		1639	struct sched_param sp;
		1640	struct task_struct *t;
		1641
		1642	if (!rcu_kthreads_spawnable ||
		1643	rnp->qsmaskinit == 0)
		1644	return 0;
		1645	if (rnp->node_kthread_task == NULL) {
		1646	t = kthread_create(rcu_node_kthread, (void *)rnp,
		1647	"rcun%d", rnp_index);
		1648	if (IS_ERR(t))
		1649	return PTR_ERR(t);
		1650	raw_spin_lock_irqsave(&rnp->lock, flags);
		1651	rnp->node_kthread_task = t;
		1652	raw_spin_unlock_irqrestore(&rnp->lock, flags);
		1653	sp.sched_priority = 99;
		1654	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
		1655	wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
		1656	}
		1657	return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
		1658	}
		1659
		1660	/*
		1661	* Spawn all kthreads -- called as soon as the scheduler is running.
		1662	*/
		1663	static int __init rcu_spawn_kthreads(void)
		1664	{
		1665	int cpu;
		1666	struct rcu_node *rnp;
		1667
		1668	rcu_kthreads_spawnable = 1;
		1669	for_each_possible_cpu(cpu) {
		1670	per_cpu(rcu_cpu_has_work, cpu) = 0;
		1671	if (cpu_online(cpu))
		1672	(void)rcu_spawn_one_cpu_kthread(cpu);
		1673	}
		1674	rnp = rcu_get_root(rcu_state);
		1675	(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
		1676	if (NUM_RCU_NODES > 1) {
		1677	rcu_for_each_leaf_node(rcu_state, rnp)
		1678	(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
		1679	}
		1680	return 0;
		1681	}
		1682	early_initcall(rcu_spawn_kthreads);
		1683
		1684	static void __cpuinit rcu_prepare_kthreads(int cpu)
		1685	{
		1686	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
		1687	struct rcu_node *rnp = rdp->mynode;
		1688
		1689	/* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
		1690	if (rcu_kthreads_spawnable) {
		1691	(void)rcu_spawn_one_cpu_kthread(cpu);
		1692	if (rnp->node_kthread_task == NULL)
		1693	(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
		1694	}
1309	}	1695	}
1310		1696
1311	#else /* #ifdef CONFIG_RCU_BOOST */	1697	#else /* #ifdef CONFIG_RCU_BOOST */
@@ -1315,23 +1701,32 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1315	raw_spin_unlock_irqrestore(&rnp->lock, flags);	1701	raw_spin_unlock_irqrestore(&rnp->lock, flags);
1316	}	1702	}
1317		1703
1318	static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,	1704	static void invoke_rcu_callbacks_kthread(void)
1319	cpumask_var_t cm)
1320	{	1705	{
		1706	WARN_ON_ONCE(1);
1321	}	1707	}
1322		1708
1323	static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)	1709	static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1324	{	1710	{
1325	}	1711	}
1326		1712
1327	static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,	1713	#ifdef CONFIG_HOTPLUG_CPU
1328	struct rcu_node *rnp,	1714
1329	int rnp_index)	1715	static void rcu_stop_cpu_kthread(int cpu)
		1716	{
		1717	}
		1718
		1719	#endif /* #ifdef CONFIG_HOTPLUG_CPU */
		1720
		1721	static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
		1722	{
		1723	}
		1724
		1725	static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
1330	{	1726	{
1331	return 0;
1332	}	1727	}
1333		1728
1334	static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)	1729	static void __cpuinit rcu_prepare_kthreads(int cpu)
1335	{	1730	{
1336	}	1731	}
1337		1732
@@ -1509,7 +1904,7 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
1509	*	1904	*
1510	* Because it is not legal to invoke rcu_process_callbacks() with irqs	1905	* Because it is not legal to invoke rcu_process_callbacks() with irqs
1511	* disabled, we do one pass of force_quiescent_state(), then do a	1906	* disabled, we do one pass of force_quiescent_state(), then do a
1512	* invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked	1907	* invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
1513	* later. The per-cpu rcu_dyntick_drain variable controls the sequencing.	1908	* later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
1514	*/	1909	*/
1515	int rcu_needs_cpu(int cpu)	1910	int rcu_needs_cpu(int cpu)
@@ -1560,7 +1955,7 @@ int rcu_needs_cpu(int cpu)
1560		1955
1561	/* If RCU callbacks are still pending, RCU still needs this CPU. */	1956	/* If RCU callbacks are still pending, RCU still needs this CPU. */
1562	if (c)	1957	if (c)
1563	invoke_rcu_cpu_kthread();	1958	invoke_rcu_core();
1564	return c;	1959	return c;
1565	}	1960	}
1566		1961