From 9a432736904d386cda28b987b38ba14dae960ecc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 30 May 2011 20:38:55 -0700 Subject: rcu: Simplify curing of load woes Make the functions creating the kthreads wake them up. Leverage the fact that the per-node and boost kthreads can run anywhere, thus dispensing with the need to wake them up once the incoming CPU has gone fully online. Signed-off-by: Paul E. McKenney Tested-by: Daniel J Blueman --- kernel/rcutree.c | 65 ++++++++++++++++++-------------------------------------- 1 file changed, 21 insertions(+), 44 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 89419ff92e99..0a8ec5b2e208 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1635,6 +1635,20 @@ static int rcu_cpu_kthread(void *arg) * to manipulate rcu_cpu_kthread_task. There might be another CPU * attempting to access it during boot, but the locking in kthread_bind() * will enforce sufficient ordering. + * + * Please note that we cannot simply refuse to wake up the per-CPU + * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, + * which can result in softlockup complaints if the task ends up being + * idle for more than a couple of minutes. + * + * However, please note also that we cannot bind the per-CPU kthread to its + * CPU until that CPU is fully online. We also cannot wait until the + * CPU is fully online before we create its per-CPU kthread, as this would + * deadlock the system when CPU notifiers tried waiting for grace + * periods. So we bind the per-CPU kthread to its CPU only if the CPU + * is online. If its CPU is not yet fully online, then the code in + * rcu_cpu_kthread() will wait until it is fully online, and then do + * the binding. */ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) { @@ -1647,12 +1661,14 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); if (IS_ERR(t)) return PTR_ERR(t); - kthread_bind(t, cpu); + if (cpu_online(cpu)) + kthread_bind(t, cpu); per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); - per_cpu(rcu_cpu_kthread_task, cpu) = t; sp.sched_priority = RCU_KTHREAD_PRIO; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + per_cpu(rcu_cpu_kthread_task, cpu) = t; + wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ return 0; } @@ -1759,12 +1775,11 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = 99; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ } return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); } -static void rcu_wake_one_boost_kthread(struct rcu_node *rnp); - /* * Spawn all kthreads -- called as soon as the scheduler is running. */ @@ -1772,30 +1787,18 @@ static int __init rcu_spawn_kthreads(void) { int cpu; struct rcu_node *rnp; - struct task_struct *t; rcu_kthreads_spawnable = 1; for_each_possible_cpu(cpu) { per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) { + if (cpu_online(cpu)) (void)rcu_spawn_one_cpu_kthread(cpu); - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t) - wake_up_process(t); - } } rnp = rcu_get_root(rcu_state); (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - if (rnp->node_kthread_task) - wake_up_process(rnp->node_kthread_task); if (NUM_RCU_NODES > 1) { - rcu_for_each_leaf_node(rcu_state, rnp) { + rcu_for_each_leaf_node(rcu_state, rnp) (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - t = rnp->node_kthread_task; - if (t) - wake_up_process(t); - rcu_wake_one_boost_kthread(rnp); - } } return 0; } @@ -2220,31 +2223,6 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) } } -/* - * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state, - * but the RCU threads are woken on demand, and if demand is low this - * could be a while triggering the hung task watchdog. - * - * In order to avoid this, poke all tasks once the CPU is fully - * up and running. - */ -static void __cpuinit rcu_online_kthreads(int cpu) -{ - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); - struct rcu_node *rnp = rdp->mynode; - struct task_struct *t; - - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t) - wake_up_process(t); - - t = rnp->node_kthread_task; - if (t) - wake_up_process(t); - - rcu_wake_one_boost_kthread(rnp); -} - /* * Handle CPU online/offline notification events. */ @@ -2262,7 +2240,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, rcu_prepare_kthreads(cpu); break; case CPU_ONLINE: - rcu_online_kthreads(cpu); case CPU_DOWN_FAILED: rcu_node_kthread_setaffinity(rnp, -1); rcu_cpu_kthread_setrt(cpu, 1); -- cgit v1.2.2 From 09223371deac67d08ca0b70bd18787920284c967 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 14 Jun 2011 13:26:25 +0800 Subject: rcu: Use softirq to address performance regression Commit a26ac2455ffcf3(rcu: move TREE_RCU from softirq to kthread) introduced performance regression. In an AIM7 test, this commit degraded performance by about 40%. The commit runs rcu callbacks in a kthread instead of softirq. We observed high rate of context switch which is caused by this. Out test system has 64 CPUs and HZ is 1000, so we saw more than 64k context switch per second which is caused by RCU's per-CPU kthread. A trace showed that most of the time the RCU per-CPU kthread doesn't actually handle any callbacks, but instead just does a very small amount of work handling grace periods. This means that RCU's per-CPU kthreads are making the scheduler do quite a bit of work in order to allow a very small amount of RCU-related processing to be done. Alex Shi's analysis determined that this slowdown is due to lock contention within the scheduler. Unfortunately, as Peter Zijlstra points out, the scheduler's real-time semantics require global action, which means that this contention is inherent in real-time scheduling. (Yes, perhaps someone will come up with a workaround -- otherwise, -rt is not going to do well on large SMP systems -- but this patch will work around this issue in the meantime. And "the meantime" might well be forever.) This patch therefore re-introduces softirq processing to RCU, but only for core RCU work. RCU callbacks are still executed in kthread context, so that only a small amount of RCU work runs in softirq context in the common case. This should minimize ksoftirqd execution, allowing us to skip boosting of ksoftirqd for CONFIG_RCU_BOOST=y kernels. Signed-off-by: Shaohua Li Tested-by: "Alex,Shi" Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0a8ec5b2e208..ae5c9ea68662 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -100,6 +100,7 @@ static char rcu_kthreads_spawnable; static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); static void invoke_rcu_cpu_kthread(void); +static void __invoke_rcu_cpu_kthread(void); #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ @@ -1442,13 +1443,21 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) } /* If there are callbacks ready, invoke them. */ - rcu_do_batch(rsp, rdp); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + __invoke_rcu_cpu_kthread(); +} + +static void rcu_kthread_do_work(void) +{ + rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); + rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + rcu_preempt_do_callbacks(); } /* * Do softirq processing for the current CPU. */ -static void rcu_process_callbacks(void) +static void rcu_process_callbacks(struct softirq_action *unused) { __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); @@ -1465,7 +1474,7 @@ static void rcu_process_callbacks(void) * the current CPU with interrupts disabled, the rcu_cpu_kthread_task * cannot disappear out from under us. */ -static void invoke_rcu_cpu_kthread(void) +static void __invoke_rcu_cpu_kthread(void) { unsigned long flags; @@ -1479,6 +1488,11 @@ static void invoke_rcu_cpu_kthread(void) local_irq_restore(flags); } +static void invoke_rcu_cpu_kthread(void) +{ + raise_softirq(RCU_SOFTIRQ); +} + /* * Wake up the specified per-rcu_node-structure kthread. * Because the per-rcu_node kthreads are immortal, we don't need @@ -1613,7 +1627,7 @@ static int rcu_cpu_kthread(void *arg) *workp = 0; local_irq_restore(flags); if (work) - rcu_process_callbacks(); + rcu_kthread_do_work(); local_bh_enable(); if (*workp != 0) spincnt++; @@ -2387,6 +2401,7 @@ void __init rcu_init(void) rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); /* * We don't need protection against CPU-hotplug here because -- cgit v1.2.2 From a46e0899eec7a3069bcadd45dfba7bf67c6ed016 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 15 Jun 2011 15:47:09 -0700 Subject: rcu: use softirq instead of kthreads except when RCU_BOOST=y This patch #ifdefs RCU kthreads out of the kernel unless RCU_BOOST=y, thus eliminating context-switch overhead if RCU priority boosting has not been configured. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 59 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 15 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ae5c9ea68662..429d4949f0eb 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -87,6 +87,8 @@ static struct rcu_state *rcu_state; int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#ifdef CONFIG_RCU_BOOST + /* * Control variables for per-CPU and per-rcu_node kthreads. These * handle all flavors of RCU. @@ -98,9 +100,11 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(char, rcu_cpu_has_work); static char rcu_kthreads_spawnable; +#endif /* #ifdef CONFIG_RCU_BOOST */ + static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); -static void invoke_rcu_cpu_kthread(void); -static void __invoke_rcu_cpu_kthread(void); +static void invoke_rcu_core(void); +static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ @@ -1089,6 +1093,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) int need_report = 0; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp; +#ifdef CONFIG_RCU_BOOST struct task_struct *t; /* Stop the CPU's kthread. */ @@ -1097,6 +1102,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) per_cpu(rcu_cpu_kthread_task, cpu) = NULL; kthread_stop(t); } +#endif /* #ifdef CONFIG_RCU_BOOST */ /* Exclude any attempts to start a new grace period. */ raw_spin_lock_irqsave(&rsp->onofflock, flags); @@ -1232,7 +1238,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* Re-raise the RCU softirq if there are callbacks remaining. */ if (cpu_has_callbacks_ready_to_invoke(rdp)) - invoke_rcu_cpu_kthread(); + invoke_rcu_core(); } /* @@ -1278,7 +1284,7 @@ void rcu_check_callbacks(int cpu, int user) } rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) - invoke_rcu_cpu_kthread(); + invoke_rcu_core(); } #ifdef CONFIG_SMP @@ -1444,9 +1450,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) /* If there are callbacks ready, invoke them. */ if (cpu_has_callbacks_ready_to_invoke(rdp)) - __invoke_rcu_cpu_kthread(); + invoke_rcu_callbacks(rsp, rdp); } +#ifdef CONFIG_RCU_BOOST + static void rcu_kthread_do_work(void) { rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); @@ -1454,6 +1462,8 @@ static void rcu_kthread_do_work(void) rcu_preempt_do_callbacks(); } +#endif /* #ifdef CONFIG_RCU_BOOST */ + /* * Do softirq processing for the current CPU. */ @@ -1474,25 +1484,22 @@ static void rcu_process_callbacks(struct softirq_action *unused) * the current CPU with interrupts disabled, the rcu_cpu_kthread_task * cannot disappear out from under us. */ -static void __invoke_rcu_cpu_kthread(void) +static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) { - unsigned long flags; - - local_irq_save(flags); - __this_cpu_write(rcu_cpu_has_work, 1); - if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { - local_irq_restore(flags); + if (likely(!rsp->boost)) { + rcu_do_batch(rsp, rdp); return; } - wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); - local_irq_restore(flags); + invoke_rcu_callbacks_kthread(); } -static void invoke_rcu_cpu_kthread(void) +static void invoke_rcu_core(void) { raise_softirq(RCU_SOFTIRQ); } +#ifdef CONFIG_RCU_BOOST + /* * Wake up the specified per-rcu_node-structure kthread. * Because the per-rcu_node kthreads are immortal, we don't need @@ -1818,6 +1825,18 @@ static int __init rcu_spawn_kthreads(void) } early_initcall(rcu_spawn_kthreads); +#else /* #ifdef CONFIG_RCU_BOOST */ + +static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) +{ +} + +static void rcu_cpu_kthread_setrt(int cpu, int to_rt) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_BOOST */ + static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_state *rsp) @@ -2224,6 +2243,8 @@ static void __cpuinit rcu_prepare_cpu(int cpu) rcu_preempt_init_percpu_data(cpu); } +#ifdef CONFIG_RCU_BOOST + static void __cpuinit rcu_prepare_kthreads(int cpu) { struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); @@ -2237,6 +2258,14 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) } } +#else /* #ifdef CONFIG_RCU_BOOST */ + +static void __cpuinit rcu_prepare_kthreads(int cpu) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_BOOST */ + /* * Handle CPU online/offline notification events. */ -- cgit v1.2.2 From f8b7fc6b514f34a51875dd48dff70d4d17a54f38 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 16 Jun 2011 08:26:32 -0700 Subject: rcu: Move RCU_BOOST #ifdefs to header file The commit "use softirq instead of kthreads except when RCU_BOOST=y" just applied #ifdef in place. This commit is a cleanup that moves the newly #ifdef'ed code to the header file kernel/rcutree_plugin.h. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 383 +------------------------------------------------------ 1 file changed, 1 insertion(+), 382 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 429d4949f0eb..7e59ffb3d0ba 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1093,16 +1093,8 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) int need_report = 0; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp; -#ifdef CONFIG_RCU_BOOST - struct task_struct *t; - /* Stop the CPU's kthread. */ - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t != NULL) { - per_cpu(rcu_cpu_kthread_task, cpu) = NULL; - kthread_stop(t); - } -#endif /* #ifdef CONFIG_RCU_BOOST */ + rcu_stop_cpu_kthread(cpu); /* Exclude any attempts to start a new grace period. */ raw_spin_lock_irqsave(&rsp->onofflock, flags); @@ -1453,17 +1445,6 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) invoke_rcu_callbacks(rsp, rdp); } -#ifdef CONFIG_RCU_BOOST - -static void rcu_kthread_do_work(void) -{ - rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); - rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); - rcu_preempt_do_callbacks(); -} - -#endif /* #ifdef CONFIG_RCU_BOOST */ - /* * Do softirq processing for the current CPU. */ @@ -1498,345 +1479,6 @@ static void invoke_rcu_core(void) raise_softirq(RCU_SOFTIRQ); } -#ifdef CONFIG_RCU_BOOST - -/* - * Wake up the specified per-rcu_node-structure kthread. - * Because the per-rcu_node kthreads are immortal, we don't need - * to do anything to keep them alive. - */ -static void invoke_rcu_node_kthread(struct rcu_node *rnp) -{ - struct task_struct *t; - - t = rnp->node_kthread_task; - if (t != NULL) - wake_up_process(t); -} - -/* - * Set the specified CPU's kthread to run RT or not, as specified by - * the to_rt argument. The CPU-hotplug locks are held, so the task - * is not going away. - */ -static void rcu_cpu_kthread_setrt(int cpu, int to_rt) -{ - int policy; - struct sched_param sp; - struct task_struct *t; - - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t == NULL) - return; - if (to_rt) { - policy = SCHED_FIFO; - sp.sched_priority = RCU_KTHREAD_PRIO; - } else { - policy = SCHED_NORMAL; - sp.sched_priority = 0; - } - sched_setscheduler_nocheck(t, policy, &sp); -} - -/* - * Timer handler to initiate the waking up of per-CPU kthreads that - * have yielded the CPU due to excess numbers of RCU callbacks. - * We wake up the per-rcu_node kthread, which in turn will wake up - * the booster kthread. - */ -static void rcu_cpu_kthread_timer(unsigned long arg) -{ - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); - struct rcu_node *rnp = rdp->mynode; - - atomic_or(rdp->grpmask, &rnp->wakemask); - invoke_rcu_node_kthread(rnp); -} - -/* - * Drop to non-real-time priority and yield, but only after posting a - * timer that will cause us to regain our real-time priority if we - * remain preempted. Either way, we restore our real-time priority - * before returning. - */ -static void rcu_yield(void (*f)(unsigned long), unsigned long arg) -{ - struct sched_param sp; - struct timer_list yield_timer; - - setup_timer_on_stack(&yield_timer, f, arg); - mod_timer(&yield_timer, jiffies + 2); - sp.sched_priority = 0; - sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); - set_user_nice(current, 19); - schedule(); - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); - del_timer(&yield_timer); -} - -/* - * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. - * This can happen while the corresponding CPU is either coming online - * or going offline. We cannot wait until the CPU is fully online - * before starting the kthread, because the various notifier functions - * can wait for RCU grace periods. So we park rcu_cpu_kthread() until - * the corresponding CPU is online. - * - * Return 1 if the kthread needs to stop, 0 otherwise. - * - * Caller must disable bh. This function can momentarily enable it. - */ -static int rcu_cpu_kthread_should_stop(int cpu) -{ - while (cpu_is_offline(cpu) || - !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || - smp_processor_id() != cpu) { - if (kthread_should_stop()) - return 1; - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; - per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); - local_bh_enable(); - schedule_timeout_uninterruptible(1); - if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - local_bh_disable(); - } - per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; - return 0; -} - -/* - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the - * earlier RCU softirq. - */ -static int rcu_cpu_kthread(void *arg) -{ - int cpu = (int)(long)arg; - unsigned long flags; - int spincnt = 0; - unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); - char work; - char *workp = &per_cpu(rcu_cpu_has_work, cpu); - - for (;;) { - *statusp = RCU_KTHREAD_WAITING; - rcu_wait(*workp != 0 || kthread_should_stop()); - local_bh_disable(); - if (rcu_cpu_kthread_should_stop(cpu)) { - local_bh_enable(); - break; - } - *statusp = RCU_KTHREAD_RUNNING; - per_cpu(rcu_cpu_kthread_loops, cpu)++; - local_irq_save(flags); - work = *workp; - *workp = 0; - local_irq_restore(flags); - if (work) - rcu_kthread_do_work(); - local_bh_enable(); - if (*workp != 0) - spincnt++; - else - spincnt = 0; - if (spincnt > 10) { - *statusp = RCU_KTHREAD_YIELDING; - rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); - spincnt = 0; - } - } - *statusp = RCU_KTHREAD_STOPPED; - return 0; -} - -/* - * Spawn a per-CPU kthread, setting up affinity and priority. - * Because the CPU hotplug lock is held, no other CPU will be attempting - * to manipulate rcu_cpu_kthread_task. There might be another CPU - * attempting to access it during boot, but the locking in kthread_bind() - * will enforce sufficient ordering. - * - * Please note that we cannot simply refuse to wake up the per-CPU - * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, - * which can result in softlockup complaints if the task ends up being - * idle for more than a couple of minutes. - * - * However, please note also that we cannot bind the per-CPU kthread to its - * CPU until that CPU is fully online. We also cannot wait until the - * CPU is fully online before we create its per-CPU kthread, as this would - * deadlock the system when CPU notifiers tried waiting for grace - * periods. So we bind the per-CPU kthread to its CPU only if the CPU - * is online. If its CPU is not yet fully online, then the code in - * rcu_cpu_kthread() will wait until it is fully online, and then do - * the binding. - */ -static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) -{ - struct sched_param sp; - struct task_struct *t; - - if (!rcu_kthreads_spawnable || - per_cpu(rcu_cpu_kthread_task, cpu) != NULL) - return 0; - t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); - if (IS_ERR(t)) - return PTR_ERR(t); - if (cpu_online(cpu)) - kthread_bind(t, cpu); - per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; - WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - per_cpu(rcu_cpu_kthread_task, cpu) = t; - wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ - return 0; -} - -/* - * Per-rcu_node kthread, which is in charge of waking up the per-CPU - * kthreads when needed. We ignore requests to wake up kthreads - * for offline CPUs, which is OK because force_quiescent_state() - * takes care of this case. - */ -static int rcu_node_kthread(void *arg) -{ - int cpu; - unsigned long flags; - unsigned long mask; - struct rcu_node *rnp = (struct rcu_node *)arg; - struct sched_param sp; - struct task_struct *t; - - for (;;) { - rnp->node_kthread_status = RCU_KTHREAD_WAITING; - rcu_wait(atomic_read(&rnp->wakemask) != 0); - rnp->node_kthread_status = RCU_KTHREAD_RUNNING; - raw_spin_lock_irqsave(&rnp->lock, flags); - mask = atomic_xchg(&rnp->wakemask, 0); - rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { - if ((mask & 0x1) == 0) - continue; - preempt_disable(); - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (!cpu_online(cpu) || t == NULL) { - preempt_enable(); - continue; - } - per_cpu(rcu_cpu_has_work, cpu) = 1; - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - preempt_enable(); - } - } - /* NOTREACHED */ - rnp->node_kthread_status = RCU_KTHREAD_STOPPED; - return 0; -} - -/* - * Set the per-rcu_node kthread's affinity to cover all CPUs that are - * served by the rcu_node in question. The CPU hotplug lock is still - * held, so the value of rnp->qsmaskinit will be stable. - * - * We don't include outgoingcpu in the affinity set, use -1 if there is - * no outgoing CPU. If there are no CPUs left in the affinity set, - * this function allows the kthread to execute on any CPU. - */ -static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) -{ - cpumask_var_t cm; - int cpu; - unsigned long mask = rnp->qsmaskinit; - - if (rnp->node_kthread_task == NULL) - return; - if (!alloc_cpumask_var(&cm, GFP_KERNEL)) - return; - cpumask_clear(cm); - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) - if ((mask & 0x1) && cpu != outgoingcpu) - cpumask_set_cpu(cpu, cm); - if (cpumask_weight(cm) == 0) { - cpumask_setall(cm); - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) - cpumask_clear_cpu(cpu, cm); - WARN_ON_ONCE(cpumask_weight(cm) == 0); - } - set_cpus_allowed_ptr(rnp->node_kthread_task, cm); - rcu_boost_kthread_setaffinity(rnp, cm); - free_cpumask_var(cm); -} - -/* - * Spawn a per-rcu_node kthread, setting priority and affinity. - * Called during boot before online/offline can happen, or, if - * during runtime, with the main CPU-hotplug locks held. So only - * one of these can be executing at a time. - */ -static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, - struct rcu_node *rnp) -{ - unsigned long flags; - int rnp_index = rnp - &rsp->node[0]; - struct sched_param sp; - struct task_struct *t; - - if (!rcu_kthreads_spawnable || - rnp->qsmaskinit == 0) - return 0; - if (rnp->node_kthread_task == NULL) { - t = kthread_create(rcu_node_kthread, (void *)rnp, - "rcun%d", rnp_index); - if (IS_ERR(t)) - return PTR_ERR(t); - raw_spin_lock_irqsave(&rnp->lock, flags); - rnp->node_kthread_task = t; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - sp.sched_priority = 99; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ - } - return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); -} - -/* - * Spawn all kthreads -- called as soon as the scheduler is running. - */ -static int __init rcu_spawn_kthreads(void) -{ - int cpu; - struct rcu_node *rnp; - - rcu_kthreads_spawnable = 1; - for_each_possible_cpu(cpu) { - per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) - (void)rcu_spawn_one_cpu_kthread(cpu); - } - rnp = rcu_get_root(rcu_state); - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - if (NUM_RCU_NODES > 1) { - rcu_for_each_leaf_node(rcu_state, rnp) - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - } - return 0; -} -early_initcall(rcu_spawn_kthreads); - -#else /* #ifdef CONFIG_RCU_BOOST */ - -static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) -{ -} - -static void rcu_cpu_kthread_setrt(int cpu, int to_rt) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_BOOST */ - static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_state *rsp) @@ -2243,29 +1885,6 @@ static void __cpuinit rcu_prepare_cpu(int cpu) rcu_preempt_init_percpu_data(cpu); } -#ifdef CONFIG_RCU_BOOST - -static void __cpuinit rcu_prepare_kthreads(int cpu) -{ - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); - struct rcu_node *rnp = rdp->mynode; - - /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ - if (rcu_kthreads_spawnable) { - (void)rcu_spawn_one_cpu_kthread(cpu); - if (rnp->node_kthread_task == NULL) - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - } -} - -#else /* #ifdef CONFIG_RCU_BOOST */ - -static void __cpuinit rcu_prepare_kthreads(int cpu) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_BOOST */ - /* * Handle CPU online/offline notification events. */ -- cgit v1.2.2 From b0d304172f49061b4ff78f9e2b02719ac69c8a7e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 10 Jul 2011 15:57:35 -0700 Subject: rcu: Prevent RCU callbacks from executing before scheduler initialized Under some rare but real combinations of configuration parameters, RCU callbacks are posted during early boot that use kernel facilities that are not yet initialized. Therefore, when these callbacks are invoked, hard hangs and crashes ensue. This commit therefore prevents RCU callbacks from being invoked until after the scheduler is fully up and running, as in after multiple tasks have been spawned. It might well turn out that a better approach is to identify the specific RCU callbacks that are causing this problem, but that discussion will wait until such time as someone really needs an RCU callback to be invoked (as opposed to merely registered) during early boot. Reported-by: julie Sullivan Reported-by: RKK Signed-off-by: Paul E. McKenney Tested-by: Konrad Rzeszutek Wilk Tested-by: julie Sullivan Tested-by: RKK --- kernel/rcutree.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 7e59ffb3d0ba..ba06207b1dd3 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -84,9 +84,32 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; +/* + * The rcu_scheduler_active variable transitions from zero to one just + * before the first task is spawned. So when this variable is zero, RCU + * can assume that there is but one task, allowing RCU to (for example) + * optimized synchronize_sched() to a simple barrier(). When this variable + * is one, RCU must actually do all the hard work required to detect real + * grace periods. This variable is also used to suppress boot-time false + * positives from lockdep-RCU error checking. + */ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); +/* + * The rcu_scheduler_fully_active variable transitions from zero to one + * during the early_initcall() processing, which is after the scheduler + * is capable of creating new tasks. So RCU processing (for example, + * creating tasks for RCU priority boosting) must be delayed until after + * rcu_scheduler_fully_active transitions from zero to one. We also + * currently delay invocation of any RCU callbacks until after this point. + * + * It might later prove better for people registering RCU callbacks during + * early boot to take responsibility for these callbacks, but one step at + * a time. + */ +static int rcu_scheduler_fully_active __read_mostly; + #ifdef CONFIG_RCU_BOOST /* @@ -98,7 +121,6 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(char, rcu_cpu_has_work); -static char rcu_kthreads_spawnable; #endif /* #ifdef CONFIG_RCU_BOOST */ @@ -1467,6 +1489,8 @@ static void rcu_process_callbacks(struct softirq_action *unused) */ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) { + if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active))) + return; if (likely(!rsp->boost)) { rcu_do_batch(rsp, rdp); return; -- cgit v1.2.2