aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2014-10-27 12:15:54 -0400
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2014-10-28 16:24:13 -0400
commitd7e29933969e5ca7c112ce1368a07911f4485dc2 (patch)
tree435005d019d5b815b16a54a28d376f81a05164ab /kernel
parentb2c4623dcd07af4b8ae3b56ae5f879e281c7b4f8 (diff)
rcu: Make rcu_barrier() understand about missing rcuo kthreads
Commit 35ce7f29a44a (rcu: Create rcuo kthreads only for onlined CPUs) avoids creating rcuo kthreads for CPUs that never come online. This fixes a bug in many instances of firmware: Instead of lying about their age, these systems instead lie about the number of CPUs that they have. Before commit 35ce7f29a44a, this could result in huge numbers of useless rcuo kthreads being created. It appears that experience indicates that I should have told the people suffering from this problem to fix their broken firmware, but I instead produced what turned out to be a partial fix. The missing piece supplied by this commit makes sure that rcu_barrier() knows not to post callbacks for no-CBs CPUs that have not yet come online, because otherwise rcu_barrier() will hang on systems having firmware that lies about the number of CPUs. It is tempting to simply have rcu_barrier() refuse to post a callback on any no-CBs CPU that does not have an rcuo kthread. This unfortunately does not work because rcu_barrier() is required to wait for all pending callbacks. It is therefore required to wait even for those callbacks that cannot possibly be invoked. Even if doing so hangs the system. Given that posting a callback to a no-CBs CPU that does not yet have an rcuo kthread can hang rcu_barrier(), It is tempting to report an error in this case. Unfortunately, this will result in false positives at boot time, when it is perfectly legal to post callbacks to the boot CPU before the scheduler has started, in other words, before it is legal to invoke rcu_barrier(). So this commit instead has rcu_barrier() avoid posting callbacks to CPUs having neither rcuo kthread nor pending callbacks, and has it complain bitterly if it finds CPUs having no rcuo kthread but some pending callbacks. And when rcu_barrier() does find CPUs having no rcuo kthread but pending callbacks, as noted earlier, it has no choice but to hang indefinitely. Reported-by: Yanko Kaneti <yaneti@declera.com> Reported-by: Jay Vosburgh <jay.vosburgh@canonical.com> Reported-by: Meelis Roos <mroos@linux.ee> Reported-by: Eric B Munson <emunson@akamai.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Tested-by: Eric B Munson <emunson@akamai.com> Tested-by: Jay Vosburgh <jay.vosburgh@canonical.com> Tested-by: Yanko Kaneti <yaneti@declera.com> Tested-by: Kevin Fenzi <kevin@scrye.com> Tested-by: Meelis Roos <mroos@linux.ee>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/rcu/tree.c15
-rw-r--r--kernel/rcu/tree.h1
-rw-r--r--kernel/rcu/tree_plugin.h33
3 files changed, 44 insertions, 5 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 133e47223095..9815447d22e0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3299,11 +3299,16 @@ static void _rcu_barrier(struct rcu_state *rsp)
3299 continue; 3299 continue;
3300 rdp = per_cpu_ptr(rsp->rda, cpu); 3300 rdp = per_cpu_ptr(rsp->rda, cpu);
3301 if (rcu_is_nocb_cpu(cpu)) { 3301 if (rcu_is_nocb_cpu(cpu)) {
3302 _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, 3302 if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
3303 rsp->n_barrier_done); 3303 _rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
3304 atomic_inc(&rsp->barrier_cpu_count); 3304 rsp->n_barrier_done);
3305 __call_rcu(&rdp->barrier_head, rcu_barrier_callback, 3305 } else {
3306 rsp, cpu, 0); 3306 _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
3307 rsp->n_barrier_done);
3308 atomic_inc(&rsp->barrier_cpu_count);
3309 __call_rcu(&rdp->barrier_head,
3310 rcu_barrier_callback, rsp, cpu, 0);
3311 }
3307 } else if (ACCESS_ONCE(rdp->qlen)) { 3312 } else if (ACCESS_ONCE(rdp->qlen)) {
3308 _rcu_barrier_trace(rsp, "OnlineQ", cpu, 3313 _rcu_barrier_trace(rsp, "OnlineQ", cpu,
3309 rsp->n_barrier_done); 3314 rsp->n_barrier_done);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index d03764652d91..bbdc45d8d74f 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -587,6 +587,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
587static void print_cpu_stall_info_end(void); 587static void print_cpu_stall_info_end(void);
588static void zero_cpu_stall_ticks(struct rcu_data *rdp); 588static void zero_cpu_stall_ticks(struct rcu_data *rdp);
589static void increment_cpu_stall_ticks(void); 589static void increment_cpu_stall_ticks(void);
590static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
590static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); 591static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
591static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); 592static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
592static void rcu_init_one_nocb(struct rcu_node *rnp); 593static void rcu_init_one_nocb(struct rcu_node *rnp);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 387dd4599344..c1d7f27bd38f 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2050,6 +2050,33 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
2050} 2050}
2051 2051
2052/* 2052/*
2053 * Does the specified CPU need an RCU callback for the specified flavor
2054 * of rcu_barrier()?
2055 */
2056static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
2057{
2058 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2059 struct rcu_head *rhp;
2060
2061 /* No-CBs CPUs might have callbacks on any of three lists. */
2062 rhp = ACCESS_ONCE(rdp->nocb_head);
2063 if (!rhp)
2064 rhp = ACCESS_ONCE(rdp->nocb_gp_head);
2065 if (!rhp)
2066 rhp = ACCESS_ONCE(rdp->nocb_follower_head);
2067
2068 /* Having no rcuo kthread but CBs after scheduler starts is bad! */
2069 if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp) {
2070 /* RCU callback enqueued before CPU first came online??? */
2071 pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",
2072 cpu, rhp->func);
2073 WARN_ON_ONCE(1);
2074 }
2075
2076 return !!rhp;
2077}
2078
2079/*
2053 * Enqueue the specified string of rcu_head structures onto the specified 2080 * Enqueue the specified string of rcu_head structures onto the specified
2054 * CPU's no-CBs lists. The CPU is specified by rdp, the head of the 2081 * CPU's no-CBs lists. The CPU is specified by rdp, the head of the
2055 * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy 2082 * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy
@@ -2642,6 +2669,12 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
2642 2669
2643#else /* #ifdef CONFIG_RCU_NOCB_CPU */ 2670#else /* #ifdef CONFIG_RCU_NOCB_CPU */
2644 2671
2672static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
2673{
2674 WARN_ON_ONCE(1); /* Should be dead code. */
2675 return false;
2676}
2677
2645static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 2678static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
2646{ 2679{
2647} 2680}