author		Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2015-03-08 17:52:27 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2015-03-20 11:27:33 -0400
commit		a77da14ce9afb338040b405f6ab8afddc310411d
tree		49c0ee20b6d871ef64f565e67c8ee2503f84d38a /kernel/rcu
parent		5c60d25fa1b22fdcf141f8006d31c32b08db7311
rcu: Yet another fix for preemption and CPU hotplug
As noted earlier, the following sequence of events can occur when running
PREEMPT_RCU and HOTPLUG_CPU on a system with a multi-level rcu_node
combining tree:

1. A group of tasks block on CPUs corresponding to a given leaf rcu_node
   structure while within RCU read-side critical sections.
2. All CPUs corresponding to that rcu_node structure go offline.
3. The next grace period starts, but because there are still tasks blocked,
   the upper-level bits corresponding to this leaf rcu_node structure
   remain set.
4. All the tasks exit their RCU read-side critical sections and remove
   themselves from the leaf rcu_node structure's list, leaving it empty.
5. But because there now is code to check for this condition at
   force-quiescent-state time, the upper bits are cleared and the grace
   period completes.

However, there is another complication that can occur following step 4
above:

4a. The grace period starts, and the leaf rcu_node structure's gp_tasks
    pointer is set to NULL because there are no tasks blocked on this
    structure.
4b. One of the CPUs corresponding to the leaf rcu_node structure comes
    back online.
4c. An endless stream of tasks is preempted within RCU read-side critical
    sections on this CPU, such that the ->blkd_tasks list is always
    non-empty.

The grace period will never end.

This commit therefore makes the force-quiescent-state processing check only
for absence of tasks blocking the current grace period rather than absence
of tasks altogether.  This will cause a quiescent state to be reported if
the current leaf rcu_node structure is not blocking the current grace period
and its parent thinks that it is, regardless of how RCU managed to get
itself into this state.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: <stable@vger.kernel.org> # 4.0.x
Tested-by: Sasha Levin <sasha.levin@oracle.com>
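For reference, below is a minimal user-space sketch of the distinction this
commit relies on.  The struct rnp_model type, its field names, and the two
helper functions are illustrative stand-ins invented for this example, not
kernel definitions; in the kernel, the new criterion is expressed through
rcu_preempt_blocked_readers_cgp(), which tests whether the leaf rcu_node
structure has any task blocking the current grace period, whereas the old
criterion tested list_empty(&rnp->blkd_tasks), i.e. whether any task was
queued at all.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

/* Illustrative stand-in for the fields of interest in a leaf rcu_node. */
struct rnp_model {
	bool empty_blkd_tasks;	/* ->blkd_tasks list holds no tasks at all */
	void *gp_tasks;		/* first task blocking the CURRENT grace
				 * period, or NULL if none */
};

/* Old criterion: report a quiescent state only if no tasks are queued. */
static bool old_can_report(const struct rnp_model *rnp)
{
	return rnp->empty_blkd_tasks;
}

/* New criterion: report if no queued task blocks the current grace period. */
static bool new_can_report(const struct rnp_model *rnp)
{
	return rnp->gp_tasks == NULL;
}

int main(void)
{
	/*
	 * Step 4c above: the reborn CPU keeps preempting new readers, so
	 * ->blkd_tasks never empties, yet none of those readers block the
	 * current grace period, so ->gp_tasks stays NULL.
	 */
	struct rnp_model rnp = { .empty_blkd_tasks = false, .gp_tasks = NULL };

	assert(!old_can_report(&rnp));	/* old check: grace period stalls   */
	assert(new_can_report(&rnp));	/* new check: quiescent state found */
	return 0;
}

Under the old check, this leaf would be skipped forever and the grace period
would stall; under the new check, the quiescent state is reported upward and
the grace period can complete.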
Diffstat (limited to 'kernel/rcu')
-rw-r--r--	kernel/rcu/tree.c	43
1 file changed, 27 insertions(+), 16 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 17b5abf999ca..b3684b284677 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2199,8 +2199,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
 	unsigned long mask;
 	struct rcu_node *rnp_p;
 
-	WARN_ON_ONCE(rsp == &rcu_bh_state || rsp == &rcu_sched_state);
-	if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
+	if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
+	    rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;  /* Still need more quiescent states! */
 	}
@@ -2208,9 +2208,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
 	rnp_p = rnp->parent;
 	if (rnp_p == NULL) {
 		/*
-		 * Either there is only one rcu_node in the tree,
-		 * or tasks were kicked up to root rcu_node due to
-		 * CPUs going offline.
+		 * Only one rcu_node structure in the tree, so don't
+		 * try to report up to its nonexistent parent!
 		 */
 		rcu_report_qs_rsp(rsp, flags);
 		return;
@@ -2713,8 +2712,29 @@ static void force_qs_rnp(struct rcu_state *rsp,
 			return;
 		}
 		if (rnp->qsmask == 0) {
-			rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
-			continue;
+			if (rcu_state_p == &rcu_sched_state ||
+			    rsp != rcu_state_p ||
+			    rcu_preempt_blocked_readers_cgp(rnp)) {
+				/*
+				 * No point in scanning bits because they
+				 * are all zero.  But we might need to
+				 * priority-boost blocked readers.
+				 */
+				rcu_initiate_boost(rnp, flags);
+				/* rcu_initiate_boost() releases rnp->lock */
+				continue;
+			}
+			if (rnp->parent &&
+			    (rnp->parent->qsmask & rnp->grpmask)) {
+				/*
+				 * Race between grace-period
+				 * initialization and task exiting RCU
+				 * read-side critical section: Report.
+				 */
+				rcu_report_unblock_qs_rnp(rsp, rnp, flags);
+				/* rcu_report_unblock_qs_rnp() rlses ->lock */
+				continue;
+			}
 		}
 		cpu = rnp->grplo;
 		bit = 1;
@@ -2729,15 +2749,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
 		if (mask != 0) {
 			/* Idle/offline CPUs, report. */
 			rcu_report_qs_rnp(mask, rsp, rnp, flags);
-		} else if (rnp->parent &&
-			   list_empty(&rnp->blkd_tasks) &&
-			   !rnp->qsmask &&
-			   (rnp->parent->qsmask & rnp->grpmask)) {
-			/*
-			 * Race between grace-period initialization and task
-			 * existing RCU read-side critical section, report.
-			 */
-			rcu_report_unblock_qs_rnp(rsp, rnp, flags);
 		} else {
 			/* Nothing to do here, so just drop the lock. */
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);