author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>    2015-03-08 17:52:27 -0400
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>    2015-03-20 11:27:33 -0400
commit     a77da14ce9afb338040b405f6ab8afddc310411d
tree       49c0ee20b6d871ef64f565e67c8ee2503f84d38a    /kernel/rcu
parent     5c60d25fa1b22fdcf141f8006d31c32b08db7311
rcu: Yet another fix for preemption and CPU hotplug
As noted earlier, the following sequence of events can occur when
running PREEMPT_RCU and HOTPLUG_CPU on a system with a multi-level
rcu_node combining tree:
1. A group of tasks block on CPUs corresponding to a given leaf
rcu_node structure while within RCU read-side critical sections.
2. All CPUs corresponding to that rcu_node structure go offline.
3. The next grace period starts, but because there are still tasks
blocked, the upper-level bits corresponding to this leaf rcu_node
structure remain set.
4. All the tasks exit their RCU read-side critical sections and
remove themselves from the leaf rcu_node structure's list,
leaving it empty.
5. But because there now is code to check for this condition at
force-quiescent-state time, the upper bits are cleared and the
grace period completes.
However, there is another complication that can occur following step 4 above:
4a. The grace period starts, and the leaf rcu_node structure's
->gp_tasks pointer is set to NULL because there are no tasks
blocked on this structure.
4b. One of the CPUs corresponding to the leaf rcu_node structure
comes back online.
4c. An endless stream of tasks is preempted within RCU read-side
critical sections on this CPU, such that the ->blkd_tasks
list is always non-empty.
The grace period will never end.
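The hang follows from the semantics of the two per-leaf fields involved:
->blkd_tasks lists every task preempted within an RCU read-side critical
section while running on one of the leaf's CPUs, while ->gp_tasks points to
the first task on that list that is actually blocking the current grace
period. Readers preempted after the grace period began cannot block it, so
in step 4c the list stays non-empty while ->gp_tasks stays NULL. A hedged
sketch contrasting the two predicates (the wrapper names are hypothetical,
for illustration only; the tests inside are the ones this patch removes and
adds):

	/* Old test: any preempted reader at all suppresses the report, so
	 * the endless stream of readers in step 4c wedges the grace period
	 * even though none of them block it. */
	static bool leaf_may_report_qs_old(struct rcu_node *rnp)
	{
		return list_empty(&rnp->blkd_tasks);	/* false in step 4c */
	}

	/* New test: only readers blocking the *current* grace period count,
	 * namely those reachable via ->gp_tasks. */
	static bool leaf_may_report_qs_new(struct rcu_node *rnp)
	{
		return !rcu_preempt_blocked_readers_cgp(rnp); /* true in step 4c */
	}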
This commit therefore makes the force-quiescent-state processing check only
for absence of tasks blocking the current grace period rather than absence
of tasks altogether. This will cause a quiescent state to be reported if
the current leaf rcu_node structure is not blocking the current grace period
and its parent thinks that it is, regardless of how RCU managed to get
itself into this state.
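For reference, rcu_preempt_blocked_readers_cgp() is a one-line test of the
->gp_tasks pointer; in kernels of this vintage it reads essentially as
follows (from kernel/rcu/tree_plugin.h, comment lightly paraphrased):

	/*
	 * Check for preempted RCU readers blocking the current grace period
	 * for the specified rcu_node structure.  A non-NULL ->gp_tasks
	 * pointer means at least one such reader exists; a non-empty
	 * ->blkd_tasks list by itself does not.
	 */
	static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
	{
		return rnp->gp_tasks != NULL;
	}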
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: <stable@vger.kernel.org> # 4.0.x
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Diffstat (limited to 'kernel/rcu')
-rw-r--r--    kernel/rcu/tree.c    43
1 file changed, 27 insertions(+), 16 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 17b5abf999ca..b3684b284677 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2199,8 +2199,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
 	unsigned long mask;
 	struct rcu_node *rnp_p;
 
-	WARN_ON_ONCE(rsp == &rcu_bh_state || rsp == &rcu_sched_state);
-	if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
+	if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
+	    rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;  /* Still need more quiescent states! */
 	}
@@ -2208,9 +2208,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
 	rnp_p = rnp->parent;
 	if (rnp_p == NULL) {
 		/*
-		 * Either there is only one rcu_node in the tree,
-		 * or tasks were kicked up to root rcu_node due to
-		 * CPUs going offline.
+		 * Only one rcu_node structure in the tree, so don't
+		 * try to report up to its nonexistent parent!
 		 */
 		rcu_report_qs_rsp(rsp, flags);
 		return;
@@ -2713,8 +2712,29 @@ static void force_qs_rnp(struct rcu_state *rsp,
 			return;
 		}
 		if (rnp->qsmask == 0) {
-			rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
-			continue;
+			if (rcu_state_p == &rcu_sched_state ||
+			    rsp != rcu_state_p ||
+			    rcu_preempt_blocked_readers_cgp(rnp)) {
+				/*
+				 * No point in scanning bits because they
+				 * are all zero.  But we might need to
+				 * priority-boost blocked readers.
+				 */
+				rcu_initiate_boost(rnp, flags);
+				/* rcu_initiate_boost() releases rnp->lock */
+				continue;
+			}
+			if (rnp->parent &&
+			    (rnp->parent->qsmask & rnp->grpmask)) {
+				/*
+				 * Race between grace-period
+				 * initialization and task exiting RCU
+				 * read-side critical section: Report.
+				 */
+				rcu_report_unblock_qs_rnp(rsp, rnp, flags);
+				/* rcu_report_unblock_qs_rnp() rlses ->lock */
+				continue;
+			}
 		}
 		cpu = rnp->grplo;
 		bit = 1;
@@ -2729,15 +2749,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
 		if (mask != 0) {
 			/* Idle/offline CPUs, report. */
 			rcu_report_qs_rnp(mask, rsp, rnp, flags);
-		} else if (rnp->parent &&
-			   list_empty(&rnp->blkd_tasks) &&
-			   !rnp->qsmask &&
-			   (rnp->parent->qsmask & rnp->grpmask)) {
-			/*
-			 * Race between grace-period initialization and task
-			 * existing RCU read-side critical section, report.
-			 */
-			rcu_report_unblock_qs_rnp(rsp, rnp, flags);
 		} else {
 			/* Nothing to do here, so just drop the lock. */
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);
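A side note on the first hunk: the old WARN_ON_ONCE() encoded the assumption
that only the preemptible flavor of RCU ever reached
rcu_report_unblock_qs_rnp(), but the patched force_qs_rnp() now calls it for
every flavor, so the assertion becomes an early return. A commented reading
of the new guard (the annotations are my interpretation, not part of the
patch):

	if (rcu_state_p == &rcu_sched_state ||	/* No preemptible RCU built in, */
	    rsp != rcu_state_p ||		/* or not the preemptible flavor, */
	    rnp->qsmask != 0 ||			/* or CPUs still owe quiescent states, */
	    rcu_preempt_blocked_readers_cgp(rnp)) {
						/* or readers still block this GP: */
		raw_spin_unlock_irqrestore(&rnp->lock, flags);
		return;  /* Still need more quiescent states! */
	}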