path: root/kernel/rcu/tree.c
author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>    2018-05-02 15:49:21 -0400
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>    2018-07-12 18:38:58 -0400
commit     962aff03c315b508d980422db5b49b49e4382119 (patch)
tree       72338d995f10969e9f626273465de65444b746a2 /kernel/rcu/tree.c
parent     226ca5e76692e2c82c17e8e8eedab22043f6ffee (diff)
rcu: Clean up handling of tasks blocked across full-rcu_node offline
Commit 0aa04b055e71 ("rcu: Process offlining and onlining only at grace-period start") deferred handling of CPU-hotplug events until the start of the next grace period, but consider the following sequence of events:

1. A task is preempted within an RCU-preempt read-side critical section.

2. The CPU that this task was running on goes offline, along with all other CPUs sharing the corresponding leaf rcu_node structure.

3. The task resumes execution.

4. One of those CPUs comes back online before a new grace period starts.

In step 2, the code in the next rcu_gp_init() invocation will (correctly) defer removing the leaf rcu_node structure from the upper-level bitmasks, and will (correctly) set that structure's ->wait_blkd_tasks field. During the ensuing interval, RCU will (correctly) track the tasks preempted on that structure because they must block any subsequent grace period.

In step 3, the code in rcu_read_unlock_special() will (correctly) remove the task from the leaf rcu_node structure. From this point forward, RCU need not pay attention to this structure, at least not until one of the corresponding CPUs comes back online.

In step 4, the code in the next rcu_gp_init() invocation will (incorrectly) invoke rcu_init_new_rnp(). This is incorrect because the corresponding rcu_cleanup_dead_rnp() was never invoked. It is nevertheless harmless because the upper-level bits are still set. So, no harm, no foul, right?

At least, all is well until a little further into that same rcu_gp_init() invocation, which will notice that there are no longer any tasks blocked on the leaf rcu_node structure, conclude that there is no longer anything left over from step 2's offline operation, and will therefore invoke rcu_cleanup_dead_rnp(). But this invocation of rcu_cleanup_dead_rnp() is for the beginning of the earlier offline interval, and the previous invocation of rcu_init_new_rnp() is for the end of that same interval. That is right, they are invoked out of order. That cannot be good, can it?

It turns out that this is not a (correctness!) problem, because rcu_cleanup_dead_rnp() checks to see whether any of the corresponding CPUs are online, and refuses to do anything if so. In other words, in the case where rcu_init_new_rnp() and rcu_cleanup_dead_rnp() execute out of order, they both have no effect. But this is at best an accident waiting to happen.

This commit therefore adds logic to rcu_gp_init() so that rcu_init_new_rnp() and rcu_cleanup_dead_rnp() are always invoked in order, and so that neither is invoked at all in cases where RCU had to pay attention to the leaf rcu_node structure during the entire time that all corresponding CPUs were offline. And, while in the area, this commit reduces confusion by using formal parameters rather than local variables that just happen to have the same value at that particular point in the code.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
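In code terms, the fix amounts to two guarded calls in rcu_gp_init(). The following abridged sketch is condensed from the hunks below (identifiers and conditions are taken from the patch; surrounding code is omitted and the comments are added here purely for explanation):

        /*
         * First CPU of this leaf rcu_node comes back online: set up the
         * upper-level bits only if the leaf really was removed from them,
         * that is, only if ->wait_blkd_tasks is clear.
         */
        if (!oldmask) {
                if (!rnp->wait_blkd_tasks) /* Ever offline? */
                        rcu_init_new_rnp(rnp);
        }

        /*
         * Leftover offline work is finished only while the leaf is still
         * fully offline, so rcu_cleanup_dead_rnp() can never run after a
         * corresponding rcu_init_new_rnp().
         */
        if (rnp->wait_blkd_tasks &&
            (!rcu_preempt_has_tasks(rnp) || rnp->qsmaskinit)) {
                rnp->wait_blkd_tasks = false;
                if (!rnp->qsmaskinit)
                        rcu_cleanup_dead_rnp(rnp);
        }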
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--    kernel/rcu/tree.c    30
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a6863b813f0c..9a5ba6db7b60 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1909,12 +1909,14 @@ static bool rcu_gp_init(struct rcu_state *rsp)
 
                 /* If zero-ness of ->qsmaskinit changed, propagate up tree. */
                 if (!oldmask != !rnp->qsmaskinit) {
-                        if (!oldmask) /* First online CPU for this rcu_node. */
-                                rcu_init_new_rnp(rnp);
-                        else if (rcu_preempt_has_tasks(rnp)) /* blocked tasks */
-                                rnp->wait_blkd_tasks = true;
-                        else /* Last offline CPU and can propagate. */
+                        if (!oldmask) { /* First online CPU for rcu_node. */
+                                if (!rnp->wait_blkd_tasks) /* Ever offline? */
+                                        rcu_init_new_rnp(rnp);
+                        } else if (rcu_preempt_has_tasks(rnp)) {
+                                rnp->wait_blkd_tasks = true; /* blocked tasks */
+                        } else { /* Last offline CPU and can propagate. */
                                 rcu_cleanup_dead_rnp(rnp);
+                        }
                 }
 
                 /*
@@ -1923,14 +1925,13 @@ static bool rcu_gp_init(struct rcu_state *rsp)
                  * still offline, propagate up the rcu_node tree and
                  * clear ->wait_blkd_tasks. Otherwise, if one of this
                  * rcu_node structure's CPUs has since come back online,
-                 * simply clear ->wait_blkd_tasks (but rcu_cleanup_dead_rnp()
-                 * checks for this, so just call it unconditionally).
+                 * simply clear ->wait_blkd_tasks.
                  */
                 if (rnp->wait_blkd_tasks &&
-                    (!rcu_preempt_has_tasks(rnp) ||
-                     rnp->qsmaskinit)) {
+                    (!rcu_preempt_has_tasks(rnp) || rnp->qsmaskinit)) {
                         rnp->wait_blkd_tasks = false;
-                        rcu_cleanup_dead_rnp(rnp);
+                        if (!rnp->qsmaskinit)
+                                rcu_cleanup_dead_rnp(rnp);
                 }
 
                 raw_spin_unlock_irq_rcu_node(rnp);
@@ -2450,9 +2451,10 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
         long mask;
         struct rcu_node *rnp = rnp_leaf;
 
-        raw_lockdep_assert_held_rcu_node(rnp);
+        raw_lockdep_assert_held_rcu_node(rnp_leaf);
         if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
-            rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
+            WARN_ON_ONCE(rnp_leaf->qsmaskinit) ||
+            WARN_ON_ONCE(rcu_preempt_has_tasks(rnp_leaf)))
                 return;
         for (;;) {
                 mask = rnp->grpmask;
@@ -2461,7 +2463,8 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
                         break;
                 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
                 rnp->qsmaskinit &= ~mask;
-                rnp->qsmask &= ~mask;
+                /* Between grace periods, so better already be zero! */
+                WARN_ON_ONCE(rnp->qsmask);
                 if (rnp->qsmaskinit) {
                         raw_spin_unlock_rcu_node(rnp);
                                                 /* irqs remain disabled. */
@@ -3479,6 +3482,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
         struct rcu_node *rnp = rnp_leaf;
 
         raw_lockdep_assert_held_rcu_node(rnp);
+        WARN_ON_ONCE(rnp->wait_blkd_tasks);
         for (;;) {
                 mask = rnp->grpmask;
                 rnp = rnp->parent;