aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/rcu/tree.c
diff options
context:
space:
mode:
author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2018-05-07 12:34:17 -0400
committer: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2018-07-12 18:39:04 -0400
commit: ec2c29765a4ab12c236ac5a89b89660222ff6b01 (patch)
tree: 99351968dbecb12064f21a99e29660bc773c75c8 /kernel/rcu/tree.c
parent: 0b107d24d9361132758374a7b007c7c74efa007f (diff)
rcu: Fix grace-period hangs from mid-init task resume
Without special fail-safe quiescent-state-propagation checks, grace-period hangs can result from the following scenario:

1. A task running on a given CPU is preempted in its RCU read-side critical section.
2. That CPU goes offline, and there are now no online CPUs corresponding to that CPU's leaf rcu_node structure.
3. The rcu_gp_init() function does the first phase of grace-period initialization, and sets the aforementioned leaf rcu_node structure's ->qsmaskinit field to all zeroes. Because there is a blocked task, it does not propagate the zeroing of either ->qsmaskinit or ->qsmaskinitnext up the rcu_node tree.
4. The task resumes on some other CPU and exits its critical section. There is no grace period in progress, so the resulting quiescent state is not reported up the tree.
5. The rcu_gp_init() function does the second phase of grace-period initialization, which results in the leaf rcu_node structure being initialized to expect no further quiescent states, but with that structure's parent expecting a quiescent-state report.

The parent will never receive a quiescent state from this leaf rcu_node structure, so the grace period will hang, resulting in RCU CPU stall warnings.

It would be good to get rid of the special fail-safe quiescent-state propagation checks. This commit therefore checks the leaf rcu_node structure's ->wait_blkd_tasks field during grace-period initialization. If this flag is set, rcu_report_qs_rnp() is invoked to immediately report the possible quiescent state.

While in the neighborhood, this commit also reports quiescent states for any CPUs that went offline between the two phases of grace-period initialization, thus reducing grace-period delays and hopefully eventually allowing removal of offline-CPU checks from the force-quiescent-state code path.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--kernel/rcu/tree.c20
1 files changed, 17 insertions, 3 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 6275ed3925e9..7f872721c54e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -154,6 +154,9 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active);
154 */ 154 */
155static int rcu_scheduler_fully_active __read_mostly; 155static int rcu_scheduler_fully_active __read_mostly;
156 156
157static void
158rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
159 struct rcu_node *rnp, unsigned long gps, unsigned long flags);
157static void rcu_init_new_rnp(struct rcu_node *rnp_leaf); 160static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
158static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf); 161static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
159static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); 162static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
@@ -1858,7 +1861,9 @@ static void rcu_gp_slow(struct rcu_state *rsp, int delay)
1858 */ 1861 */
1859static bool rcu_gp_init(struct rcu_state *rsp) 1862static bool rcu_gp_init(struct rcu_state *rsp)
1860{ 1863{
1864 unsigned long flags;
1861 unsigned long oldmask; 1865 unsigned long oldmask;
1866 unsigned long mask;
1862 struct rcu_data *rdp; 1867 struct rcu_data *rdp;
1863 struct rcu_node *rnp = rcu_get_root(rsp); 1868 struct rcu_node *rnp = rcu_get_root(rsp);
1864 1869
@@ -1951,7 +1956,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
1951 */ 1956 */
1952 rcu_for_each_node_breadth_first(rsp, rnp) { 1957 rcu_for_each_node_breadth_first(rsp, rnp) {
1953 rcu_gp_slow(rsp, gp_init_delay); 1958 rcu_gp_slow(rsp, gp_init_delay);
1954 raw_spin_lock_irq_rcu_node(rnp); 1959 raw_spin_lock_irqsave_rcu_node(rnp, flags);
1955 rdp = this_cpu_ptr(rsp->rda); 1960 rdp = this_cpu_ptr(rsp->rda);
1956 rcu_preempt_check_blocked_tasks(rnp); 1961 rcu_preempt_check_blocked_tasks(rnp);
1957 rnp->qsmask = rnp->qsmaskinit; 1962 rnp->qsmask = rnp->qsmaskinit;
@@ -1962,7 +1967,12 @@ static bool rcu_gp_init(struct rcu_state *rsp)
1962 trace_rcu_grace_period_init(rsp->name, rnp->gp_seq, 1967 trace_rcu_grace_period_init(rsp->name, rnp->gp_seq,
1963 rnp->level, rnp->grplo, 1968 rnp->level, rnp->grplo,
1964 rnp->grphi, rnp->qsmask); 1969 rnp->grphi, rnp->qsmask);
1965 raw_spin_unlock_irq_rcu_node(rnp); 1970 /* Quiescent states for tasks on any now-offline CPUs. */
1971 mask = rnp->qsmask & ~rnp->qsmaskinitnext;
1972 if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp))
1973 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
1974 else
1975 raw_spin_unlock_irq_rcu_node(rnp);
1966 cond_resched_tasks_rcu_qs(); 1976 cond_resched_tasks_rcu_qs();
1967 WRITE_ONCE(rsp->gp_activity, jiffies); 1977 WRITE_ONCE(rsp->gp_activity, jiffies);
1968 } 1978 }
@@ -2233,6 +2243,10 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
2233 * is the grace-period snapshot, which means that the quiescent states 2243 * is the grace-period snapshot, which means that the quiescent states
2234 * are valid only if rnp->gp_seq is equal to gps. That structure's lock 2244 * are valid only if rnp->gp_seq is equal to gps. That structure's lock
2235 * must be held upon entry, and it is released before return. 2245 * must be held upon entry, and it is released before return.
2246 *
2247 * As a special case, if mask is zero, the bit-already-cleared check is
2248 * disabled. This allows propagating quiescent state due to resumed tasks
2249 * during grace-period initialization.
2236 */ 2250 */
2237static void 2251static void
2238rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, 2252rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
@@ -2246,7 +2260,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
2246 2260
2247 /* Walk up the rcu_node hierarchy. */ 2261 /* Walk up the rcu_node hierarchy. */
2248 for (;;) { 2262 for (;;) {
2249 if (!(rnp->qsmask & mask) || rnp->gp_seq != gps) { 2263 if ((!(rnp->qsmask & mask) && mask) || rnp->gp_seq != gps) {
2250 2264
2251 /* 2265 /*
2252 * Our bit has already been cleared, or the 2266 * Our bit has already been cleared, or the