path: root/kernel/rcu
author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2015-03-15 12:19:35 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2015-03-20 11:28:25 -0400
commit	654e953340491e498871321d7e2c9b0a12821933 (patch)
tree	cf7280a5a4d4d48df9112cf361e40d456aeb8809 /kernel/rcu
parent	a77da14ce9afb338040b405f6ab8afddc310411d (diff)
rcu: Associate quiescent-state reports with grace period
As noted in earlier commit logs, CPU hotplug operations running concurrently with grace-period initialization can result in a given leaf rcu_node structure having all CPUs offline and no blocked readers, but with this rcu_node structure nevertheless blocking the current grace period. Therefore, the quiescent-state forcing code now checks for this situation and repairs it.

Unfortunately, this checking can result in false positives, for example, when the last task has just removed itself from this leaf rcu_node structure, but has not yet started clearing the ->qsmask bits further up the structure. This means that the grace-period kthread (which forces quiescent states) and some other task might be attempting to concurrently clear these ->qsmask bits. This is usually not a problem: One of these tasks will be the first to acquire the upper-level rcu_node structure's lock and will therefore clear the bit, and the other task, seeing the bit already cleared, will stop trying to clear bits.

Sadly, this means that the following unusual sequence of events -can- result in a problem:

1.	The grace-period kthread wins, and clears the ->qsmask bits.

2.	This is the last thing blocking the current grace period, so that the grace-period kthread clears ->qsmask bits all the way to the root and finds that the root ->qsmask field is now zero.

3.	Another grace period is required, so that the grace-period kthread initializes it, including setting all the needed ->qsmask bits.

4.	The leaf rcu_node structure (the one that started this whole mess) is blocking this new grace period, either because it has at least one online CPU or because there is at least one task that had blocked within an RCU read-side critical section while running on one of this leaf rcu_node structure's CPUs. (And yes, that CPU might well have gone offline before the grace period in step (3) above started, which can mean that there is a task on the leaf rcu_node structure's ->blkd_tasks list, but ->qsmask equal to zero.)

5.	The other kthread didn't get around to trying to clear the upper-level ->qsmask bits until all the above had happened. This means that it now sees bits set in the upper-level ->qsmask field, so it proceeds to clear them. Too bad that it is doing so on behalf of a quiescent state that does not apply to the current grace period!

This sequence of events can result in the new grace period being too short. It can also result in the new grace period ending before the leaf rcu_node structure's ->qsmask bits have been cleared, which will result in splats during initialization of the next grace period. In addition, it can result in tasks blocking the new grace period still being queued at the start of the next grace period, which will result in other splats.

Sasha's testing turned up another of these splats, as did rcutorture testing. (And yes, rcutorture is being adjusted to make these splats show up more quickly. Which probably is having the undesirable side effect of making other problems show up less quickly. Can't have everything!)

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: <stable@vger.kernel.org> # 4.0.x
Tested-by: Sasha Levin <sasha.levin@oracle.com>
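To make the fix concrete, below is a minimal user-space sketch of the pattern this commit adopts. It is illustrative only: struct node, report_qs(), and the values in main() are made up for this sketch and are not the kernel's rcu_node code. The idea is to snapshot the grace-period number ("gps") when a quiescent-state report begins, and to discard the report at any level of the hierarchy whose grace period no longer matches that snapshot.

	#include <stdio.h>

	struct node {
		struct node *parent;
		unsigned long grpmask;	/* This node's bit in parent->qsmask. */
		unsigned long qsmask;	/* Children still owing a quiescent state. */
		unsigned long gpnum;	/* Grace period that ->qsmask applies to. */
	};

	/* Clear @mask in @np and propagate upward, but only for grace period @gps. */
	static void report_qs(struct node *np, unsigned long mask, unsigned long gps)
	{
		while (np) {
			if (!(np->qsmask & mask) || np->gpnum != gps)
				return;	/* Bit already clear, or a new grace period began. */
			np->qsmask &= ~mask;
			if (np->qsmask)
				return;	/* Other children still pending at this level. */
			mask = np->grpmask;	/* This level is done; report one level up. */
			np = np->parent;
		}
		printf("grace period %lu may now end\n", gps);	/* Root fully quiesced. */
	}

	int main(void)
	{
		struct node root = { .parent = NULL, .grpmask = 0x0, .qsmask = 0x1, .gpnum = 7 };
		struct node leaf = { .parent = &root, .grpmask = 0x1, .qsmask = 0x2, .gpnum = 7 };

		report_qs(&leaf, 0x2, 7);	/* Fresh snapshot: clears leaf, then root. */

		/* A new grace period begins: ->qsmask bits are set again, ->gpnum advances. */
		root.qsmask = 0x1; root.gpnum = 8;
		leaf.qsmask = 0x2; leaf.gpnum = 8;

		report_qs(&leaf, 0x2, 7);	/* Stale snapshot: rejected by the gps check. */
		report_qs(&leaf, 0x2, 8);	/* Fresh snapshot: accepted as before. */
		return 0;
	}

Without the rnp->gpnum != gps check that this commit adds, the stale report in the sketch would clear bits belonging to the newer grace period, which is exactly the failure mode described in step 5 above.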
Diffstat (limited to 'kernel/rcu')
-rw-r--r--	kernel/rcu/tree.c	34
1 file changed, 22 insertions, 12 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b3684b284677..8fcc64ed858c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2132,25 +2132,32 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
  * Similar to rcu_report_qs_rdp(), for which it is a helper function.
  * Allows quiescent states for a group of CPUs to be reported at one go
  * to the specified rcu_node structure, though all the CPUs in the group
- * must be represented by the same rcu_node structure (which need not be
- * a leaf rcu_node structure, though it often will be). That structure's
- * lock must be held upon entry, and it is released before return.
+ * must be represented by the same rcu_node structure (which need not be a
+ * leaf rcu_node structure, though it often will be). The gps parameter
+ * is the grace-period snapshot, which means that the quiescent states
+ * are valid only if rnp->gpnum is equal to gps. That structure's lock
+ * must be held upon entry, and it is released before return.
  */
 static void
 rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
-		  struct rcu_node *rnp, unsigned long flags)
+		  struct rcu_node *rnp, unsigned long gps, unsigned long flags)
 	__releases(rnp->lock)
 {
+	unsigned long oldmask = 0;
 	struct rcu_node *rnp_c;
 
 	/* Walk up the rcu_node hierarchy. */
 	for (;;) {
-		if (!(rnp->qsmask & mask)) {
+		if (!(rnp->qsmask & mask) || rnp->gpnum != gps) {
 
-			/* Our bit has already been cleared, so done. */
+			/*
+			 * Our bit has already been cleared, or the
+			 * relevant grace period is already over, so done.
+			 */
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 			return;
 		}
+		WARN_ON_ONCE(oldmask);	/* Any child must be all zeroed! */
 		rnp->qsmask &= ~mask;
 		trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
 						 mask, rnp->qsmask, rnp->level,
@@ -2174,7 +2181,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
 		rnp = rnp->parent;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		smp_mb__after_unlock_lock();
-		WARN_ON_ONCE(rnp_c->qsmask);
+		oldmask = rnp_c->qsmask;
 	}
 
 	/*
@@ -2196,6 +2203,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
 			  struct rcu_node *rnp, unsigned long flags)
 	__releases(rnp->lock)
 {
+	unsigned long gps;
 	unsigned long mask;
 	struct rcu_node *rnp_p;
 
@@ -2215,12 +2223,13 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
 		return;
 	}
 
-	/* Report up the rest of the hierarchy. */
+	/* Report up the rest of the hierarchy, tracking current ->gpnum. */
+	gps = rnp->gpnum;
 	mask = rnp->grpmask;
 	raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
 	raw_spin_lock(&rnp_p->lock);	/* irqs already disabled. */
 	smp_mb__after_unlock_lock();
-	rcu_report_qs_rnp(mask, rsp, rnp_p, flags);
+	rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags);
 }
 
 /*
@@ -2271,7 +2280,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 		 */
 		needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
 
-		rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
+		rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags);
+		/* ^^^ Released rnp->lock */
 		if (needwake)
 			rcu_gp_kthread_wake(rsp);
 	}
@@ -2747,8 +2757,8 @@ static void force_qs_rnp(struct rcu_state *rsp,
 			}
 		}
 		if (mask != 0) {
-			/* Idle/offline CPUs, report. */
-			rcu_report_qs_rnp(mask, rsp, rnp, flags);
+			/* Idle/offline CPUs, report (releases rnp->lock. */
+			rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags);
 		} else {
 			/* Nothing to do here, so just drop the lock. */
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);