| author | Paul E. McKenney <paul.mckenney@linaro.org> | 2012-01-10 20:52:31 -0500 |
|---|---|---|
| committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2012-02-21 12:03:37 -0500 |
| commit | a50c3af910e06f35bc0c68f89d8fef98c0fec0ea | |
| tree | bbd86e30567d25dcea2faedf5990d8d0453465aa /kernel/rcutree.c | |
| parent | 8146c4e2e2c1972216afece5c50e072e86120e42 | |
rcu: Don't make callbacks go through second full grace period
RCU's current CPU-offline code path dumps all of the outgoing CPU's
callbacks onto the RCU_NEXT_TAIL portion of the surviving CPU's
callback list. This means that all the ready-to-invoke callbacks from
the outgoing CPU must wait for another full RCU grace period. This was
just fine when CPU-hotplug events were rare, but there is increasing
evidence that users are planning to make heavier use of CPU hotplug.
Therefore, this commit changes the callback-dumping procedure so that
callbacks that are ready to invoke are moved to the RCU_DONE_TAIL
portion of the surviving CPU's callback list. This avoids running
these callbacks through a second unnecessary grace period.
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r-- | kernel/rcutree.c | 52
1 file changed, 46 insertions(+), 6 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ac3a810d2db7..7789e666394d 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1270,24 +1270,64 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 	struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
 	struct rcu_node *rnp = rdp->mynode;  /* For dying CPU. */
 
-	/* Move callbacks to some other CPU. */
+	/* First, adjust the counts. */
+	if (rdp->nxtlist != NULL) {
+		receive_rdp->qlen_lazy += rdp->qlen_lazy;
+		receive_rdp->qlen += rdp->qlen;
+		rdp->qlen_lazy = 0;
+		rdp->qlen = 0;
+	}
+
+	/*
+	 * Next, move ready-to-invoke callbacks to be invoked on some
+	 * other CPU.  These will not be required to pass through another
+	 * grace period:  They are done, regardless of CPU.
+	 */
+	if (rdp->nxtlist != NULL &&
+	    rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) {
+		struct rcu_head *oldhead;
+		struct rcu_head **oldtail;
+		struct rcu_head **newtail;
+
+		oldhead = rdp->nxtlist;
+		oldtail = receive_rdp->nxttail[RCU_DONE_TAIL];
+		rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
+		*rdp->nxttail[RCU_DONE_TAIL] = *oldtail;
+		*receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead;
+		newtail = rdp->nxttail[RCU_DONE_TAIL];
+		for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) {
+			if (receive_rdp->nxttail[i] == oldtail)
+				receive_rdp->nxttail[i] = newtail;
+			if (rdp->nxttail[i] == newtail)
+				rdp->nxttail[i] = &rdp->nxtlist;
+		}
+	}
+
+	/*
+	 * Finally, put the rest of the callbacks at the end of the list.
+	 * The ones that made it partway through get to start over:  We
+	 * cannot assume that grace periods are synchronized across CPUs.
+	 * (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but
+	 * this does not seem compelling.  Not yet, anyway.)
+	 */
 	if (rdp->nxtlist != NULL) {
 		*receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
 		receive_rdp->nxttail[RCU_NEXT_TAIL] =
 				rdp->nxttail[RCU_NEXT_TAIL];
-		receive_rdp->qlen_lazy += rdp->qlen_lazy;
-		receive_rdp->qlen += rdp->qlen;
 		receive_rdp->n_cbs_adopted += rdp->qlen;
 		rdp->n_cbs_orphaned += rdp->qlen;
 
 		rdp->nxtlist = NULL;
 		for (i = 0; i < RCU_NEXT_SIZE; i++)
 			rdp->nxttail[i] = &rdp->nxtlist;
-		rdp->qlen_lazy = 0;
-		rdp->qlen = 0;
 	}
 
-	/* Record a quiescent state for the dying CPU. */
+	/*
+	 * Record a quiescent state for the dying CPU.  This is safe
+	 * only because we have already cleared out the callbacks.
+	 * (Otherwise, the RCU core might try to schedule the invocation
+	 * of callbacks on this now-offline CPU, which would be bad.)
+	 */
 	mask = rdp->grpmask;	/* rnp->grplo is constant. */
 	trace_rcu_grace_period(rsp->name,
 			       rnp->gpnum + 1 - !!(rnp->qsmask & mask),
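
The trickiest part of the patch is the tail-pointer adjustment in the new ready-to-invoke splice. The per-CPU callback list in kernel/rcutree.c is a single linked list (rdp->nxtlist) with an array of tail pointers (rdp->nxttail[RCU_DONE_TAIL] through rdp->nxttail[RCU_NEXT_TAIL]) marking the ends of its segments. The following is a minimal user-space sketch of that splice under simplified assumptions: struct cb, struct cblist, the DONE_TAIL..NEXT_TAIL names, and the helper functions are stand-ins invented for this illustration, not the kernel's own types or API.

```c
#include <stdio.h>

/* Stand-in for struct rcu_head: one callback on a singly linked list. */
struct cb {
	struct cb *next;
	int id;
};

/* Same four segments as the kernel's RCU_DONE_TAIL..RCU_NEXT_TAIL. */
enum { DONE_TAIL, WAIT_TAIL, NEXT_READY_TAIL, NEXT_TAIL, NSEGS };

/* Stand-in for the ->nxtlist/->nxttail[] fields of struct rcu_data. */
struct cblist {
	struct cb *head;
	struct cb **tail[NSEGS];
};

static void cblist_init(struct cblist *l)
{
	int i;

	l->head = NULL;
	for (i = 0; i < NSEGS; i++)
		l->tail[i] = &l->head;
}

/* Append a new callback behind every existing segment. */
static void cblist_enqueue(struct cblist *l, struct cb *c)
{
	c->next = NULL;
	*l->tail[NEXT_TAIL] = c;
	l->tail[NEXT_TAIL] = &c->next;
}

/* Pretend the first n callbacks have already completed a grace period. */
static void cblist_mark_done(struct cblist *l, int n)
{
	struct cb **p = &l->head;
	int i;

	while (n-- > 0 && *p != NULL)
		p = &(*p)->next;
	for (i = DONE_TAIL; i < NEXT_TAIL; i++)
		l->tail[i] = p;
}

/* Splice @from's ready-to-invoke segment onto @to's, as the patch does. */
static void splice_done(struct cblist *from, struct cblist *to)
{
	struct cb *oldhead, **oldtail, **newtail;
	int i;

	if (from->head == NULL || from->tail[DONE_TAIL] == &from->head)
		return;				/* nothing ready to invoke */
	oldhead = from->head;			/* first ready callback */
	oldtail = to->tail[DONE_TAIL];		/* end of @to's ready segment */
	from->head = *from->tail[DONE_TAIL];	/* unlink ready callbacks from @from */
	*from->tail[DONE_TAIL] = *oldtail;	/* chain the rest of @to behind them */
	*to->tail[DONE_TAIL] = oldhead;		/* link them into @to's list */
	newtail = from->tail[DONE_TAIL];	/* new end of @to's ready segment */
	for (i = DONE_TAIL; i < NSEGS; i++) {
		if (to->tail[i] == oldtail)	/* @to segments ending at the old */
			to->tail[i] = newtail;	/* boundary now end at the new one */
		if (from->tail[i] == newtail)	/* emptied @from segments fall */
			from->tail[i] = &from->head;	/* back to the list head */
	}
}

int main(void)
{
	static struct cb cbs[6];
	struct cblist dying, surviving;
	struct cb *c;
	int i;

	cblist_init(&dying);
	cblist_init(&surviving);
	for (i = 0; i < 6; i++) {
		cbs[i].id = i;
		cblist_enqueue(i < 3 ? &dying : &surviving, &cbs[i]);
	}
	cblist_mark_done(&dying, 2);		/* callbacks 0 and 1 are ready */
	cblist_mark_done(&surviving, 1);	/* callback 3 is ready */

	splice_done(&dying, &surviving);

	for (c = surviving.head; c != NULL; c = c->next)
		printf("%d ", c->id);		/* prints: 3 0 1 4 5 */
	printf("\n");
	/* Callback 2 stays on the dying list; the patch then splices it */
	/* onto the survivor's RCU_NEXT_TAIL segment, as the old code did. */
	return 0;
}
```

The assignment order in splice_done() mirrors the patch deliberately: *oldtail is read and chained behind the dying CPU's ready callbacks before *to->tail[DONE_TAIL] is overwritten with oldhead. Swapping those two stores would make the last ready callback point back at oldhead whenever the two tails coincide, producing a cycle instead of a splice.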