| author | Paul E. McKenney <paul.mckenney@linaro.org> | 2012-01-10 20:52:31 -0500 |
|---|---|---|
| committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2012-02-21 12:03:37 -0500 |
| commit | a50c3af910e06f35bc0c68f89d8fef98c0fec0ea | |
| tree | bbd86e30567d25dcea2faedf5990d8d0453465aa /kernel/rcutree.c | |
| parent | 8146c4e2e2c1972216afece5c50e072e86120e42 | |
rcu: Don't make callbacks go through second full grace period
RCU's current CPU-offline code path dumps all of the outgoing CPU's
callbacks onto the RCU_NEXT_TAIL portion of the surviving CPU's
callback list. This means that all the ready-to-invoke callbacks from
the outgoing CPU must wait for another full RCU grace period. This was
just fine when CPU-hotplug events were rare, but there is increasing
evidence that users are planning to make heavier use of CPU hotplug.
Therefore, this commit changes the callback-dumping procedure so that
callbacks that are ready to invoke are moved to the RCU_DONE_TAIL
portion of the surviving CPU's callback list. This avoids running
these callbacks through a second unnecessary grace period.
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r-- | kernel/rcutree.c | 52
1 file changed, 46 insertions(+), 6 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ac3a810d2db7..7789e666394d 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1270,24 +1270,64 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 	struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
 	struct rcu_node *rnp = rdp->mynode;  /* For dying CPU. */
 
-	/* Move callbacks to some other CPU. */
+	/* First, adjust the counts. */
+	if (rdp->nxtlist != NULL) {
+		receive_rdp->qlen_lazy += rdp->qlen_lazy;
+		receive_rdp->qlen += rdp->qlen;
+		rdp->qlen_lazy = 0;
+		rdp->qlen = 0;
+	}
+
+	/*
+	 * Next, move ready-to-invoke callbacks to be invoked on some
+	 * other CPU.  These will not be required to pass through another
+	 * grace period:  They are done, regardless of CPU.
+	 */
+	if (rdp->nxtlist != NULL &&
+	    rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) {
+		struct rcu_head *oldhead;
+		struct rcu_head **oldtail;
+		struct rcu_head **newtail;
+
+		oldhead = rdp->nxtlist;
+		oldtail = receive_rdp->nxttail[RCU_DONE_TAIL];
+		rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
+		*rdp->nxttail[RCU_DONE_TAIL] = *oldtail;
+		*receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead;
+		newtail = rdp->nxttail[RCU_DONE_TAIL];
+		for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) {
+			if (receive_rdp->nxttail[i] == oldtail)
+				receive_rdp->nxttail[i] = newtail;
+			if (rdp->nxttail[i] == newtail)
+				rdp->nxttail[i] = &rdp->nxtlist;
+		}
+	}
+
+	/*
+	 * Finally, put the rest of the callbacks at the end of the list.
+	 * The ones that made it partway through get to start over:  We
+	 * cannot assume that grace periods are synchronized across CPUs.
+	 * (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but
+	 * this does not seem compelling.  Not yet, anyway.)
+	 */
 	if (rdp->nxtlist != NULL) {
 		*receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
 		receive_rdp->nxttail[RCU_NEXT_TAIL] =
 				rdp->nxttail[RCU_NEXT_TAIL];
-		receive_rdp->qlen_lazy += rdp->qlen_lazy;
-		receive_rdp->qlen += rdp->qlen;
 		receive_rdp->n_cbs_adopted += rdp->qlen;
 		rdp->n_cbs_orphaned += rdp->qlen;
 
 		rdp->nxtlist = NULL;
 		for (i = 0; i < RCU_NEXT_SIZE; i++)
 			rdp->nxttail[i] = &rdp->nxtlist;
-		rdp->qlen_lazy = 0;
-		rdp->qlen = 0;
 	}
 
-	/* Record a quiescent state for the dying CPU. */
+	/*
+	 * Record a quiescent state for the dying CPU.  This is safe
+	 * only because we have already cleared out the callbacks.
+	 * (Otherwise, the RCU core might try to schedule the invocation
+	 * of callbacks on this now-offline CPU, which would be bad.)
+	 */
 	mask = rdp->grpmask;	/* rnp->grplo is constant. */
 	trace_rcu_grace_period(rsp->name,
 			       rnp->gpnum + 1 - !!(rnp->qsmask & mask),
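
The trickiest part of the patch is the tail-pointer adjustment in the new ready-to-invoke splice. The per-CPU callback list in kernel/rcutree.c is a single linked list (rdp->nxtlist) with an array of tail pointers (rdp->nxttail[RCU_DONE_TAIL] through rdp->nxttail[RCU_NEXT_TAIL]) marking the ends of its segments. The following is a minimal user-space sketch of that splice under simplified assumptions: struct cb, struct cblist, the DONE_TAIL..NEXT_TAIL names, and the helper functions are stand-ins invented for this illustration, not the kernel's own types or API.

```c
#include <stdio.h>

/* Stand-in for struct rcu_head: one callback on a singly linked list. */
struct cb {
	struct cb *next;
	int id;
};

/* Same four segments as the kernel's RCU_DONE_TAIL..RCU_NEXT_TAIL. */
enum { DONE_TAIL, WAIT_TAIL, NEXT_READY_TAIL, NEXT_TAIL, NSEGS };

/* Stand-in for the ->nxtlist/->nxttail[] fields of struct rcu_data. */
struct cblist {
	struct cb *head;
	struct cb **tail[NSEGS];
};

static void cblist_init(struct cblist *l)
{
	int i;

	l->head = NULL;
	for (i = 0; i < NSEGS; i++)
		l->tail[i] = &l->head;
}

/* Append a new callback behind every existing segment. */
static void cblist_enqueue(struct cblist *l, struct cb *c)
{
	c->next = NULL;
	*l->tail[NEXT_TAIL] = c;
	l->tail[NEXT_TAIL] = &c->next;
}

/* Pretend the first n callbacks have already completed a grace period. */
static void cblist_mark_done(struct cblist *l, int n)
{
	struct cb **p = &l->head;
	int i;

	while (n-- > 0 && *p != NULL)
		p = &(*p)->next;
	for (i = DONE_TAIL; i < NEXT_TAIL; i++)
		l->tail[i] = p;
}

/* Splice @from's ready-to-invoke segment onto @to's, as the patch does. */
static void splice_done(struct cblist *from, struct cblist *to)
{
	struct cb *oldhead, **oldtail, **newtail;
	int i;

	if (from->head == NULL || from->tail[DONE_TAIL] == &from->head)
		return;				/* nothing ready to invoke */
	oldhead = from->head;			/* first ready callback */
	oldtail = to->tail[DONE_TAIL];		/* end of @to's ready segment */
	from->head = *from->tail[DONE_TAIL];	/* unlink ready callbacks from @from */
	*from->tail[DONE_TAIL] = *oldtail;	/* chain the rest of @to behind them */
	*to->tail[DONE_TAIL] = oldhead;		/* link them into @to's list */
	newtail = from->tail[DONE_TAIL];	/* new end of @to's ready segment */
	for (i = DONE_TAIL; i < NSEGS; i++) {
		if (to->tail[i] == oldtail)	/* @to segments ending at the old */
			to->tail[i] = newtail;	/* boundary now end at the new one */
		if (from->tail[i] == newtail)	/* emptied @from segments fall */
			from->tail[i] = &from->head;	/* back to the list head */
	}
}

int main(void)
{
	static struct cb cbs[6];
	struct cblist dying, surviving;
	struct cb *c;
	int i;

	cblist_init(&dying);
	cblist_init(&surviving);
	for (i = 0; i < 6; i++) {
		cbs[i].id = i;
		cblist_enqueue(i < 3 ? &dying : &surviving, &cbs[i]);
	}
	cblist_mark_done(&dying, 2);		/* callbacks 0 and 1 are ready */
	cblist_mark_done(&surviving, 1);	/* callback 3 is ready */

	splice_done(&dying, &surviving);

	for (c = surviving.head; c != NULL; c = c->next)
		printf("%d ", c->id);		/* prints: 3 0 1 4 5 */
	printf("\n");
	/* Callback 2 stays on the dying list; the patch then splices it */
	/* onto the survivor's RCU_NEXT_TAIL segment, as the old code did. */
	return 0;
}
```

The assignment order in splice_done() mirrors the patch deliberately: *oldtail is read and chained behind the dying CPU's ready callbacks before *to->tail[DONE_TAIL] is overwritten with oldhead. Swapping those two stores would make the last ready callback point back at oldhead whenever the two tails coincide, producing a cycle instead of a splice.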