author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>    2012-12-03 16:52:00 -0500
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>    2013-01-08 17:15:57 -0500
commit     dc35c8934eba959b690921615fcd987e8bc17e4a
tree       c8020ec8ac191505e35905e89e58f02aa4e5fc31
parent     1b0048a44c502c5ab850203e6e0a6498d7d8676d
rcu: Tag callback lists with corresponding grace-period number

Currently, callbacks are advanced each time the corresponding CPU
notices a change in its leaf rcu_node structure's ->completed value
(this value counts grace-period completions). This approach has worked
quite well, but with the advent of RCU_FAST_NO_HZ, we cannot count on
a given CPU seeing all the grace-period completions. When a CPU misses
a grace-period completion that occurs while it is in dyntick-idle mode,
invocation of its callbacks is delayed.

In addition, acceleration of callbacks (when RCU realizes that a given
callback need only wait until the end of the next grace period, rather
than having to wait for a partial grace period followed by a full
grace period) must be carried out extremely carefully. Insufficient
acceleration will result in unnecessarily long grace-period latencies,
while excessive acceleration will result in premature callback
invocation. Changes that involve this tradeoff are therefore among the
most nerve-wracking changes to RCU.

This commit therefore explicitly tags groups of callbacks with the
number of the grace period that they are waiting for. This means that
the callback-advancement and callback-acceleration functions are
idempotent, so that excessive acceleration merely wastes a few CPU
cycles. This also allows a CPU to take full advantage of any grace
periods that have elapsed while it has been in dyntick-idle mode. It
should also enable simultaneous simplifications to and optimizations
of RCU_FAST_NO_HZ.
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
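To make the tagging scheme concrete before diving into the diff, here
is a stand-alone sketch of the idea. Every name in it (cb_group,
accelerate(), advance()) is invented for illustration only; the real
implementation below works on the segmented rdp->nxttail[] /
rdp->nxtcompleted[] arrays via rcu_accelerate_cbs() and
rcu_advance_cbs(), and uses wraparound-safe comparisons such as
ULONG_CMP_LT() where this toy uses plain integer comparisons.

#include <stdio.h>

#define NGROUPS 4  /* stand-in for the fixed number of callback sublists */

struct cb_group {
        int ncbs;          /* callbacks in this group */
        unsigned long gp;  /* grace period they wait for; 0 = untagged */
};

/*
 * Tag every non-empty untagged group with the grace period that will
 * satisfy it, and tighten any tag that proved too conservative.
 * Repeating the call with the same next_gp changes nothing, which is
 * the idempotency property the commit message relies on: excessive
 * acceleration merely wastes a few CPU cycles.
 */
static void accelerate(struct cb_group *g, int n, unsigned long next_gp)
{
        for (int i = 0; i < n; i++)
                if (g[i].ncbs && (g[i].gp == 0 || g[i].gp > next_gp))
                        g[i].gp = next_gp;
}

/*
 * Invoke every group whose tagged grace period has completed. Because
 * each tag records an absolute grace-period number, a CPU that slept
 * through several grace periods still catches up in a single pass.
 */
static void advance(struct cb_group *g, int n, unsigned long completed)
{
        for (int i = 0; i < n; i++)
                if (g[i].ncbs && g[i].gp && g[i].gp <= completed) {
                        printf("invoking %d callbacks tagged for GP %lu\n",
                               g[i].ncbs, g[i].gp);
                        g[i].ncbs = 0;
                }
}

int main(void)
{
        struct cb_group g[NGROUPS] = { { 3, 0 }, { 2, 0 } };

        accelerate(g, NGROUPS, 5);  /* tag both groups for GP 5 */
        accelerate(g, NGROUPS, 5);  /* harmless repeat: idempotent */
        advance(g, NGROUPS, 4);     /* GP 4 done: nothing ready yet */
        advance(g, NGROUPS, 7);     /* CPU idle through GPs 5-7: both fire */
        return 0;
}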
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--  kernel/rcutree.c  195
1 file changed, 167 insertions, 28 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e441b77b614e..ac6a75d152ef 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -305,17 +305,27 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
 }
 
 /*
- * Does the current CPU require a yet-as-unscheduled grace period?
+ * Does the current CPU require a not-yet-started grace period?
+ * The caller must have disabled interrupts to prevent races with
+ * normal callback registry.
  */
 static int
 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-        struct rcu_head **ntp;
+        int i;
 
-        ntp = rdp->nxttail[RCU_DONE_TAIL +
-                           (ACCESS_ONCE(rsp->completed) != rdp->completed)];
-        return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp &&
-               !rcu_gp_in_progress(rsp);
+        if (rcu_gp_in_progress(rsp))
+                return 0;  /* No, a grace period is already in progress. */
+        if (!rdp->nxttail[RCU_NEXT_TAIL])
+                return 0;  /* No, this is a no-CBs (or offline) CPU. */
+        if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
+                return 1;  /* Yes, this CPU has newly registered callbacks. */
+        for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
+                if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
+                    ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
+                                 rdp->nxtcompleted[i]))
+                        return 1;  /* Yes, CBs for future grace period. */
+        return 0;  /* No grace period needed. */
 }
 
 /*
@@ -1071,6 +1081,139 @@ static void init_callback_list(struct rcu_data *rdp)
 }
 
 /*
+ * Determine the value that ->completed will have at the end of the
+ * next subsequent grace period. This is used to tag callbacks so that
+ * a CPU can invoke callbacks in a timely fashion even if that CPU has
+ * been dyntick-idle for an extended period with callbacks under the
+ * influence of RCU_FAST_NO_HZ.
+ *
+ * The caller must hold rnp->lock with interrupts disabled.
+ */
+static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
+                                       struct rcu_node *rnp)
+{
+        /*
+         * If RCU is idle, we just wait for the next grace period.
+         * But we can only be sure that RCU is idle if we are looking
+         * at the root rcu_node structure -- otherwise, a new grace
+         * period might have started, but just not yet gotten around
+         * to initializing the current non-root rcu_node structure.
+         */
+        if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
+                return rnp->completed + 1;
+
+        /*
+         * Otherwise, wait for a possible partial grace period and
+         * then the subsequent full grace period.
+         */
+        return rnp->completed + 2;
+}
+
+/*
+ * If there is room, assign a ->completed number to any callbacks on
+ * this CPU that have not already been assigned. Also accelerate any
+ * callbacks that were previously assigned a ->completed number that has
+ * since proven to be too conservative, which can happen if callbacks get
+ * assigned a ->completed number while RCU is idle, but with reference to
+ * a non-root rcu_node structure. This function is idempotent, so it does
+ * not hurt to call it repeatedly.
+ *
+ * The caller must hold rnp->lock with interrupts disabled.
+ */
+static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
+                               struct rcu_data *rdp)
+{
+        unsigned long c;
+        int i;
+
+        /* If the CPU has no callbacks, nothing to do. */
+        if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
+                return;
+
+        /*
+         * Starting from the sublist containing the callbacks most
+         * recently assigned a ->completed number and working down, find the
+         * first sublist that is not assignable to an upcoming grace period.
+         * Such a sublist has something in it (first two tests) and has
+         * a ->completed number assigned that will complete sooner than
+         * the ->completed number for newly arrived callbacks (last test).
+         *
+         * The key point is that any later sublist can be assigned the
+         * same ->completed number as the newly arrived callbacks, which
+         * means that the callbacks in any of these later sublists can be
+         * grouped into a single sublist, whether or not they have already
+         * been assigned a ->completed number.
+         */
+        c = rcu_cbs_completed(rsp, rnp);
+        for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
+                if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
+                    !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
+                        break;
+
+        /*
+         * If there is no sublist for unassigned callbacks, leave.
+         * At the same time, advance "i" one sublist, so that "i" indexes
+         * the sublist into which all the remaining callbacks should be
+         * grouped.
+         */
+        if (++i >= RCU_NEXT_TAIL)
+                return;
+
+        /*
+         * Assign all subsequent callbacks' ->completed number to the next
+         * full grace period and group them all in the sublist initially
+         * indexed by "i".
+         */
+        for (; i <= RCU_NEXT_TAIL; i++) {
+                rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
+                rdp->nxtcompleted[i] = c;
+        }
+}
+
+/*
+ * Move any callbacks whose grace period has completed to the
+ * RCU_DONE_TAIL sublist, then compact the remaining sublists and
+ * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
+ * sublist. This function is idempotent, so it does not hurt to
+ * invoke it repeatedly. As long as it is not invoked -too- often...
+ *
+ * The caller must hold rnp->lock with interrupts disabled.
+ */
+static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
+                            struct rcu_data *rdp)
+{
+        int i, j;
+
+        /* If the CPU has no callbacks, nothing to do. */
+        if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
+                return;
+
+        /*
+         * Find all callbacks whose ->completed numbers indicate that they
+         * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
+         */
+        for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
+                if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
+                        break;
+                rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
+        }
+        /* Clean up any sublist tail pointers that were misordered above. */
+        for (j = RCU_WAIT_TAIL; j < i; j++)
+                rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
+
+        /* Copy down callbacks to fill in empty sublists. */
+        for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
+                if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
+                        break;
+                rdp->nxttail[j] = rdp->nxttail[i];
+                rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
+        }
+
+        /* Classify any remaining callbacks. */
+        rcu_accelerate_cbs(rsp, rnp, rdp);
+}
+
+/*
  * Advance this CPU's callbacks, but only if the current grace period
  * has ended. This may be called only from the CPU to whom the rdp
  * belongs. In addition, the corresponding leaf rcu_node structure's
@@ -1080,12 +1223,15 @@ static void
 __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
 {
         /* Did another grace period end? */
-        if (rdp->completed != rnp->completed) {
+        if (rdp->completed == rnp->completed) {
+
+                /* No, so just accelerate recent callbacks. */
+                rcu_accelerate_cbs(rsp, rnp, rdp);
 
-                /* Advance callbacks. No harm if list empty. */
-                rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
-                rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
-                rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+        } else {
+
+                /* Advance callbacks. */
+                rcu_advance_cbs(rsp, rnp, rdp);
 
                 /* Remember that we saw this grace-period completion. */
                 rdp->completed = rnp->completed;
@@ -1392,17 +1538,10 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
         /*
          * Because there is no grace period in progress right now,
          * any callbacks we have up to this point will be satisfied
-         * by the next grace period. So promote all callbacks to be
-         * handled after the end of the next grace period. If the
-         * CPU is not yet aware of the end of the previous grace period,
-         * we need to allow for the callback advancement that will
-         * occur when it does become aware. Deadlock prevents us from
-         * making it aware at this point: We cannot acquire a leaf
-         * rcu_node ->lock while holding the root rcu_node ->lock.
+         * by the next grace period. So this is a good place to
+         * assign a grace period number to recently posted callbacks.
          */
-        rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
-        if (rdp->completed == rsp->completed)
-                rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+        rcu_accelerate_cbs(rsp, rnp, rdp);
 
         rsp->gp_flags = RCU_GP_FLAG_INIT;
         raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
@@ -1527,7 +1666,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
                  * This GP can't end until cpu checks in, so all of our
                  * callbacks can be processed during the next GP.
                  */
-                rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+                rcu_accelerate_cbs(rsp, rnp, rdp);
 
                 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
         }
@@ -1779,7 +1918,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
         long bl, count, count_lazy;
         int i;
 
-        /* If no callbacks are ready, just return.*/
+        /* If no callbacks are ready, just return. */
         if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
                 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
                 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
@@ -2008,19 +2147,19 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 
         WARN_ON_ONCE(rdp->beenonline == 0);
 
-        /*
-         * Advance callbacks in response to end of earlier grace
-         * period that some other CPU ended.
-         */
+        /* Handle the end of a grace period that some other CPU ended. */
         rcu_process_gp_end(rsp, rdp);
 
         /* Update RCU state based on any recent quiescent states. */
         rcu_check_quiescent_state(rsp, rdp);
 
         /* Does this CPU require a not-yet-started grace period? */
+        local_irq_save(flags);
         if (cpu_needs_another_gp(rsp, rdp)) {
-                raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
+                raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
                 rcu_start_gp(rsp, flags);  /* releases above lock */
+        } else {
+                local_irq_restore(flags);
         }
 
         /* If there are callbacks ready, invoke them. */