author     Lai Jiangshan <laijs@cn.fujitsu.com>           2012-02-27 12:28:10 -0500
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2012-04-30 13:48:22 -0400
commit     18108ebfebe9e871d0a9af830baf8f5df69eb5fc (patch)
tree       363127aced66530e6c6808a55462e10c3e8fedb8 /kernel/srcu.c
parent     944ce9af4767ca085d465e4add69df11a8faa9ef (diff)
rcu: Improve SRCU's wait_idx() comments
The safety of SRCU is provided by wait_idx() rather than by flipping.
The flipping actually prevents starvation.
This commit therefore updates the comments to more accurately and
precisely describe what is going on.
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
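
For orientation, the update-side sequence this commit arrives at can be sketched as follows. This is a simplified paraphrase of the new __synchronize_srcu() body (the mutex, the lockdep assertion, and the surrounding context lines are omitted), not verbatim kernel code; safety comes from the two wait_idx() calls, while the flip only bounds how many new readers can accumulate on the index being drained.

	/*
	 * Simplified sketch of the post-commit update-side flow;
	 * locking and lockdep checks omitted.
	 */
	static void __synchronize_srcu_sketch(struct srcu_struct *sp, bool expedited)
	{
		int busy_idx = sp->completed & 0x1;	/* index new readers are currently using */

		wait_idx(sp, 1 - busy_idx, expedited);	/* drain any really old readers */
		srcu_flip(sp);				/* ->completed++: steer new readers to the other index */
		wait_idx(sp, busy_idx, expedited);	/* drain recent pre-existing readers */
	}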
Diffstat (limited to 'kernel/srcu.c')
-rw-r--r--  kernel/srcu.c  77
1 files changed, 37 insertions, 40 deletions
diff --git a/kernel/srcu.c b/kernel/srcu.c
index b6b9ea2eb51c..1fecb4d858ed 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -249,6 +249,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  */
 #define SYNCHRONIZE_SRCU_READER_DELAY 5
 
+/*
+ * Wait until all pre-existing readers complete.  Such readers
+ * will have used the index specified by "idx".
+ */
 static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
 {
 	int trycount = 0;
@@ -291,24 +295,9 @@ static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
 	smp_mb(); /* E */
 }
 
-/*
- * Flip the readers' index by incrementing ->completed, then wait
- * until there are no more readers using the counters referenced by
- * the old index value.  (Recall that the index is the bottom bit
- * of ->completed.)
- *
- * Of course, it is possible that a reader might be delayed for the
- * full duration of flip_idx_and_wait() between fetching the
- * index and incrementing its counter.  This possibility is handled
- * by the next __synchronize_srcu() invoking wait_idx() for such readers
- * before starting a new grace period.
- */
-static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
+static void srcu_flip(struct srcu_struct *sp)
 {
-	int idx;
-
-	idx = sp->completed++ & 0x1;
-	wait_idx(sp, idx, expedited);
+	sp->completed++;
 }
 
 /*
@@ -316,6 +305,8 @@ static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
  */
 static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 {
+	int busy_idx;
+
 	rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
 			   !lock_is_held(&rcu_bh_lock_map) &&
 			   !lock_is_held(&rcu_lock_map) &&
@@ -323,8 +314,28 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 		       "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
 
 	mutex_lock(&sp->mutex);
+	busy_idx = sp->completed & 0X1UL;
 
 	/*
+	 * If we recently flipped the index, there will be some readers
+	 * using idx=0 and others using idx=1.  Therefore, two calls to
+	 * wait_idx()s suffice to ensure that all pre-existing readers
+	 * have completed:
+	 *
+	 * __synchronize_srcu() {
+	 * 	wait_idx(sp, 0, expedited);
+	 * 	wait_idx(sp, 1, expedited);
+	 * }
+	 *
+	 * Starvation is prevented by the fact that we flip the index.
+	 * While we wait on one index to clear out, almost all new readers
+	 * will be using the other index.  The number of new readers using the
+	 * index we are waiting on is sharply bounded by roughly the number
+	 * of CPUs.
+	 *
+	 * How can new readers possibly using the old pre-flip value of
+	 * the index?  Consider the following sequence of events:
+	 *
 	 * Suppose that during the previous grace period, a reader
 	 * picked up the old value of the index, but did not increment
 	 * its counter until after the previous instance of
@@ -333,31 +344,17 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 	 * not start until after the grace period started, so the grace
 	 * period was not obligated to wait for that reader.
 	 *
-	 * However, the current SRCU grace period does have to wait for
-	 * that reader.  This is handled by invoking wait_idx() on the
-	 * non-active set of counters (hence sp->completed - 1).  Once
-	 * wait_idx() returns, we know that all readers that picked up
-	 * the old value of ->completed and that already incremented their
-	 * counter will have completed.
-	 *
-	 * But what about readers that picked up the old value of
-	 * ->completed, but -still- have not managed to increment their
-	 * counter?  We do not need to wait for those readers, because
-	 * they will have started their SRCU read-side critical section
-	 * after the current grace period starts.
-	 *
-	 * Because it is unlikely that readers will be preempted between
-	 * fetching ->completed and incrementing their counter, wait_idx()
-	 * will normally not need to wait.
+	 * However, this sequence of events is quite improbable, so
+	 * this call to wait_idx(), which waits on really old readers
+	 * describe in this comment above, will almost never need to wait.
 	 */
-	wait_idx(sp, (sp->completed - 1) & 0x1, expedited);
+	wait_idx(sp, 1 - busy_idx, expedited);
 
-	/*
-	 * Now that wait_idx() has waited for the really old readers,
-	 * invoke flip_idx_and_wait() to flip the counter and wait
-	 * for current SRCU readers.
-	 */
-	flip_idx_and_wait(sp, expedited);
+	/* Flip the index to avoid reader-induced starvation. */
+	srcu_flip(sp);
+
+	/* Wait for recent pre-existing readers. */
+	wait_idx(sp, busy_idx, expedited);
 
 	mutex_unlock(&sp->mutex);
 }
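
For readers less familiar with the SRCU API, the following hypothetical usage sketch (not part of this commit; my_srcu, my_ptr, my_data, and the surrounding functions are invented names) illustrates which readers synchronize_srcu(), and therefore __synchronize_srcu() above, must wait for.

	#include <linux/srcu.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct my_data {
		int value;
	};

	static struct srcu_struct my_srcu;	/* assume init_srcu_struct(&my_srcu) ran at init time */
	static struct my_data __rcu *my_ptr;

	static int my_reader(void)
	{
		struct my_data *p;
		int idx, val = -1;

		idx = srcu_read_lock(&my_srcu);		/* samples the current index */
		p = srcu_dereference(my_ptr, &my_srcu);
		if (p)
			val = p->value;
		srcu_read_unlock(&my_srcu, idx);	/* decrements the counter for idx */
		return val;
	}

	static void my_update(struct my_data *newp)
	{
		struct my_data *old;

		old = rcu_dereference_protected(my_ptr, 1);	/* updates assumed serialized by caller */
		rcu_assign_pointer(my_ptr, newp);
		synchronize_srcu(&my_srcu);	/* waits for pre-existing readers via wait_idx() */
		kfree(old);			/* no pre-existing reader can still reference old */
	}

A reader that has already returned from srcu_read_lock() when my_update() reaches synchronize_srcu() is a "pre-existing reader" and is waited for by one of the two wait_idx() calls; readers that begin after srcu_flip() use the other index, which is what keeps the wait on busy_idx from being starved.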