author	Lai Jiangshan <laijs@cn.fujitsu.com>	2012-02-27 12:28:10 -0500
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2012-04-30 13:48:22 -0400
commit	18108ebfebe9e871d0a9af830baf8f5df69eb5fc (patch)
tree	363127aced66530e6c6808a55462e10c3e8fedb8 /kernel/srcu.c
parent	944ce9af4767ca085d465e4add69df11a8faa9ef (diff)
rcu: Improve SRCU's wait_idx() comments

The safety of SRCU is provided by wait_idx() rather than by the flipping;
the flipping actually prevents starvation.  This commit therefore updates
the comments to more accurately and precisely describe what is going on.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
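For orientation, here is a minimal userspace sketch of the ordering the patched __synchronize_srcu() follows: wait on the idle index, flip, then wait on the previously busy index. The toy_* names and plain-int counters are inventions of this note, not kernel code; the real implementation uses per-CPU counters, memory barriers, and sleeping waits.

/*
 * Hedged illustration only: a toy model of the post-patch grace-period
 * sequence.  Not the kernel's implementation.
 */
#include <stdio.h>

struct toy_srcu {
	unsigned long completed;	/* bottom bit = index new readers use */
	int nreaders[2];		/* readers still using each index      */
};

/* Stand-in for wait_idx(): only reports whether it would have to block. */
static void toy_wait_idx(struct toy_srcu *sp, int idx)
{
	printf("wait_idx(%d): %s\n", idx,
	       sp->nreaders[idx] ? "would wait for readers" : "nothing to wait for");
}

/* Stand-in for srcu_flip(): steer new readers to the other index. */
static void toy_srcu_flip(struct toy_srcu *sp)
{
	sp->completed++;
}

/* The post-patch grace-period sequence. */
static void toy_synchronize_srcu(struct toy_srcu *sp)
{
	int busy_idx = sp->completed & 0x1;

	toy_wait_idx(sp, 1 - busy_idx);	/* really old readers: safety  */
	toy_srcu_flip(sp);		/* flip: bounds new arrivals   */
	toy_wait_idx(sp, busy_idx);	/* recent pre-existing readers */
}

int main(void)
{
	struct toy_srcu sp = { .completed = 0, .nreaders = { 2, 0 } };

	/* Two readers are active on index 0 when the grace period starts. */
	toy_synchronize_srcu(&sp);
	return 0;
}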
Diffstat (limited to 'kernel/srcu.c')
-rw-r--r--	kernel/srcu.c	77

1 file changed, 37 insertions(+), 40 deletions(-)
diff --git a/kernel/srcu.c b/kernel/srcu.c
index b6b9ea2eb51c..1fecb4d858ed 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -249,6 +249,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  */
 #define SYNCHRONIZE_SRCU_READER_DELAY 5
 
+/*
+ * Wait until all pre-existing readers complete.  Such readers
+ * will have used the index specified by "idx".
+ */
 static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
 {
 	int trycount = 0;
@@ -291,24 +295,9 @@ static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
 	smp_mb(); /* E */
 }
 
-/*
- * Flip the readers' index by incrementing ->completed, then wait
- * until there are no more readers using the counters referenced by
- * the old index value.  (Recall that the index is the bottom bit
- * of ->completed.)
- *
- * Of course, it is possible that a reader might be delayed for the
- * full duration of flip_idx_and_wait() between fetching the
- * index and incrementing its counter.  This possibility is handled
- * by the next __synchronize_srcu() invoking wait_idx() for such readers
- * before starting a new grace period.
- */
-static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
+static void srcu_flip(struct srcu_struct *sp)
 {
-	int idx;
-
-	idx = sp->completed++ & 0x1;
-	wait_idx(sp, idx, expedited);
+	sp->completed++;
 }
 
 /*
@@ -316,6 +305,8 @@ static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
  */
 static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 {
+	int busy_idx;
+
 	rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
 			   !lock_is_held(&rcu_bh_lock_map) &&
 			   !lock_is_held(&rcu_lock_map) &&
@@ -323,8 +314,28 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 			   "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
 
 	mutex_lock(&sp->mutex);
+	busy_idx = sp->completed & 0x1UL;
 
 	/*
+	 * If we recently flipped the index, there will be some readers
+	 * using idx=0 and others using idx=1.  Therefore, two calls to
+	 * wait_idx() suffice to ensure that all pre-existing readers
+	 * have completed:
+	 *
+	 * __synchronize_srcu() {
+	 *	wait_idx(sp, 0, expedited);
+	 *	wait_idx(sp, 1, expedited);
+	 * }
+	 *
+	 * Starvation is prevented by the fact that we flip the index.
+	 * While we wait on one index to clear out, almost all new readers
+	 * will be using the other index.  The number of new readers using the
+	 * index we are waiting on is sharply bounded by roughly the number
+	 * of CPUs.
+	 *
+	 * How can new readers possibly be using the old pre-flip value of
+	 * the index?  Consider the following sequence of events:
+	 *
 	 * Suppose that during the previous grace period, a reader
 	 * picked up the old value of the index, but did not increment
 	 * its counter until after the previous instance of
@@ -333,31 +344,17 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 	 * not start until after the grace period started, so the grace
 	 * period was not obligated to wait for that reader.
 	 *
-	 * However, the current SRCU grace period does have to wait for
-	 * that reader.  This is handled by invoking wait_idx() on the
-	 * non-active set of counters (hence sp->completed - 1).  Once
-	 * wait_idx() returns, we know that all readers that picked up
-	 * the old value of ->completed and that already incremented their
-	 * counter will have completed.
-	 *
-	 * But what about readers that picked up the old value of
-	 * ->completed, but -still- have not managed to increment their
-	 * counter?  We do not need to wait for those readers, because
-	 * they will have started their SRCU read-side critical section
-	 * after the current grace period starts.
-	 *
-	 * Because it is unlikely that readers will be preempted between
-	 * fetching ->completed and incrementing their counter, wait_idx()
-	 * will normally not need to wait.
+	 * However, this sequence of events is quite improbable, so
+	 * this call to wait_idx(), which waits on the really old readers
+	 * described in the comment above, will almost never need to wait.
 	 */
-	wait_idx(sp, (sp->completed - 1) & 0x1, expedited);
+	wait_idx(sp, 1 - busy_idx, expedited);
 
-	/*
-	 * Now that wait_idx() has waited for the really old readers,
-	 * invoke flip_idx_and_wait() to flip the counter and wait
-	 * for current SRCU readers.
-	 */
-	flip_idx_and_wait(sp, expedited);
+	/* Flip the index to avoid reader-induced starvation. */
+	srcu_flip(sp);
+
+	/* Wait for recent pre-existing readers. */
+	wait_idx(sp, busy_idx, expedited);
 
 	mutex_unlock(&sp->mutex);
 }
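The comment added in the final hunk argues that the first wait_idx() call covers even a reader that fetched ->completed during the previous grace period but incremented its counter only afterwards. Below is a hedged, single-threaded trace of that scenario; the toy_* names and global counters are again invented for illustration and deliberately omit the kernel's per-CPU counters and memory barriers.

/*
 * Hedged illustration of the "really old reader" case handled by the
 * first wait_idx() call.  Not kernel code.
 */
#include <stdio.h>

static unsigned long completed;	/* bottom bit = index handed to new readers */
static int nreaders[2];		/* outstanding readers per index            */

/* Stand-in for wait_idx(): report whether it would have to block. */
static void report_wait(int idx)
{
	printf("  wait_idx(%d): %s\n", idx,
	       nreaders[idx] ? "must wait" : "returns immediately");
}

/* The post-patch grace-period sequence, with tracing. */
static void toy_grace_period(const char *name)
{
	int busy_idx = completed & 0x1;

	printf("%s (busy_idx=%d)\n", name, busy_idx);
	report_wait(1 - busy_idx);	/* really old readers       */
	completed++;			/* srcu_flip()              */
	report_wait(busy_idx);		/* recent pre-existing ones */
}

int main(void)
{
	/* A reader samples the index but is delayed before incrementing. */
	int stale_idx = completed & 0x1;	/* reads 0 */

	/* Grace period N runs to completion; the delayed reader has not
	 * shown up in any counter yet, so neither wait has work to do. */
	toy_grace_period("GP N");

	/* The delayed reader now increments the counter for its stale
	 * index and enters its read-side critical section. */
	nreaders[stale_idx]++;

	/* Grace period N+1: busy_idx is now 1, so its first wait call is
	 * on index 0, and that is the one that covers the delayed reader. */
	toy_grace_period("GP N+1");
	return 0;
}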