author     Lai Jiangshan <laijs@cn.fujitsu.com>           2012-02-27 12:28:10 -0500
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2012-04-30 13:48:22 -0400
commit     18108ebfebe9e871d0a9af830baf8f5df69eb5fc (patch)
tree       363127aced66530e6c6808a55462e10c3e8fedb8 /kernel/srcu.c
parent     944ce9af4767ca085d465e4add69df11a8faa9ef (diff)
rcu: Improve SRCU's wait_idx() comments
The safety of SRCU is provided by wait_idx() rather than by flipping.
The flipping actually prevents starvation.
This commit therefore updates the comments to more accurately and
precisely describe what is going on.
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
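
For orientation, the update-side sequence this commit arrives at can be sketched as follows. This is a simplified paraphrase of the new __synchronize_srcu() body (the mutex, the lockdep assertion, and the surrounding context lines are omitted), not verbatim kernel code; safety comes from the two wait_idx() calls, while the flip only bounds how many new readers can accumulate on the index being drained.

	/*
	 * Simplified sketch of the post-commit update-side flow;
	 * locking and lockdep checks omitted.
	 */
	static void __synchronize_srcu_sketch(struct srcu_struct *sp, bool expedited)
	{
		int busy_idx = sp->completed & 0x1;	/* index new readers are currently using */

		wait_idx(sp, 1 - busy_idx, expedited);	/* drain any really old readers */
		srcu_flip(sp);				/* ->completed++: steer new readers to the other index */
		wait_idx(sp, busy_idx, expedited);	/* drain recent pre-existing readers */
	}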
Diffstat (limited to 'kernel/srcu.c')
-rw-r--r--  kernel/srcu.c  77
1 files changed, 37 insertions, 40 deletions
diff --git a/kernel/srcu.c b/kernel/srcu.c
index b6b9ea2eb51c..1fecb4d858ed 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -249,6 +249,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  */
 #define SYNCHRONIZE_SRCU_READER_DELAY 5
 
+/*
+ * Wait until all pre-existing readers complete.  Such readers
+ * will have used the index specified by "idx".
+ */
 static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
 {
 	int trycount = 0;
@@ -291,24 +295,9 @@ static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
 	smp_mb(); /* E */
 }
 
-/*
- * Flip the readers' index by incrementing ->completed, then wait
- * until there are no more readers using the counters referenced by
- * the old index value.  (Recall that the index is the bottom bit
- * of ->completed.)
- *
- * Of course, it is possible that a reader might be delayed for the
- * full duration of flip_idx_and_wait() between fetching the
- * index and incrementing its counter.  This possibility is handled
- * by the next __synchronize_srcu() invoking wait_idx() for such readers
- * before starting a new grace period.
- */
-static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
+static void srcu_flip(struct srcu_struct *sp)
 {
-	int idx;
-
-	idx = sp->completed++ & 0x1;
-	wait_idx(sp, idx, expedited);
+	sp->completed++;
 }
 
 /*
@@ -316,6 +305,8 @@ static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
  */
 static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 {
+	int busy_idx;
+
 	rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
 			   !lock_is_held(&rcu_bh_lock_map) &&
 			   !lock_is_held(&rcu_lock_map) &&
@@ -323,8 +314,28 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 		       "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
 
 	mutex_lock(&sp->mutex);
+	busy_idx = sp->completed & 0X1UL;
 
 	/*
+	 * If we recently flipped the index, there will be some readers
+	 * using idx=0 and others using idx=1.  Therefore, two calls to
+	 * wait_idx()s suffice to ensure that all pre-existing readers
+	 * have completed:
+	 *
+	 * __synchronize_srcu() {
+	 * 	wait_idx(sp, 0, expedited);
+	 * 	wait_idx(sp, 1, expedited);
+	 * }
+	 *
+	 * Starvation is prevented by the fact that we flip the index.
+	 * While we wait on one index to clear out, almost all new readers
+	 * will be using the other index.  The number of new readers using the
+	 * index we are waiting on is sharply bounded by roughly the number
+	 * of CPUs.
+	 *
+	 * How can new readers possibly using the old pre-flip value of
+	 * the index?  Consider the following sequence of events:
+	 *
 	 * Suppose that during the previous grace period, a reader
 	 * picked up the old value of the index, but did not increment
 	 * its counter until after the previous instance of
@@ -333,31 +344,17 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 	 * not start until after the grace period started, so the grace
 	 * period was not obligated to wait for that reader.
 	 *
-	 * However, the current SRCU grace period does have to wait for
-	 * that reader.  This is handled by invoking wait_idx() on the
-	 * non-active set of counters (hence sp->completed - 1).  Once
-	 * wait_idx() returns, we know that all readers that picked up
-	 * the old value of ->completed and that already incremented their
-	 * counter will have completed.
-	 *
-	 * But what about readers that picked up the old value of
-	 * ->completed, but -still- have not managed to increment their
-	 * counter?  We do not need to wait for those readers, because
-	 * they will have started their SRCU read-side critical section
-	 * after the current grace period starts.
-	 *
-	 * Because it is unlikely that readers will be preempted between
-	 * fetching ->completed and incrementing their counter, wait_idx()
-	 * will normally not need to wait.
+	 * However, this sequence of events is quite improbable, so
+	 * this call to wait_idx(), which waits on really old readers
+	 * describe in this comment above, will almost never need to wait.
 	 */
-	wait_idx(sp, (sp->completed - 1) & 0x1, expedited);
+	wait_idx(sp, 1 - busy_idx, expedited);
 
-	/*
-	 * Now that wait_idx() has waited for the really old readers,
-	 * invoke flip_idx_and_wait() to flip the counter and wait
-	 * for current SRCU readers.
-	 */
-	flip_idx_and_wait(sp, expedited);
+	/* Flip the index to avoid reader-induced starvation. */
+	srcu_flip(sp);
+
+	/* Wait for recent pre-existing readers. */
+	wait_idx(sp, busy_idx, expedited);
 
 	mutex_unlock(&sp->mutex);
 }
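
For readers less familiar with the SRCU API, the following hypothetical usage sketch (not part of this commit; my_srcu, my_ptr, my_data, and the surrounding functions are invented names) illustrates which readers synchronize_srcu(), and therefore __synchronize_srcu() above, must wait for.

	#include <linux/srcu.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct my_data {
		int value;
	};

	static struct srcu_struct my_srcu;	/* assume init_srcu_struct(&my_srcu) ran at init time */
	static struct my_data __rcu *my_ptr;

	static int my_reader(void)
	{
		struct my_data *p;
		int idx, val = -1;

		idx = srcu_read_lock(&my_srcu);		/* samples the current index */
		p = srcu_dereference(my_ptr, &my_srcu);
		if (p)
			val = p->value;
		srcu_read_unlock(&my_srcu, idx);	/* decrements the counter for idx */
		return val;
	}

	static void my_update(struct my_data *newp)
	{
		struct my_data *old;

		old = rcu_dereference_protected(my_ptr, 1);	/* updates assumed serialized by caller */
		rcu_assign_pointer(my_ptr, newp);
		synchronize_srcu(&my_srcu);	/* waits for pre-existing readers via wait_idx() */
		kfree(old);			/* no pre-existing reader can still reference old */
	}

A reader that has already returned from srcu_read_lock() when my_update() reaches synchronize_srcu() is a "pre-existing reader" and is waited for by one of the two wait_idx() calls; readers that begin after srcu_flip() use the other index, which is what keeps the wait on busy_idx from being starved.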